aboutsummaryrefslogtreecommitdiff
path: root/bridges/MediapartBridge.php
blob: 3c8c8317227afa91512f8440da8c091c4b47f284 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php

/**
 * Bridge for the Mediapart newspaper feed.
 *
 * Expands the site's article feed and, when the user supplies a valid
 * MPSESSID session cookie, appends the full article body fetched from the
 * article page to each newspaper ("journal/") item.
 */
class MediapartBridge extends FeedExpander
{
    const MAINTAINER = 'killruana';
    const NAME = 'Mediapart Bridge';
    const URI = 'https://www.mediapart.fr/';
    const PARAMETERS = [
        [
            'single_page_mode' => [
                'name' => 'Single page article',
                'type' => 'checkbox',
                'title' => 'Display long articles on a single page',
                'defaultValue' => 'checked'
            ],
            'mpsessid' => [
                'name' => 'MPSESSID',
                'type' => 'text',
                'title' => 'Value of the session cookie MPSESSID'
            ]
        ]
    ];
    const CACHE_TIMEOUT = 7200; // 2h
    const DESCRIPTION = 'Returns the newest articles.';

    /**
     * Collects the feed items from the Mediapart articles feed.
     */
    public function collectData()
    {
        $url = self::URI . 'articles/feed';
        $this->collectExpandableDatas($url);
    }

    /**
     * Builds a feed item and optionally enriches it with the full article.
     *
     * Items whose URI is not under "journal/" are returned exactly as the
     * parent produced them. For newspaper items, the URI gets the
     * "?onglet=full" suffix when single page mode is enabled, and the article
     * body is appended to the content when a session cookie is configured.
     * If the article page cannot be parsed or does not contain the expected
     * container, the item is returned without the extra content instead of
     * failing.
     *
     * @param mixed $newsItem Feed entry as handed over by the parent class;
     *                        its `link` property is read to fetch the article.
     * @return array The feed item.
     */
    protected function parseItem($newsItem)
    {
        $item = parent::parseItem($newsItem);

        // Mediapart provide multiple type of contents.
        // We only process items relative to the newspaper
        // See issue #1292 - https://github.com/RSS-Bridge/rss-bridge/issues/1292
        // A missing URI is treated as "not a newspaper item".
        $uri = (string) ($item['uri'] ?? '');
        if (strpos($uri, self::URI . 'journal/') !== 0) {
            return $item;
        }

        // Enable single page mode?
        if ($this->getInput('single_page_mode') === true) {
            $item['uri'] .= '?onglet=full';
        }

        // Without a session cookie only the feed summary is available.
        $mpsessid = $this->getInput('mpsessid');
        if (empty($mpsessid)) {
            return $item;
        }

        // Set the session cookie
        $opt = [
            CURLOPT_COOKIE => 'MPSESSID=' . $mpsessid,
        ];

        // Get the page (always in its single page form, so the whole
        // article is available regardless of the single page mode setting)
        $articlePage = getSimpleHTMLDOM(
            $newsItem->link . '?onglet=full',
            [],
            $opt
        );

        // The parser may yield a falsy value for an unparsable document, and
        // the article container may be absent (e.g. expired session, changed
        // page layout). Keep the feed-provided item in both cases.
        $article = $articlePage ? $articlePage->find('div.content-article', 0) : null;
        if ($article === null) {
            return $item;
        }

        // Extract the article content
        $content = sanitize($article->innertext);
        $content = defaultLinkTo($content, static::URI);
        $item['content'] = ($item['content'] ?? '') . $content;

        return $item;
    }
}