aboutsummaryrefslogtreecommitdiff
path: root/bridges/CssSelectorFeedExpanderBridge.php
blob: c4763a86cfc9553e551b2e68a17778ef3270b58d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
<?php

/**
 * Expands a truncated RSS/Atom feed: every entry of the source feed is
 * re-fetched from its article page and its content is replaced by whatever
 * the user-supplied CSS selector matches there.
 *
 * The per-article extraction itself is delegated to expandEntryWithSelector(),
 * which is not defined in this class (presumably inherited from CssSelectorBridge).
 */
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
{
    const MAINTAINER = 'ORelio';
    const NAME = 'CSS Selector Feed Expander';
    const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
    const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'Feed: URL of truncated RSS feed',
                'exampleValue' => 'https://example.com/feed.xml',
                'required' => true
            ],
            'content_selector' => [
                'name' => 'Selector for each article content',
                'title' => <<<EOT
                    This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
                    Everything inside that element becomes feed item content.
                    EOT,
                'exampleValue' => 'article.content',
                'required' => true
            ],
            'content_cleanup' => [
                'name' => '[Optional] Content cleanup: List of items to remove',
                'title' => 'Selector for unnecessary elements to remove inside article contents.',
                'exampleValue' => 'div.ads, div.comments',
            ],
            'dont_expand_metadata' => [
                'name' => '[Optional] Don\'t expand metadata',
                'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
                'type' => 'checkbox',
            ],
            'discard_thumbnail' => [
                'name' => '[Optional] Discard thumbnail set by site author',
                'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
                'type' => 'checkbox',
            ],
            'thumbnail_as_header' => [
                'name' => '[Optional] Insert thumbnail as article header',
                'title' => 'Insert article main image on top of article contents.',
                'type' => 'checkbox',
            ],
            'limit' => self::LIMIT
        ]
    ];

    /**
     * Download the source feed, expand each entry from its article page and
     * append the resulting items to $this->items.
     *
     * Also sets $this->homepageUrl (feed 'uri', else the root page of the feed
     * URL) and $this->feedName (feed 'title', else the domain of the feed URL).
     *
     * Per entry:
     *  - entries without a 'uri' cannot be expanded and are kept as provided
     *    by the source feed;
     *  - with "dont_expand_metadata", only 'content' is taken from the
     *    expanded page, every other field comes from the feed entry;
     *  - otherwise the expanded item is used, but any non-empty field of the
     *    feed entry (except 'content') overrides it;
     *  - "discard_thumbnail" drops 'enclosures'; "thumbnail_as_header"
     *    prepends the first remaining enclosure as an <img> to the content.
     */
    public function collectData()
    {
        $url = $this->getInput('feed');
        $content_selector = $this->getInput('content_selector');
        $content_cleanup = $this->getInput('content_cleanup');
        $dont_expand_metadata = $this->getInput('dont_expand_metadata');
        $discard_thumbnail = $this->getInput('discard_thumbnail');
        $thumbnail_as_header = $this->getInput('thumbnail_as_header');
        $limit = $this->getInput('limit');

        $feedParser = new FeedParser();
        $xml = getContents($url);
        $source_feed = $feedParser->parseFeed($xml);

        // Tolerate a parsed feed that carries no 'items' key at all
        $items = $source_feed['items'] ?? [];

        // Map Homepage URL (Default: Root page)
        $this->homepageUrl = $source_feed['uri'] ?? urljoin($url, '/');

        // Map Feed Name (Default: Domain name, i.e. 3rd segment of "scheme://host/")
        $this->feedName = $source_feed['title'] ?? explode('/', urljoin($url, '/'))[2];

        // Apply item limit (Default: Global limit)
        if ($limit > 0) {
            $items = array_slice($items, 0, $limit);
        }

        // Expand feed items (CssSelectorBridge)
        foreach ($items as $item_from_feed) {
            if (empty($item_from_feed['uri'])) {
                // No article URL to fetch: keep the entry as-is instead of
                // calling the expander with a missing URL
                $item_expanded = $item_from_feed;
            } else {
                $item_expanded = $this->expandEntryWithSelector(
                    $item_from_feed['uri'],
                    $content_selector,
                    $content_cleanup
                );

                if ($dont_expand_metadata) {
                    // Take feed item, only replace content from expanded data
                    $content = $item_expanded['content'];
                    $item_expanded = $item_from_feed;
                    $item_expanded['content'] = $content;
                } else {
                    // Take expanded item, but give priority to metadata already in source item
                    foreach ($item_from_feed as $field => $val) {
                        if ($field !== 'content' && !empty($val)) {
                            $item_expanded[$field] = $val;
                        }
                    }
                }
            }

            if ($discard_thumbnail) {
                // unset() is a no-op when the key is absent
                unset($item_expanded['enclosures']);
            }

            if ($thumbnail_as_header && isset($item_expanded['enclosures'][0])) {
                // The enclosure URL originates from remote content: escape it
                // before placing it inside an HTML attribute. double_encode is
                // disabled so entities already present in the URL are preserved.
                $thumbnail_url = htmlspecialchars(
                    (string) $item_expanded['enclosures'][0],
                    ENT_QUOTES | ENT_SUBSTITUTE,
                    'UTF-8',
                    false
                );
                $item_expanded['content'] = '<p><img src="'
                    . $thumbnail_url
                    . '" /></p>'
                    . ($item_expanded['content'] ?? '');
            }

            $this->items[] = $item_expanded;
        }
    }
}