aboutsummaryrefslogtreecommitdiff
path: root/bridges/NextInpactBridge.php
blob: c3cca30d2c129d9248dc433418b0a76443c28fe5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
<?php
/**
 * Bridge for the NextInpact / INpact Hardware RSS feeds.
 *
 * The selected RSS feed is loaded through FeedExpander, then each item is
 * completed with content scraped from the article page itself. Items can be
 * dropped depending on the "Premium" and "Brief" filter parameters.
 */
class NextInpactBridge extends FeedExpander {

	const MAINTAINER = 'qwertygc and ORelio';
	const NAME = 'NextInpact Bridge';
	const URI = 'https://www.nextinpact.com/';
	const URI_HARDWARE = 'https://www.inpact-hardware.com/';
	const DESCRIPTION = 'Returns the newest articles.';

	// Selector whose presence marks a page as Premium for the 'filter_premium' option
	const PREMIUM_SELECTOR = 'p.red-msg';
	// Selector whose presence marks a page as a Brief for the 'filter_brief' option
	const BRIEF_SELECTOR = 'div.brief-container';

	const PARAMETERS = array( array(
		'feed' => array(
			'name' => 'Feed',
			'type' => 'list',
			'values' => array(
				'Nos actualités' => array(
					'Toutes nos publications' => 'news',
					'Toutes nos publications sauf #LeBrief' => 'nobrief',
					'Toutes nos publications sauf INpact Hardware' => 'noih',
					'Seulement les publications INpact Hardware' => 'hardware:news',
					'Seulement les publications Next INpact' => 'nobrief-noih',
					'Seulement les publications #LeBrief' => 'lebrief',
				),
				'Flux spécifiques' => array(
					'Le blog' => 'blog',
					'Les bons plans' => 'bonsplans',
					'Publications INpact Hardware en accès libre' => 'hardware:acces-libre',
					'Publications Next INpact en accès libre' => 'acces-libre',
				),
				'Flux thématiques' => array(
					'Tech' => 'category:1',
					'Logiciel' => 'category:2',
					'Internet' => 'category:3',
					'Mobilité' => 'category:4',
					'Droit' => 'category:5',
					'Économie' => 'category:6',
					'Culture numérique' => 'category:7',
					'Next INpact' => 'category:8',
				)
			)
		),
		'filter_premium' => array(
			'name' => 'Premium',
			'type' => 'list',
			'values' => array(
				'No filter' => '0',
				'Hide Premium' => '1',
				'Only Premium' => '2'
			)
		),
		'filter_brief' => array(
			'name' => 'Brief',
			'type' => 'list',
			'values' => array(
				'No filter' => '0',
				'Hide Brief' => '1',
				'Only Brief' => '2'
			)
		)
	));

	/**
	 * Builds the RSS feed URL from the 'feed' parameter and hands it to
	 * collectExpandableDatas().
	 *
	 * The parameter value may carry two markers:
	 * - a leading 'hardware:' selects the INpact Hardware domain;
	 * - a leading 'category:<id>' selects the 'params' feed with the id
	 *   passed as the CategoryIds query argument.
	 */
	public function collectData(){
		$feed = $this->getInput('feed');
		if (empty($feed)) {
			// Default to All articles
			$feed = 'news';
		}

		$base_uri = self::URI;
		$hardware_prefix = 'hardware:';
		if (strpos($feed, $hardware_prefix) === 0) {
			// Feed hosted on Hardware domain: strip only the leading marker
			$base_uri = self::URI_HARDWARE;
			$feed = (string)substr($feed, strlen($hardware_prefix));
		}

		$args = '';
		$category_prefix = 'category:';
		if (strpos($feed, $category_prefix) === 0) {
			// Feed with specific category parameter
			$args = '?CategoryIds=' . (string)substr($feed, strlen($category_prefix));
			$feed = 'params';
		}

		$this->collectExpandableDatas($base_uri . 'rss/' . $feed . '.xml' . $args);
	}

	/**
	 * Parses one feed entry and replaces its content with the scraped article.
	 *
	 * @param mixed $newsItem Feed entry, forwarded as-is to the parent parser.
	 * @return array|null The completed item, or null when the article is
	 *                    rejected by the Premium/Brief filters.
	 */
	protected function parseItem($newsItem){
		$item = parent::parseItem($newsItem);
		$item['content'] = $this->extractContent($item, $item['uri']);
		if (is_null($item['content'])) {
			return null; // Filtered article
		}
		return $item;
	}

	/**
	 * Downloads an article page and assembles the item content from it.
	 *
	 * @param array $item Item as parsed from the feed ('title' and 'content' are read when set).
	 * @param string $url Address of the article page.
	 * @return string|null HTML content (or a failure message), or null when
	 *                     the article must be filtered out.
	 */
	private function extractContent($item, $url){
		$html = getSimpleHTMLDOMCached($url);
		if (!is_object($html)) {
			return 'Failed to request NextInpact: ' . $url;
		}

		if ($this->isFilteredOut($html)) {
			return null; // Filter article
		}

		$article_content = $html->find('div.article-content', 0);
		if (!is_object($article_content)) {
			$article_content = $html->find('div.content', 0);
		}

		if (!is_object($article_content)) {
			// Keep whatever the feed provided and append a notice
			$text = '<p><em>Failed to retrieve full article content</em></p>';
			if (isset($item['content'])) {
				$text = $item['content'] . $text;
			}
			return $text;
		}

		// Drop the label of the "report an error" control embedded in the article
		$body = str_replace(
			'>Signaler une erreur</span>',
			'></span>',
			$article_content->outertext
		);

		return $this->buildSubtitleHtml($html, $item)
			. $this->buildImageHtml($html)
			. $body
			. $this->buildPaywallHtml($html);
	}

	/**
	 * Applies the 'filter_premium' and 'filter_brief' parameters to a page.
	 *
	 * For each parameter: 0 disables the filter, 1 rejects pages containing
	 * the marker element, 2 rejects pages that do not contain it.
	 *
	 * @param object $html Parsed article page.
	 * @return bool True when the article must be dropped.
	 */
	private function isFilteredOut($html){
		$filters = array(
			'filter_premium' => self::PREMIUM_SELECTOR,
			'filter_brief' => self::BRIEF_SELECTOR
		);

		foreach ($filters as $param_name => $selector) {
			$mode = intval($this->getInput($param_name));
			if ($mode === 0) {
				continue;
			}
			$element_present = is_object($html->find($selector, 0));
			$element_wanted = ($mode === 2);
			if ($element_present !== $element_wanted) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Builds the subtitle paragraph shown above the article.
	 *
	 * Lookup order: 'small.subtitle', then any 'small' (skipped on Brief
	 * pages), then 'h2.title' inside 'div.content-wrapper'.
	 *
	 * @param object $html Parsed article page.
	 * @param array $item Feed item, used to avoid repeating the title.
	 * @return string Subtitle markup, or '' when there is no usable subtitle.
	 */
	private function buildSubtitleHtml($html, $item){
		$node = $html->find('small.subtitle', 0);
		if (!is_object($node) && !is_object($html->find(self::BRIEF_SELECTOR, 0))) {
			$node = $html->find('small', 0);
		}
		if (!is_object($node)) {
			$content_wrapper = $html->find('div.content-wrapper', 0);
			if (is_object($content_wrapper)) {
				$node = $content_wrapper->find('h2.title', 0);
			}
		}
		if (!is_object($node)) {
			return '';
		}

		// Compare what will actually be printed: surrounding whitespace must
		// not make a subtitle look different from the title.
		$text = trim((string)$node->plaintext);
		if ($text === '') {
			return '';
		}
		if (isset($item['title']) && $text === trim((string)$item['title'])) {
			return '';
		}

		return '<p><em>' . $text . '</em></p>';
	}

	/**
	 * Builds the lead image paragraph.
	 *
	 * @param object $html Parsed article page.
	 * @return string Image markup, or '' when no image URL could be found.
	 */
	private function buildImageHtml($html){
		$container = $html->find('div.article-image, div.image-container', 0);
		if (!is_object($container)) {
			return '';
		}

		// The container may exist without any <img> inside it
		$img = $container->find('img', 0);
		if (!is_object($img)) {
			return '';
		}

		$src = $img->src;
		if (empty($src)) {
			$src = $this->pickWidestSrcsetUrl($img->srcset);
		}
		if (empty($src)) {
			return '';
		}

		return '<p><img src="' . $src . '" alt="-" /></p>';
	}

	/**
	 * Selects one URL out of a srcset attribute value.
	 *
	 * Candidates look like "url 355w, url 1003w, url 748w". The candidate
	 * with the greatest width descriptor wins; when widths are equal or
	 * absent, the last candidate wins.
	 *
	 * @param mixed $srcset Raw attribute value (may be empty or not a string).
	 * @return string Chosen URL, or '' when the value holds no candidate.
	 */
	private function pickWidestSrcsetUrl($srcset){
		$best_url = '';
		$best_width = -1;

		// Split on comma + whitespace only, so commas inside an URL are kept
		foreach (preg_split('/,\s+/', trim((string)$srcset)) as $candidate) {
			$parts = preg_split('/\s+/', trim($candidate), -1, PREG_SPLIT_NO_EMPTY);
			if (empty($parts)) {
				continue;
			}

			$width = 0;
			if (isset($parts[1]) && preg_match('/^(\d+)w$/', $parts[1], $matches)) {
				$width = intval($matches[1]);
			}

			// '>=' lets later candidates win ties
			if ($width >= $best_width) {
				$best_width = $width;
				$best_url = $parts[0];
			}
		}

		return $best_url;
	}

	/**
	 * Builds the paywall notice appended after restricted articles.
	 *
	 * @param object $html Parsed article page.
	 * @return string Notice markup, or '' when the page shows no paywall
	 *                message or the message has no 'span.head-mention'.
	 */
	private function buildPaywallHtml($html){
		$restriction = $html->find('div.paywall-restriction', 0);
		if (!is_object($restriction) || !is_object($restriction->find(self::PREMIUM_SELECTOR, 0))) {
			return '';
		}

		$mention = $restriction->find('span.head-mention', 0);
		if (!is_object($mention)) {
			return '';
		}

		return '<p><em>' . $mention->innertext . '</em></p>';
	}
}