aboutsummaryrefslogtreecommitdiff
path: root/bridges/UsbekEtRicaBridge.php
blob: d5fd507a7444390461d5a62d303e5f27f2a3fd2c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
<?php
/**
* Bridge that turns the article cards found on the Usbek & Rica front page
* into feed items.
*
* Each <article> element on the page becomes one item carrying a title, an
* optional author, the article URI, a content body (either the card excerpt
* or, when requested, the full article text) and an optional image enclosure.
*/
class UsbekEtRicaBridge extends BridgeAbstract {

	const MAINTAINER = 'logmanoriginal';
	const NAME = 'Usbek & Rica Bridge';
	const URI = 'https://usbeketrica.com';
	const DESCRIPTION = 'Returns latest articles from the front page';

	const PARAMETERS = array(
		array(
			'limit' => array(
				'name' => 'Number of articles to return',
				'type' => 'number',
				'required' => false,
				'title' => 'Specifies the maximum number of articles to return',
				'defaultValue' => -1
			),
			'fullarticle' => array(
				'name' => 'Load full article',
				'type' => 'checkbox',
				'required' => false,
				'title' => 'Activate to load full articles',
			)
		)
	);

	/**
	* Collects the articles from the front page into $this->items.
	*
	* Cards without a title or without a usable link are skipped. A 'limit'
	* input that is not greater than zero means "no limit".
	*
	* @throws Exception if the front page could not be loaded
	*/
	public function collectData(){
		$limit = $this->getInput('limit');
		$fullarticle = $this->getInput('fullarticle');

		$html = getSimpleHTMLDOM($this->getURI());
		if(!$html) {
			// NOTE(review): assumes the loader yields a falsy value on failure;
			// fail with a readable message instead of calling find() on it.
			throw new Exception('Could not request ' . $this->getURI());
		}

		foreach($html->find('article') as $article) {
			$item = array();

			$title = $article->find('h2', 0);
			if(!$title) {
				// Sometimes we get rubbish, ignore.
				continue;
			}
			$item['title'] = $title->plaintext;

			$author = $article->find('div.author span', 0);
			if($author) {
				$item['author'] = $author->plaintext;
			}

			$link = $article->find('a.card-img', 0);
			if(!$link || !$link->href) {
				// Without a link there is nothing the item could point to
				// (and no full article to load), so treat it as rubbish too.
				continue;
			}
			$item['uri'] = $this->toAbsoluteUri($link->href);

			// Only hit the article page when the user asked for it
			$content = null;
			if($fullarticle) {
				$content = $this->loadFullArticle($item['uri']);
			}

			if(!is_null($content)) {
				$item['content'] = $content;
			} else {
				// Fall back to the excerpt shown on the card
				$excerpt = $article->find('div.card-excerpt', 0);
				if($excerpt) {
					$item['content'] = $excerpt->plaintext;
				}
			}

			$image = $article->find('div.card-img img', 0);
			if($image && $image->src) {
				$item['enclosures'] = array(
					$this->toAbsoluteUri($image->src)
				);
			}

			$this->items[] = $item;

			if($limit > 0 && count($this->items) >= $limit) {
				break;
			}
		}
	}

	/**
	* Loads the full article and returns the contents
	* @param $uri The article URI
	* @return The article content with root-relative URIs made absolute, or
	* null if the page could not be loaded or has no content element
	*/
	private function loadFullArticle($uri){
		$html = getSimpleHTMLDOMCached($uri);
		if(!$html) {
			// NOTE(review): assumes a falsy result on failure; returning null
			// lets the caller fall back to the card excerpt.
			return null;
		}

		// The article body is the second 'div.rich-text' element on the page
		$content = $html->find('div.rich-text', 1);
		if($content) {
			return $this->replaceUriInHtmlElement($content);
		}

		return null;
	}

	/**
	* Replaces all root-relative URIs with absolute ones
	*
	* Both href and src attributes are rewritten. Protocol-relative values
	* (starting with '//') are left untouched because they already name
	* their own host.
	*
	* @param $element A simplehtmldom element
	* @return The $element->innertext with all URIs replaced
	*/
	private function replaceUriInHtmlElement($element){
		$base = rtrim($this->getURI(), '/');
		$markup = $element->innertext;

		// A callback is used so the base URI is never interpreted as a
		// replacement pattern (e.g. '$1' or '\1' sequences).
		$result = preg_replace_callback(
			'#\b(href|src)="/(?!/)#',
			function($match) use ($base) {
				return $match[1] . '="' . $base . '/';
			},
			$markup
		);

		// preg_replace_callback() returns null on error; keep the original
		// markup rather than dropping the article content.
		return is_null($result) ? $markup : $result;
	}

	/**
	* Resolves a URI found on the site against the bridge URI
	* @param $uri An absolute, protocol-relative or relative URI
	* @return The absolute URI
	*/
	private function toAbsoluteUri($uri){
		$uri = trim($uri);

		// Anything that already carries a scheme (http:, https:, data:, ...)
		// is passed through unchanged.
		if(preg_match('#^[a-z][a-z0-9+.\-]*:#i', $uri)) {
			return $uri;
		}

		// Protocol-relative: reuse the scheme of the bridge URI
		if(substr($uri, 0, 2) === '//') {
			$scheme = parse_url($this->getURI(), PHP_URL_SCHEME);
			return ($scheme ? $scheme : 'https') . ':' . $uri;
		}

		// Relative: join with exactly one slash between host and path
		return rtrim($this->getURI(), '/') . '/' . ltrim($uri, '/');
	}
}