aboutsummaryrefslogtreecommitdiff
path: root/bridges/CNETBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/CNETBridge.php')
-rw-r--r--bridges/CNETBridge.php196
1 files changed, 101 insertions, 95 deletions
diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php
index 27946f25..34442abd 100644
--- a/bridges/CNETBridge.php
+++ b/bridges/CNETBridge.php
@@ -1,108 +1,114 @@
<?php
-class CNETBridge extends BridgeAbstract {
- const MAINTAINER = 'ORelio';
- const NAME = 'CNET News';
- const URI = 'https://www.cnet.com/';
- const CACHE_TIMEOUT = 3600; // 1h
- const DESCRIPTION = 'Returns the newest articles.';
- const PARAMETERS = array(
- array(
- 'topic' => array(
- 'name' => 'Topic',
- 'type' => 'list',
- 'values' => array(
- 'All articles' => '',
- 'Apple' => 'apple',
- 'Google' => 'google',
- 'Microsoft' => 'tags-microsoft',
- 'Computers' => 'topics-computers',
- 'Mobile' => 'topics-mobile',
- 'Sci-Tech' => 'topics-sci-tech',
- 'Security' => 'topics-security',
- 'Internet' => 'topics-internet',
- 'Tech Industry' => 'topics-tech-industry'
- )
- )
- )
- );
+class CNETBridge extends BridgeAbstract
+{
+ const MAINTAINER = 'ORelio';
+ const NAME = 'CNET News';
+ const URI = 'https://www.cnet.com/';
+ const CACHE_TIMEOUT = 3600; // 1h
+ const DESCRIPTION = 'Returns the newest articles.';
+ const PARAMETERS = [
+ [
+ 'topic' => [
+ 'name' => 'Topic',
+ 'type' => 'list',
+ 'values' => [
+ 'All articles' => '',
+ 'Apple' => 'apple',
+ 'Google' => 'google',
+ 'Microsoft' => 'tags-microsoft',
+ 'Computers' => 'topics-computers',
+ 'Mobile' => 'topics-mobile',
+ 'Sci-Tech' => 'topics-sci-tech',
+ 'Security' => 'topics-security',
+ 'Internet' => 'topics-internet',
+ 'Tech Industry' => 'topics-tech-industry'
+ ]
+ ]
+ ]
+ ];
- private function cleanArticle($article_html) {
- $offset_p = strpos($article_html, '<p>');
- $offset_figure = strpos($article_html, '<figure');
- $offset = ($offset_figure < $offset_p ? $offset_figure : $offset_p);
- $article_html = substr($article_html, $offset);
- $article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
- $article_html = str_replace(' height="0"', '', $article_html);
- $article_html = str_replace('<noscript>', '', $article_html);
- $article_html = str_replace('</noscript>', '', $article_html);
- $article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
- $article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
- $article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
- $article_html = stripWithDelimiters($article_html, '<script', '</script>');
- $article_html = stripWithDelimiters($article_html, '<svg', '</svg>');
- return $article_html;
- }
+ private function cleanArticle($article_html)
+ {
+ $offset_p = strpos($article_html, '<p>');
+ $offset_figure = strpos($article_html, '<figure');
+ $offset = ($offset_figure < $offset_p ? $offset_figure : $offset_p);
+ $article_html = substr($article_html, $offset);
+ $article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
+ $article_html = str_replace(' height="0"', '', $article_html);
+ $article_html = str_replace('<noscript>', '', $article_html);
+ $article_html = str_replace('</noscript>', '', $article_html);
+ $article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
+ $article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
+ $article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
+ $article_html = stripWithDelimiters($article_html, '<script', '</script>');
+ $article_html = stripWithDelimiters($article_html, '<svg', '</svg>');
+ return $article_html;
+ }
- public function collectData() {
+ public function collectData()
+ {
+ // Retrieve and check user input
+ $topic = str_replace('-', '/', $this->getInput('topic'));
+ if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic)))) {
+ returnClientError('Invalid topic: ' . $topic);
+ }
- // Retrieve and check user input
- $topic = str_replace('-', '/', $this->getInput('topic'));
- if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic))))
- returnClientError('Invalid topic: ' . $topic);
+ // Retrieve webpage
+ $pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
+ $html = getSimpleHTMLDOM($pageUrl);
- // Retrieve webpage
- $pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
- $html = getSimpleHTMLDOM($pageUrl);
+ // Process articles
+ foreach ($html->find('div.assetBody, div.riverPost') as $element) {
+ if (count($this->items) >= 10) {
+ break;
+ }
- // Process articles
- foreach($html->find('div.assetBody, div.riverPost') as $element) {
+ $article_title = trim($element->find('h2, h3', 0)->plaintext);
+ $article_uri = self::URI . substr($element->find('a', 0)->href, 1);
+ $article_thumbnail = $element->parent()->find('img[src]', 0)->src;
+ $article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
+ $article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
+ $article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
- if(count($this->items) >= 10) {
- break;
- }
+ if (is_null($article_thumbnail)) {
+ $article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
+ }
- $article_title = trim($element->find('h2, h3', 0)->plaintext);
- $article_uri = self::URI . substr($element->find('a', 0)->href, 1);
- $article_thumbnail = $element->parent()->find('img[src]', 0)->src;
- $article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
- $article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
- $article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
+ if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
+ $article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
- if (is_null($article_thumbnail))
- $article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
+ if (!is_null($article_html)) {
+ if (empty($article_thumbnail)) {
+ $article_thumbnail = $article_html->find('div.originalImage', 0);
+ }
+ if (empty($article_thumbnail)) {
+ $article_thumbnail = $article_html->find('span.imageContainer', 0);
+ }
+ if (is_object($article_thumbnail)) {
+ $article_thumbnail = $article_thumbnail->find('img', 0)->src;
+ }
- if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
+ $article_content .= trim(
+ $this->cleanArticle(
+ extractFromDelimiters(
+ $article_html,
+ '<article',
+ '<footer'
+ )
+ )
+ );
+ }
- $article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
-
- if (!is_null($article_html)) {
-
- if (empty($article_thumbnail))
- $article_thumbnail = $article_html->find('div.originalImage', 0);
- if (empty($article_thumbnail))
- $article_thumbnail = $article_html->find('span.imageContainer', 0);
- if (is_object($article_thumbnail))
- $article_thumbnail = $article_thumbnail->find('img', 0)->src;
-
- $article_content .= trim(
- $this->cleanArticle(
- extractFromDelimiters(
- $article_html, '<article', '<footer'
- )
- )
- );
- }
-
- $item = array();
- $item['uri'] = $article_uri;
- $item['title'] = $article_title;
- $item['author'] = $article_author;
- $item['timestamp'] = $article_timestamp;
- $item['enclosures'] = array($article_thumbnail);
- $item['content'] = $article_content;
- $this->items[] = $item;
- }
- }
- }
+ $item = [];
+ $item['uri'] = $article_uri;
+ $item['title'] = $article_title;
+ $item['author'] = $article_author;
+ $item['timestamp'] = $article_timestamp;
+ $item['enclosures'] = [$article_thumbnail];
+ $item['content'] = $article_content;
+ $this->items[] = $item;
+ }
+ }
+ }
}