aboutsummaryrefslogtreecommitdiff
path: root/bridges/EuronewsBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/EuronewsBridge.php')
-rw-r--r-- bridges/EuronewsBridge.php 392
1 files changed, 197 insertions, 195 deletions
diff --git a/bridges/EuronewsBridge.php b/bridges/EuronewsBridge.php
index df11014c..2508a274 100644
--- a/bridges/EuronewsBridge.php
+++ b/bridges/EuronewsBridge.php
@@ -1,209 +1,211 @@
<?php
+
class EuronewsBridge extends BridgeAbstract
{
- const MAINTAINER = 'sqrtminusone';
- const NAME = 'Euronews Bridge';
- const URI = 'https://www.euronews.com/';
- const CACHE_TIMEOUT = 600; // 10 minutes
- const DESCRIPTION = 'Return articles from the "Just In" feed of Euronews.';
+ const MAINTAINER = 'sqrtminusone';
+ const NAME = 'Euronews Bridge';
+ const URI = 'https://www.euronews.com/';
+ const CACHE_TIMEOUT = 600; // 10 minutes
+ const DESCRIPTION = 'Return articles from the "Just In" feed of Euronews.';
- const PARAMETERS = array(
- '' => array(
- 'lang' => array(
- 'name' => 'Language',
- 'type' => 'list',
- 'defaultValue' => 'euronews.com',
- 'values' => array(
- 'English' => 'euronews.com',
- 'French' => 'fr.euronews.com',
- 'German' => 'de.euronews.com',
- 'Italian' => 'it.euronews.com',
- 'Spanish' => 'es.euronews.com',
- 'Portuguese' => 'pt.euronews.com',
- 'Russian' => 'ru.euronews.com',
- 'Turkish' => 'tr.euronews.com',
- 'Greek' => 'gr.euronews.com',
- 'Hungarian' => 'hu.euronews.com',
- 'Persian' => 'per.euronews.com',
- 'Arabic' => 'arabic.euronews.com',
- /* These versions don't have timeline.json */
- // 'Albanian' => 'euronews.al',
- // 'Romanian' => 'euronews.ro',
- // 'Georigian' => 'euronewsgeorgia.com',
- // 'Bulgarian' => 'euronewsbulgaria.com'
- // 'Serbian' => 'euronews.rs'
- )
- ),
- 'limit' => array(
- 'name' => 'Limit of items per feed',
- 'required' => true,
- 'type' => 'number',
- 'defaultValue' => 10,
- 'title' => 'Maximum number of returned feed items. Maximum 50, default 10'
- ),
- )
- );
+ const PARAMETERS = [
+ '' => [
+ 'lang' => [
+ 'name' => 'Language',
+ 'type' => 'list',
+ 'defaultValue' => 'euronews.com',
+ 'values' => [
+ 'English' => 'euronews.com',
+ 'French' => 'fr.euronews.com',
+ 'German' => 'de.euronews.com',
+ 'Italian' => 'it.euronews.com',
+ 'Spanish' => 'es.euronews.com',
+ 'Portuguese' => 'pt.euronews.com',
+ 'Russian' => 'ru.euronews.com',
+ 'Turkish' => 'tr.euronews.com',
+ 'Greek' => 'gr.euronews.com',
+ 'Hungarian' => 'hu.euronews.com',
+ 'Persian' => 'per.euronews.com',
+ 'Arabic' => 'arabic.euronews.com',
+ /* These versions don't have timeline.json */
+ // 'Albanian' => 'euronews.al',
+ // 'Romanian' => 'euronews.ro',
+ // 'Georigian' => 'euronewsgeorgia.com',
+ // 'Bulgarian' => 'euronewsbulgaria.com'
+ // 'Serbian' => 'euronews.rs'
+ ]
+ ],
+ 'limit' => [
+ 'name' => 'Limit of items per feed',
+ 'required' => true,
+ 'type' => 'number',
+ 'defaultValue' => 10,
+ 'title' => 'Maximum number of returned feed items. Maximum 50, default 10'
+ ],
+ ]
+ ];
- public function collectData()
- {
- $limit = $this->getInput('limit');
- $root_url = 'https://' . $this->getInput('lang');
- $url = $root_url . '/api/timeline.json?limit=' . $limit;
- $json = getContents($url);
- $data = json_decode($json, true);
+ public function collectData()
+ {
+ $limit = $this->getInput('limit');
+ $root_url = 'https://' . $this->getInput('lang');
+ $url = $root_url . '/api/timeline.json?limit=' . $limit;
+ $json = getContents($url);
+ $data = json_decode($json, true);
- foreach ($data as $datum) {
- $datum_uri = $root_url . $datum['fullUrl'];
- $url_datum = $this->getItemContent($datum_uri);
- $categories = array();
- if (array_key_exists('program', $datum)) {
- if (array_key_exists('title', $datum['program'])) {
- $categories[] = $datum['program']['title'];
- }
- }
- if (array_key_exists('themes', $datum)) {
- foreach ($datum['themes'] as $theme) {
- $categories[] = $theme['title'];
- }
- }
- $item = array(
- 'uri' => $datum_uri,
- 'title' => $datum['title'],
- 'uid' => strval($datum['id']),
- 'timestamp' => $datum['publishedAt'],
- 'content' => $url_datum['content'],
- 'author' => $url_datum['author'],
- 'enclosures' => $url_datum['enclosures'],
- 'categories' => array_unique($categories)
- );
- $this->items[] = $item;
- }
- }
+ foreach ($data as $datum) {
+ $datum_uri = $root_url . $datum['fullUrl'];
+ $url_datum = $this->getItemContent($datum_uri);
+ $categories = [];
+ if (array_key_exists('program', $datum)) {
+ if (array_key_exists('title', $datum['program'])) {
+ $categories[] = $datum['program']['title'];
+ }
+ }
+ if (array_key_exists('themes', $datum)) {
+ foreach ($datum['themes'] as $theme) {
+ $categories[] = $theme['title'];
+ }
+ }
+ $item = [
+ 'uri' => $datum_uri,
+ 'title' => $datum['title'],
+ 'uid' => strval($datum['id']),
+ 'timestamp' => $datum['publishedAt'],
+ 'content' => $url_datum['content'],
+ 'author' => $url_datum['author'],
+ 'enclosures' => $url_datum['enclosures'],
+ 'categories' => array_unique($categories)
+ ];
+ $this->items[] = $item;
+ }
+ }
- private function getItemContent($url)
- {
- try {
- $html = getSimpleHTMLDOMCached($url);
- } catch (Exception $e) {
- // Every once in a while it fails with too many redirects
- return array('author' => null, 'content' => null, 'enclosures' => null);
- }
- $data = $html->find('script[type="application/ld+json"]', 0)->innertext;
- $json = json_decode($data, true);
- $author = 'Euronews';
- $content = '';
- $enclosures = array();
- if (array_key_exists('@graph', $json)) {
- foreach ($json['@graph'] as $item) {
- if ($item['@type'] == 'NewsArticle') {
- if (array_key_exists('author', $item)) {
- $author = $item['author']['name'];
- }
- if (array_key_exists('image', $item)) {
- $content .= '<figure>';
- $content .= '<img src="' . $item['image']['url'] . '">';
- $content .= '<figcaption>' . $item['image']['caption'] . '</figcaption>';
- $content .= '</figure><br>';
- }
- if (array_key_exists('video', $item)) {
- $enclosures[] = $item['video']['contentUrl'];
- }
- }
- }
- }
+ private function getItemContent($url)
+ {
+ try {
+ $html = getSimpleHTMLDOMCached($url);
+ } catch (Exception $e) {
+ // Every once in a while it fails with too many redirects
+ return ['author' => null, 'content' => null, 'enclosures' => null];
+ }
+ $data = $html->find('script[type="application/ld+json"]', 0)->innertext;
+ $json = json_decode($data, true);
+ $author = 'Euronews';
+ $content = '';
+ $enclosures = [];
+ if (array_key_exists('@graph', $json)) {
+ foreach ($json['@graph'] as $item) {
+ if ($item['@type'] == 'NewsArticle') {
+ if (array_key_exists('author', $item)) {
+ $author = $item['author']['name'];
+ }
+ if (array_key_exists('image', $item)) {
+ $content .= '<figure>';
+ $content .= '<img src="' . $item['image']['url'] . '">';
+ $content .= '<figcaption>' . $item['image']['caption'] . '</figcaption>';
+ $content .= '</figure><br>';
+ }
+ if (array_key_exists('video', $item)) {
+ $enclosures[] = $item['video']['contentUrl'];
+ }
+ }
+ }
+ }
- // Normal article
- $article_content = $html->find('.c-article-content', 0);
- if ($article_content) {
- // Usually the .c-article-content is the root of the
- // content, but once in a blue moon the root is the second
- // div
- if ((count($article_content->children()) == 2)
- && ($article_content->children(1)->tag == 'div')
- ) {
- $article_content = $article_content->children(1);
- }
- // The content is interspersed with links and stuff, so we
- // iterate over the children
- foreach ($article_content->children() as $element) {
- if ($element->tag == 'p') {
- $scribble_live = $element->find('#scribblelive-items', 0);
- if (is_null($scribble_live)) {
- // A normal paragraph
- $content .= '<p>' . $element->innertext . '</p>';
- } else {
- // LIVE mode
- foreach ($scribble_live->children() as $child) {
- if ($child->tag == 'div') {
- $content .= '<div>' . $child->innertext . '</div>';
- }
- }
- }
- } elseif (preg_match('/h[1-6]/', $element->tag)) {
- // Header
- $content .= '<h' . $element->tag[1] . '>' . $element->innertext . '</h' . $element->tag[1] . '>';
- } elseif ($element->tag == 'div') {
- if (preg_match('/.*widget--type-image.*/', $element->class)) {
- // Image
- $content .= '<figure>';
- $content .= '<img src="' . $element->find('img', 0)->src . '">';
- $caption = $element->find('figcaption', 0);
- if ($caption) {
- $content .= '<figcaption>' . $element->plaintext . '</figcaption>';
- }
- $content .= '</figure><br>';
- } elseif (preg_match('/.*widget--type-quotation.*/', $element->class)) {
- // Quotation
- $quote = $element->find('.widget__quoteText', 0);
- $author = $element->find('.widget__author', 0);
- $content .= '<figure>';
- $content .= '<blockquote>' . $quote->plaintext . '</blockquote>';
- if ($author) {
- $content .= '<figcaption>' . $author->plaintext . '</figcaption>';
- }
- $content .= '</figure><br>';
- }
- }
- }
- }
+ // Normal article
+ $article_content = $html->find('.c-article-content', 0);
+ if ($article_content) {
+ // Usually the .c-article-content is the root of the
+ // content, but once in a blue moon the root is the second
+ // div
+ if (
+ (count($article_content->children()) == 2)
+ && ($article_content->children(1)->tag == 'div')
+ ) {
+ $article_content = $article_content->children(1);
+ }
+ // The content is interspersed with links and stuff, so we
+ // iterate over the children
+ foreach ($article_content->children() as $element) {
+ if ($element->tag == 'p') {
+ $scribble_live = $element->find('#scribblelive-items', 0);
+ if (is_null($scribble_live)) {
+ // A normal paragraph
+ $content .= '<p>' . $element->innertext . '</p>';
+ } else {
+ // LIVE mode
+ foreach ($scribble_live->children() as $child) {
+ if ($child->tag == 'div') {
+ $content .= '<div>' . $child->innertext . '</div>';
+ }
+ }
+ }
+ } elseif (preg_match('/h[1-6]/', $element->tag)) {
+ // Header
+ $content .= '<h' . $element->tag[1] . '>' . $element->innertext . '</h' . $element->tag[1] . '>';
+ } elseif ($element->tag == 'div') {
+ if (preg_match('/.*widget--type-image.*/', $element->class)) {
+ // Image
+ $content .= '<figure>';
+ $content .= '<img src="' . $element->find('img', 0)->src . '">';
+ $caption = $element->find('figcaption', 0);
+ if ($caption) {
+ $content .= '<figcaption>' . $element->plaintext . '</figcaption>';
+ }
+ $content .= '</figure><br>';
+ } elseif (preg_match('/.*widget--type-quotation.*/', $element->class)) {
+ // Quotation
+ $quote = $element->find('.widget__quoteText', 0);
+ $author = $element->find('.widget__author', 0);
+ $content .= '<figure>';
+ $content .= '<blockquote>' . $quote->plaintext . '</blockquote>';
+ if ($author) {
+ $content .= '<figcaption>' . $author->plaintext . '</figcaption>';
+ }
+ $content .= '</figure><br>';
+ }
+ }
+ }
+ }
- // Video article
- if (is_null($article_content)) {
- $image = $html->find('.c-article-media__img', 0);
- if ($image) {
- $content .= '<figure>';
- $content .= '<img src="' . $image->src . '">';
- $content .= '</figure><br>';
- }
+ // Video article
+ if (is_null($article_content)) {
+ $image = $html->find('.c-article-media__img', 0);
+ if ($image) {
+ $content .= '<figure>';
+ $content .= '<img src="' . $image->src . '">';
+ $content .= '</figure><br>';
+ }
- $description = $html->find('.m-object__description', 0);
- if ($description) {
- // In some editions the description is a link to the
- // current page
- $content .= '<div>' . $description->plaintext . '</div>';
- }
+ $description = $html->find('.m-object__description', 0);
+ if ($description) {
+ // In some editions the description is a link to the
+ // current page
+ $content .= '<div>' . $description->plaintext . '</div>';
+ }
- // Euronews usually hosts videos on dailymotion...
- $player_div = $html->find('.dmPlayer', 0);
- if ($player_div) {
- $video_id = $player_div->getAttribute('data-video-id');
- $video_url = 'https://www.dailymotion.com/video/' . $video_id;
- $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
- }
+ // Euronews usually hosts videos on dailymotion...
+ $player_div = $html->find('.dmPlayer', 0);
+ if ($player_div) {
+ $video_id = $player_div->getAttribute('data-video-id');
+ $video_url = 'https://www.dailymotion.com/video/' . $video_id;
+ $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
+ }
- // ...or on YouTube
- $player_div = $html->find('.js-player-pfp', 0);
- if ($player_div) {
- $video_id = $player_div->getAttribute('data-video-id');
- $video_url = 'https://www.youtube.com/watch?v=' . $video_id;
- $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
- }
- }
+ // ...or on YouTube
+ $player_div = $html->find('.js-player-pfp', 0);
+ if ($player_div) {
+ $video_id = $player_div->getAttribute('data-video-id');
+ $video_url = 'https://www.youtube.com/watch?v=' . $video_id;
+ $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
+ }
+ }
- return array(
- 'author' => $author,
- 'content' => $content,
- 'enclosures' => $enclosures
- );
- }
+ return [
+ 'author' => $author,
+ 'content' => $content,
+ 'enclosures' => $enclosures
+ ];
+ }
}