diff options
author | 2022-07-01 15:10:30 +0200 | |
---|---|---|
committer | 2022-07-01 15:10:30 +0200 | |
commit | 4f75591060d95208a301bc6bf460d875631b29cc (patch) | |
tree | 4e37d86840e8d990a563ba75d3de6f84a53cc2de /bridges/ReutersBridge.php | |
parent | 66568e3a39c61546c09a47a5688914a0bdf3c60c (diff) | |
download | rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.tar.gz rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.tar.zst rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.zip |
Reformat codebase v4 (#2872)
Reformat code base to PSR12
Co-authored-by: rssbridge <noreply@github.com>
Diffstat (limited to 'bridges/ReutersBridge.php')
-rw-r--r-- | bridges/ReutersBridge.php | 1229 |
1 files changed, 620 insertions, 609 deletions
diff --git a/bridges/ReutersBridge.php b/bridges/ReutersBridge.php index 196139b3..853b134b 100644 --- a/bridges/ReutersBridge.php +++ b/bridges/ReutersBridge.php @@ -1,473 +1,483 @@ <?php + class ReutersBridge extends BridgeAbstract { - const MAINTAINER = 'hollowleviathan, spraynard, csisoap'; - const NAME = 'Reuters Bridge'; - const URI = 'https://www.reuters.com'; - const CACHE_TIMEOUT = 1800; // 30min - const DESCRIPTION = 'Returns news from Reuters'; - - private $feedName = self::NAME; - private $useWireAPI = false; - - /** - * Wireitem types allowed in the final story output - */ - const ALLOWED_WIREITEM_TYPES = array( - 'story', - 'headlines' - ); - - /** - * Wireitem template types allowed in the final story output - */ - const ALLOWED_TEMPLATE_TYPES = array( - 'story', - 'headlines' - ); - - const PARAMETERS = array( - array( - 'feed' => array( - 'name' => 'News Feed', - 'type' => 'list', - 'title' => 'Feeds from Reuters U.S/International edition', - 'values' => array( - 'Top News' => 'home/topnews', - 'Fact Check' => 'chan:abtpk0vm', - 'Entertainment' => 'chan:8ym8q8dl', - 'Politics' => 'politics', - 'Wire' => 'wire', - 'Breakingviews' => '/breakingviews', - 'World' => array( - 'World' => 'world', - 'Africa' => '/world/africa', - 'Americas' => '/world/americas', - 'Asia-Pacific' => '/world/asia-pacific', - 'China' => 'china', - 'europe' => '/world/europe', - 'India' => '/world/india', - 'Middle East' => '/world/middle-east', - 'UK' => 'chan:61leiu7j', - 'USA News' => 'us', - 'The Great Reboot' => '/world/the-great-reboot', - 'Reuters Next' => '/world/reuters-next' - ), - 'Business' => array( - 'Business' => 'business', - 'Aerospace and Defense' => 'aerospace', - 'Autos Transportation' => '/business/autos-transportation', - 'Energy' => 'energy', - 'Finance' => '/business/finance', - 'Health' => 'chan:8hw7807a', - 'Media Telecom' => '/business/media-telecom', - 'Retail Consumer' => '/business/retail-consumer', - 'Sustainable Business' => '/business/sustainable-business', - 'Change Suite' => '/business/change-suite', - 'Future of Health' => '/business/future-of-health', - 'Future of Money' => '/business/future-of-money', - 'Take Five' => '/business/take-five', - 'Reuters Impact' => '/business/reuters-impact', - ), - 'Legal' => array( - 'Legal' => '/legal', - 'Government' => '/legal/government', - 'Legal Industry' => '/legal/legalindustry', - 'Litigation' => '/legal/litigation', - 'Transactional' => '/legal/transactional', - ), - 'Markets' => array( - 'Markets' => 'markets', - 'Asian Markets' => '/markets/asia', - 'Commodities' => '/markets/commodities', - 'Currencies' => '/markets/currencies', - 'Deals' => '/markets/deals', - 'European Markets' => '/markets/europe', - 'Funds' => '/markets/fund', - 'Global Market Data' => '/markets/global-market-data', - 'Rates & Bonds' => '/markets/rates-bonds', - 'Stocks' => '/markets/stocks', - 'U.S Markets' => '/markets/us', - 'Wealth' => '/markets/wealth', - 'Macro Matters' => '/markets/macromatters', - ), - 'Technology' => array( - 'Technology' => 'tech', - 'Disrupted' => '/technology/disrupted', - 'Reuters Momentum' => '/technology/reuters-momentum', - ), - 'Sports' => array( - 'Sports' => 'sports', - 'Athletics' => '/lifestyle/sports/athletics', - 'Cricket' => '/lifestyle/sports/cricket', - 'Cycling' => '/lifestyle/sports/cycling', - 'Golf' => '/lifestyle/sports/golf', - 'Motor Sports' => '/lifestyle/sports/motor-sports', - 'Soccer' => '/lifestyle/sports/soccer', - 'Tennis' => '/lifestyle/sports/tennis', - ), - 'Lifestyle' => array( - 'Lifestyle' => 'life', - 'Oddly Enough' => '/lifestyle/oddly-enough', - 'Science' => 'science', - ) - ) - ) - ) - ); - - const BACKWARD_COMPATIBILITY = array( - 'world' => '/world', - 'china' => '/world/china', - 'chan:61leiu7j' => '/world/uk', - 'us' => '/world/us', - 'business' => '/business', - 'aerospace' => '/business/aerospace-defense', - 'energy' => '/business/energy', - 'environment' => '/business/environment', - 'chan:8hw7807a' => '/business/healthcare-pharmaceuticals', - 'markets' => '/markets', - 'tech' => '/technology', - 'sports' => '/lifestyle/sports', - 'life' => '/lifestyle', - 'science' => '/lifestyle/science', - 'home/topnews' => '/home', - ); - - const OLD_WIRE_SECTION = array( - 'home/topnews', - 'chan:abtpk0vm', - 'chan:8ym8q8dl', - 'politics', - 'wire' - ); - - /** - * Performs an HTTP request to the Reuters API and returns decoded JSON - * in the form of an associative array - * @param string $feed_uri Full API URL to fetch data - * @return array - */ - private function getJson($uri) - { - $returned_data = getContents($uri); - return json_decode($returned_data, true); - } - - /** - * Takes in data from Reuters Wire API and - * creates structured data in the form of a list - * of story information. - * @param array $data JSON collected from the Reuters Wire API - */ - private function processData($data) - { - /** - * Gets a list of wire items which are groups of templates - */ - $reuters_allowed_wireitems = array_filter( - $data, function ($wireitem) { - return in_array( - $wireitem['wireitem_type'], - self::ALLOWED_WIREITEM_TYPES - ); - } - ); - - /* - * Gets a list of "Templates", which is data containing a story - */ - $reuters_wireitem_templates = array_reduce( - $reuters_allowed_wireitems, - function (array $carry, array $wireitem) { - $wireitem_templates = $wireitem['templates']; - return array_merge( - $carry, - array_filter( - $wireitem_templates, function ( - array $template_data - ) { - return in_array( - $template_data['type'], - self::ALLOWED_TEMPLATE_TYPES - ); - } - ) - ); - }, - array() - ); - - return $reuters_wireitem_templates; - } - - private function getSectionEndpoint() { - $endpoint = $this->getInput('feed'); - if(isset(self::BACKWARD_COMPATIBILITY[$endpoint])) { - $endpoint = self::BACKWARD_COMPATIBILITY[$endpoint]; - } elseif (in_array($endpoint, self::OLD_WIRE_SECTION)) { - $this->useWireAPI = true; - } - return $endpoint; - } - - /** - * @param string $endpoint - A endpoint is provided could be article URI or ID. - * @param string $fetch_type - Provide what kind of fetch do you want? Article or Section. - * @param boolean $is_article_uid {true|false} - A boolean flag to determined if using UID instead of url to fetch. - * @return string A completed API URL to fetch data - */ - private function getAPIURL($endpoint, $fetch_type, $is_article_uid = false) { - $base_url = self::URI . '/pf/api/v3/content/fetch/'; - $wire_url = 'https://wireapi.reuters.com/v8'; - switch($fetch_type) { - case 'article': - if($this->useWireAPI) { - return $wire_url . $endpoint; - } - - $base_query = array( - 'website' => 'reuters', - ); - $query = array(); - - if ($is_article_uid) { - $query = array( - 'id' => $endpoint - ); - } else { - $query = array( - 'website_url' => $endpoint, - ); - } - - $query = array_merge($base_query, $query); - $json_query = json_encode($query); - return $base_url . 'article-by-id-or-url-v1?query=' . $json_query; - break; - case 'section': - if($this->useWireAPI) { - if(strpos($endpoint, 'chan:') !== false) { - // Now checking whether that feed has unique ID or not. - $feed_uri = "/feed/rapp/us/wirefeed/$endpoint"; - } else { - $feed_uri = "/feed/rapp/us/tabbar/feeds/$endpoint"; - } - return $wire_url . $feed_uri; - } - $query = array( - 'section_id' => $endpoint, - 'size' => 30, - 'website' => 'reuters' - ); - - if ($endpoint != '/home') { - $query = array_merge($query, array( - 'fetch_type' => 'section', - )); - } - - $json_query = json_encode($query); - return $base_url . 'articles-by-section-alias-or-id-v1?query=' . $json_query; - break; - } - returnServerError('unsupported endpoint'); - } - - private function addStories($title, $content, $timestamp, $author, $url, $category) { - $item = array(); - $item['categories'] = $category; - $item['author'] = $author; - $item['content'] = $content; - $item['title'] = $title; - $item['timestamp'] = $timestamp; - $item['uri'] = $url; - $this->items[] = $item; - } - - private function getArticle($feed_uri, $is_article_uid = false) - { - // This will make another request to API to get full detail of article and author's name. - $url = $this->getAPIURL($feed_uri, 'article', $is_article_uid); - $rawData = $this->getJson($url); - - if(json_last_error() != JSON_ERROR_NONE) { // Checking whether a valid JSON or not - return $this->handleRedirectedArticle($url); - } - - $article_content = ''; - $authorlist = ''; - $category = array(); - $image_list = array(); - $published_at = ''; - if($this->useWireAPI) { - $reuters_wireitems = $rawData['wireitems']; - $processedData = $this->processData($reuters_wireitems); - - $first = reset($processedData); - $article_content = $first['story']['body_items']; - $authorlist = $first['story']['authors']; - $category = array($first['story']['channel']['name']); - $image_list = $first['story']['images']; - $published_at = $first['story']['published_at']; - } else { - $article_content = $rawData['result']['content_elements']; - $authorlist = $rawData['result']['authors']; - $category = array($rawData['result']['taxonomy']['ads_primary_section']['name']); - $image_list = array(); - if(!empty($rawData['result']['related_content']['galleries'])) { - $galleries = $rawData['result']['related_content']['galleries']; - foreach($galleries as $gallery) { - $image_list = array_merge($image_list, $gallery['content_elements']); - } - } else if(!empty($rawData['result']['related_content']['images'])) { - $image_list = $rawData['result']['related_content']['images']; - } - $published_at = $rawData['result']['published_time']; - } - - $content_detail = array( - 'content' => $this->handleArticleContent($article_content), - 'author' => $this->handleAuthorName($authorlist), - 'category' => $category, - 'images' => $this->handleImage($image_list), - 'published_at' => $published_at - ); - return $content_detail; - } - - private function handleRedirectedArticle($url) { - $html = getSimpleHTMLDOMCached($url, 86400); // Duration 24h - - $description = ''; - $author = ''; - $images = ''; - $meta_items = $html->find('meta'); - foreach($meta_items as $meta) { - switch ($meta->name) { - case 'description': - $description = $meta->content; - break; - case 'author': - case 'twitter:creator': - $author = $meta->content; - break; - case 'twitter:image:src': - case 'twitter:image': - $url = $meta->content; - $images = "<img src=$url" . '>'; - break; - } - } - - return array( - 'content' => $description, - 'author' => $author, - 'category' => '', - 'images' => $images, - 'published_at' => '', - 'status' => 'redirected' - ); - } - - private function handleImage($images) { - $img_placeholder = ''; - - foreach($images as $image) { // Add more image to article. - $image_url = $image['url']; - $image_caption = $image['caption']; - $image_alt_text = ''; - if(isset($image['alt_text'])) { - $image_alt_text = $image['alt_text']; - } else { - $image_alt_text = $image_caption; - } - $img = "<img src=\"$image_url\" alt=\"$image_alt_text\">"; - $img_caption = "<figcaption style=\"text-align: center;\"><i>$image_caption</i></figcaption>"; - $figure = "<figure>$img \t $img_caption</figure>"; - $img_placeholder = $img_placeholder . $figure; - } - - return $img_placeholder; - } - - private function handleAuthorName($authors) { - $author = ''; - $counter = 0; - foreach ($authors as $data) { - //Formatting author's name. - $name = $data['name']; - $counter++; - if($counter == count($authors)) { - $author .= $name; - } else { - $author .= $name . ', '; - } - } - return $author; - } - - private function handleArticleContent($contents) { - $description = ''; - foreach ($contents as $content) { - $data; - if(isset($content['content'])) { - $data = $content['content']; - } - switch($content['type']) { - case 'paragraph': - $description = $description . "<p>$data</p>"; - break; - case 'heading': - $description = $description . "<h3>$data</h3>"; - break; - case 'infographics': - $description = $description . "<img src=\"$data\">"; - break; - case 'inline_items': - $item_list = $content['items']; - $description = $description . '<p>'; - foreach ($item_list as $item) { - if($item['type'] == 'text') { - $description = $description . $item['content']; - } else { - $description = $description . $item['symbol']; - } - } - $description = $description . '</p>'; - break; - case 'p_table': - $description = $description . $content['content']; - break; - case 'upstream_embed': - $media_type = $content['media_type']; - $cid = $content['cid']; - $embed = ''; - switch ($media_type) { - case 'tweet': - try { - $tweet_url = "https://twitter.com/dummyname/statuses/$cid"; - $get_embed_url = 'https://publish.twitter.com/oembed?url=' - . urlencode($tweet_url) . - '&partner=&hide_thread=false'; - $oembed_json = json_decode(getContents($get_embed_url), true); - $embed .= $oembed_json['html']; - } catch (Exception $e) { // In case not found any tweet. - $embed .= ''; - } - break; - case 'instagram': - $url = "https://instagram.com/p/$cid/media/?size=l"; - $embed .= <<<EOD + const MAINTAINER = 'hollowleviathan, spraynard, csisoap'; + const NAME = 'Reuters Bridge'; + const URI = 'https://www.reuters.com'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns news from Reuters'; + + private $feedName = self::NAME; + private $useWireAPI = false; + + /** + * Wireitem types allowed in the final story output + */ + const ALLOWED_WIREITEM_TYPES = [ + 'story', + 'headlines' + ]; + + /** + * Wireitem template types allowed in the final story output + */ + const ALLOWED_TEMPLATE_TYPES = [ + 'story', + 'headlines' + ]; + + const PARAMETERS = [ + [ + 'feed' => [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from Reuters U.S/International edition', + 'values' => [ + 'Top News' => 'home/topnews', + 'Fact Check' => 'chan:abtpk0vm', + 'Entertainment' => 'chan:8ym8q8dl', + 'Politics' => 'politics', + 'Wire' => 'wire', + 'Breakingviews' => '/breakingviews', + 'World' => [ + 'World' => 'world', + 'Africa' => '/world/africa', + 'Americas' => '/world/americas', + 'Asia-Pacific' => '/world/asia-pacific', + 'China' => 'china', + 'europe' => '/world/europe', + 'India' => '/world/india', + 'Middle East' => '/world/middle-east', + 'UK' => 'chan:61leiu7j', + 'USA News' => 'us', + 'The Great Reboot' => '/world/the-great-reboot', + 'Reuters Next' => '/world/reuters-next' + ], + 'Business' => [ + 'Business' => 'business', + 'Aerospace and Defense' => 'aerospace', + 'Autos Transportation' => '/business/autos-transportation', + 'Energy' => 'energy', + 'Finance' => '/business/finance', + 'Health' => 'chan:8hw7807a', + 'Media Telecom' => '/business/media-telecom', + 'Retail Consumer' => '/business/retail-consumer', + 'Sustainable Business' => '/business/sustainable-business', + 'Change Suite' => '/business/change-suite', + 'Future of Health' => '/business/future-of-health', + 'Future of Money' => '/business/future-of-money', + 'Take Five' => '/business/take-five', + 'Reuters Impact' => '/business/reuters-impact', + ], + 'Legal' => [ + 'Legal' => '/legal', + 'Government' => '/legal/government', + 'Legal Industry' => '/legal/legalindustry', + 'Litigation' => '/legal/litigation', + 'Transactional' => '/legal/transactional', + ], + 'Markets' => [ + 'Markets' => 'markets', + 'Asian Markets' => '/markets/asia', + 'Commodities' => '/markets/commodities', + 'Currencies' => '/markets/currencies', + 'Deals' => '/markets/deals', + 'European Markets' => '/markets/europe', + 'Funds' => '/markets/fund', + 'Global Market Data' => '/markets/global-market-data', + 'Rates & Bonds' => '/markets/rates-bonds', + 'Stocks' => '/markets/stocks', + 'U.S Markets' => '/markets/us', + 'Wealth' => '/markets/wealth', + 'Macro Matters' => '/markets/macromatters', + ], + 'Technology' => [ + 'Technology' => 'tech', + 'Disrupted' => '/technology/disrupted', + 'Reuters Momentum' => '/technology/reuters-momentum', + ], + 'Sports' => [ + 'Sports' => 'sports', + 'Athletics' => '/lifestyle/sports/athletics', + 'Cricket' => '/lifestyle/sports/cricket', + 'Cycling' => '/lifestyle/sports/cycling', + 'Golf' => '/lifestyle/sports/golf', + 'Motor Sports' => '/lifestyle/sports/motor-sports', + 'Soccer' => '/lifestyle/sports/soccer', + 'Tennis' => '/lifestyle/sports/tennis', + ], + 'Lifestyle' => [ + 'Lifestyle' => 'life', + 'Oddly Enough' => '/lifestyle/oddly-enough', + 'Science' => 'science', + ] + ] + ] + ] + ]; + + const BACKWARD_COMPATIBILITY = [ + 'world' => '/world', + 'china' => '/world/china', + 'chan:61leiu7j' => '/world/uk', + 'us' => '/world/us', + 'business' => '/business', + 'aerospace' => '/business/aerospace-defense', + 'energy' => '/business/energy', + 'environment' => '/business/environment', + 'chan:8hw7807a' => '/business/healthcare-pharmaceuticals', + 'markets' => '/markets', + 'tech' => '/technology', + 'sports' => '/lifestyle/sports', + 'life' => '/lifestyle', + 'science' => '/lifestyle/science', + 'home/topnews' => '/home', + ]; + + const OLD_WIRE_SECTION = [ + 'home/topnews', + 'chan:abtpk0vm', + 'chan:8ym8q8dl', + 'politics', + 'wire' + ]; + + /** + * Performs an HTTP request to the Reuters API and returns decoded JSON + * in the form of an associative array + * @param string $feed_uri Full API URL to fetch data + * @return array + */ + private function getJson($uri) + { + $returned_data = getContents($uri); + return json_decode($returned_data, true); + } + + /** + * Takes in data from Reuters Wire API and + * creates structured data in the form of a list + * of story information. + * @param array $data JSON collected from the Reuters Wire API + */ + private function processData($data) + { + /** + * Gets a list of wire items which are groups of templates + */ + $reuters_allowed_wireitems = array_filter( + $data, + function ($wireitem) { + return in_array( + $wireitem['wireitem_type'], + self::ALLOWED_WIREITEM_TYPES + ); + } + ); + + /* + * Gets a list of "Templates", which is data containing a story + */ + $reuters_wireitem_templates = array_reduce( + $reuters_allowed_wireitems, + function (array $carry, array $wireitem) { + $wireitem_templates = $wireitem['templates']; + return array_merge( + $carry, + array_filter( + $wireitem_templates, + function ( + array $template_data + ) { + return in_array( + $template_data['type'], + self::ALLOWED_TEMPLATE_TYPES + ); + } + ) + ); + }, + [] + ); + + return $reuters_wireitem_templates; + } + + private function getSectionEndpoint() + { + $endpoint = $this->getInput('feed'); + if (isset(self::BACKWARD_COMPATIBILITY[$endpoint])) { + $endpoint = self::BACKWARD_COMPATIBILITY[$endpoint]; + } elseif (in_array($endpoint, self::OLD_WIRE_SECTION)) { + $this->useWireAPI = true; + } + return $endpoint; + } + + /** + * @param string $endpoint - A endpoint is provided could be article URI or ID. + * @param string $fetch_type - Provide what kind of fetch do you want? Article or Section. + * @param boolean $is_article_uid {true|false} - A boolean flag to determined if using UID instead of url to fetch. + * @return string A completed API URL to fetch data + */ + private function getAPIURL($endpoint, $fetch_type, $is_article_uid = false) + { + $base_url = self::URI . '/pf/api/v3/content/fetch/'; + $wire_url = 'https://wireapi.reuters.com/v8'; + switch ($fetch_type) { + case 'article': + if ($this->useWireAPI) { + return $wire_url . $endpoint; + } + + $base_query = [ + 'website' => 'reuters', + ]; + $query = []; + + if ($is_article_uid) { + $query = [ + 'id' => $endpoint + ]; + } else { + $query = [ + 'website_url' => $endpoint, + ]; + } + + $query = array_merge($base_query, $query); + $json_query = json_encode($query); + return $base_url . 'article-by-id-or-url-v1?query=' . $json_query; + break; + case 'section': + if ($this->useWireAPI) { + if (strpos($endpoint, 'chan:') !== false) { + // Now checking whether that feed has unique ID or not. + $feed_uri = "/feed/rapp/us/wirefeed/$endpoint"; + } else { + $feed_uri = "/feed/rapp/us/tabbar/feeds/$endpoint"; + } + return $wire_url . $feed_uri; + } + $query = [ + 'section_id' => $endpoint, + 'size' => 30, + 'website' => 'reuters' + ]; + + if ($endpoint != '/home') { + $query = array_merge($query, [ + 'fetch_type' => 'section', + ]); + } + + $json_query = json_encode($query); + return $base_url . 'articles-by-section-alias-or-id-v1?query=' . $json_query; + break; + } + returnServerError('unsupported endpoint'); + } + + private function addStories($title, $content, $timestamp, $author, $url, $category) + { + $item = []; + $item['categories'] = $category; + $item['author'] = $author; + $item['content'] = $content; + $item['title'] = $title; + $item['timestamp'] = $timestamp; + $item['uri'] = $url; + $this->items[] = $item; + } + + private function getArticle($feed_uri, $is_article_uid = false) + { + // This will make another request to API to get full detail of article and author's name. + $url = $this->getAPIURL($feed_uri, 'article', $is_article_uid); + $rawData = $this->getJson($url); + + if (json_last_error() != JSON_ERROR_NONE) { // Checking whether a valid JSON or not + return $this->handleRedirectedArticle($url); + } + + $article_content = ''; + $authorlist = ''; + $category = []; + $image_list = []; + $published_at = ''; + if ($this->useWireAPI) { + $reuters_wireitems = $rawData['wireitems']; + $processedData = $this->processData($reuters_wireitems); + + $first = reset($processedData); + $article_content = $first['story']['body_items']; + $authorlist = $first['story']['authors']; + $category = [$first['story']['channel']['name']]; + $image_list = $first['story']['images']; + $published_at = $first['story']['published_at']; + } else { + $article_content = $rawData['result']['content_elements']; + $authorlist = $rawData['result']['authors']; + $category = [$rawData['result']['taxonomy']['ads_primary_section']['name']]; + $image_list = []; + if (!empty($rawData['result']['related_content']['galleries'])) { + $galleries = $rawData['result']['related_content']['galleries']; + foreach ($galleries as $gallery) { + $image_list = array_merge($image_list, $gallery['content_elements']); + } + } elseif (!empty($rawData['result']['related_content']['images'])) { + $image_list = $rawData['result']['related_content']['images']; + } + $published_at = $rawData['result']['published_time']; + } + + $content_detail = [ + 'content' => $this->handleArticleContent($article_content), + 'author' => $this->handleAuthorName($authorlist), + 'category' => $category, + 'images' => $this->handleImage($image_list), + 'published_at' => $published_at + ]; + return $content_detail; + } + + private function handleRedirectedArticle($url) + { + $html = getSimpleHTMLDOMCached($url, 86400); // Duration 24h + + $description = ''; + $author = ''; + $images = ''; + $meta_items = $html->find('meta'); + foreach ($meta_items as $meta) { + switch ($meta->name) { + case 'description': + $description = $meta->content; + break; + case 'author': + case 'twitter:creator': + $author = $meta->content; + break; + case 'twitter:image:src': + case 'twitter:image': + $url = $meta->content; + $images = "<img src=$url" . '>'; + break; + } + } + + return [ + 'content' => $description, + 'author' => $author, + 'category' => '', + 'images' => $images, + 'published_at' => '', + 'status' => 'redirected' + ]; + } + + private function handleImage($images) + { + $img_placeholder = ''; + + foreach ($images as $image) { // Add more image to article. + $image_url = $image['url']; + $image_caption = $image['caption']; + $image_alt_text = ''; + if (isset($image['alt_text'])) { + $image_alt_text = $image['alt_text']; + } else { + $image_alt_text = $image_caption; + } + $img = "<img src=\"$image_url\" alt=\"$image_alt_text\">"; + $img_caption = "<figcaption style=\"text-align: center;\"><i>$image_caption</i></figcaption>"; + $figure = "<figure>$img \t $img_caption</figure>"; + $img_placeholder = $img_placeholder . $figure; + } + + return $img_placeholder; + } + + private function handleAuthorName($authors) + { + $author = ''; + $counter = 0; + foreach ($authors as $data) { + //Formatting author's name. + $name = $data['name']; + $counter++; + if ($counter == count($authors)) { + $author .= $name; + } else { + $author .= $name . ', '; + } + } + return $author; + } + + private function handleArticleContent($contents) + { + $description = ''; + foreach ($contents as $content) { + $data; + if (isset($content['content'])) { + $data = $content['content']; + } + switch ($content['type']) { + case 'paragraph': + $description = $description . "<p>$data</p>"; + break; + case 'heading': + $description = $description . "<h3>$data</h3>"; + break; + case 'infographics': + $description = $description . "<img src=\"$data\">"; + break; + case 'inline_items': + $item_list = $content['items']; + $description = $description . '<p>'; + foreach ($item_list as $item) { + if ($item['type'] == 'text') { + $description = $description . $item['content']; + } else { + $description = $description . $item['symbol']; + } + } + $description = $description . '</p>'; + break; + case 'p_table': + $description = $description . $content['content']; + break; + case 'upstream_embed': + $media_type = $content['media_type']; + $cid = $content['cid']; + $embed = ''; + switch ($media_type) { + case 'tweet': + try { + $tweet_url = "https://twitter.com/dummyname/statuses/$cid"; + $get_embed_url = 'https://publish.twitter.com/oembed?url=' + . urlencode($tweet_url) . + '&partner=&hide_thread=false'; + $oembed_json = json_decode(getContents($get_embed_url), true); + $embed .= $oembed_json['html']; + } catch (Exception $e) { // In case not found any tweet. + $embed .= ''; + } + break; + case 'instagram': + $url = "https://instagram.com/p/$cid/media/?size=l"; + $embed .= <<<EOD <img src="{$url}" alt="instagram-image-$cid" > EOD; - break; - case 'youtube': - $url = "https://www.youtube.com/embed/$cid"; - $embed .= <<<EOD + break; + case 'youtube': + $url = "https://www.youtube.com/embed/$cid"; + $embed .= <<<EOD <iframe width="560" height="315" @@ -477,151 +487,152 @@ EOD; > </iframe> EOD; - break; - } - $description .= $embed; - break; - case 'social_media': - if ($content['sub_type'] == 'twitter') { - $description .= $content['html']; - } - break; - case 'table': - $table = '<table>'; - $theaders = $content['header']; - $tr = '<tr>'; - foreach($theaders as $header) { - $tr .= '<th>' . $header . '</th>'; - } - $tr .= '</tr>'; - $table .= $tr; - $rows = $content['rows']; - foreach($rows as $row) { - $tr = '<tr>'; - foreach($row as $data) { - $tr .= '<td>' . $data . '</td>'; - } - $tr .= '</tr>'; - $table .= $tr; - } - $table .= '</table>'; - $description .= $table; - break; - case 'image': - $description .= $this->handleImage(array($content)); - } - } - - return $description; - } - - /** - * @param array $stories - */ - private function addRelatedStories($stories) { - foreach($stories as $story) { - $story_data = $this->getArticle($story['url']); - $title = $story['caption']; - $url = self::URI . $story['url']; - if(isset($story_data['status']) && $story_data['status'] != 'redirected') { - $article_body = defaultLinkTo($story_data['content'], $this->getURI()); - } else { - $article_body = $story_data['content']; - } - $content = $article_body . $story_data['images']; - $timestamp = $story_data['published_at']; - $category = $story_data['category']; - $author = $story_data['author']; - $this->addStories($title, $content, $timestamp, $author, $url, $category); - } - } - - public function getName() { - return $this->feedName; - } - - public function collectData() - { - $endpoint = $this->getSectionEndpoint(); - $url = $this->getAPIURL($endpoint, 'section'); - $data = $this->getJson($url); - - $stories = array(); - $section_name = ''; - if($this->useWireAPI) { - $reuters_wireitems = $data['wireitems']; - $section_name = $data['wire_name']; - $processedData = $this->processData($reuters_wireitems); - - // Merge all articles from Editor's Highlight section into existing array of templates. - $top_section = reset($processedData); - if ($top_section['type'] == 'headlines') { - $top_section = array_shift($processedData); - $articles = $top_section['headlines']; - $processedData = array_merge($articles, $processedData); - } - $stories = $processedData; - } else { - $section_name = $data['result']['section']['name']; - if(isset($data['arcResult']['articles'])) { - $stories = $data['arcResult']['articles']; - } else { - $stories = $data['result']['articles']; - } - } - $this->feedName = $section_name . ' | Reuters'; - - foreach ($stories as $story) { - $uid = ''; - $author = ''; - $category = array(); - $content = ''; - $title = ''; - $timestamp = ''; - $url = ''; - $article_uri = ''; - $source_type = ''; - if($this->useWireAPI) { - $uid = $story['story']['usn']; - $article_uri = $story['template_action']['api_path']; - $title = $story['story']['hed']; - $url = $story['template_action']['url']; - } else { - $uid = $story['id']; - $url = self::URI . $story['canonical_url']; - $title = $story['title']; - $article_uri = $story['canonical_url']; - $source_type = $story['source']['name']; - if (isset($story['related_stories'])) { - $this->addRelatedStories($story['related_stories']); - } - } - - // Some article cause unexpected behaviour like redirect to another site not API. - // Attempt to check article source type to avoid this. - if(!$this->useWireAPI && $source_type != 'Package') { // Only Reuters PF api have this, Wire don't. - $author = $this->handleAuthorName($story['authors']); - $timestamp = $story['published_time']; - $image_placeholder = ''; - if (isset($story['thumbnail'])) { - $image_placeholder = $this->handleImage(array($story['thumbnail'])); - } - $content = $story['description'] . $image_placeholder; - $category = array($story['primary_section']['name']); - } else { - $content_detail = $this->getArticle($article_uri); - $description = $content_detail['content']; - $description = defaultLinkTo($description, $this->getURI()); - - $author = $content_detail['author']; - $images = $content_detail['images']; - $category = $content_detail['category']; - $content = "$description $images"; - $timestamp = $content_detail['published_at']; - } - - $this->addStories($title, $content, $timestamp, $author, $url, $category); - - } - } + break; + } + $description .= $embed; + break; + case 'social_media': + if ($content['sub_type'] == 'twitter') { + $description .= $content['html']; + } + break; + case 'table': + $table = '<table>'; + $theaders = $content['header']; + $tr = '<tr>'; + foreach ($theaders as $header) { + $tr .= '<th>' . $header . '</th>'; + } + $tr .= '</tr>'; + $table .= $tr; + $rows = $content['rows']; + foreach ($rows as $row) { + $tr = '<tr>'; + foreach ($row as $data) { + $tr .= '<td>' . $data . '</td>'; + } + $tr .= '</tr>'; + $table .= $tr; + } + $table .= '</table>'; + $description .= $table; + break; + case 'image': + $description .= $this->handleImage([$content]); + } + } + + return $description; + } + + /** + * @param array $stories + */ + private function addRelatedStories($stories) + { + foreach ($stories as $story) { + $story_data = $this->getArticle($story['url']); + $title = $story['caption']; + $url = self::URI . $story['url']; + if (isset($story_data['status']) && $story_data['status'] != 'redirected') { + $article_body = defaultLinkTo($story_data['content'], $this->getURI()); + } else { + $article_body = $story_data['content']; + } + $content = $article_body . $story_data['images']; + $timestamp = $story_data['published_at']; + $category = $story_data['category']; + $author = $story_data['author']; + $this->addStories($title, $content, $timestamp, $author, $url, $category); + } + } + + public function getName() + { + return $this->feedName; + } + + public function collectData() + { + $endpoint = $this->getSectionEndpoint(); + $url = $this->getAPIURL($endpoint, 'section'); + $data = $this->getJson($url); + + $stories = []; + $section_name = ''; + if ($this->useWireAPI) { + $reuters_wireitems = $data['wireitems']; + $section_name = $data['wire_name']; + $processedData = $this->processData($reuters_wireitems); + + // Merge all articles from Editor's Highlight section into existing array of templates. + $top_section = reset($processedData); + if ($top_section['type'] == 'headlines') { + $top_section = array_shift($processedData); + $articles = $top_section['headlines']; + $processedData = array_merge($articles, $processedData); + } + $stories = $processedData; + } else { + $section_name = $data['result']['section']['name']; + if (isset($data['arcResult']['articles'])) { + $stories = $data['arcResult']['articles']; + } else { + $stories = $data['result']['articles']; + } + } + $this->feedName = $section_name . ' | Reuters'; + + foreach ($stories as $story) { + $uid = ''; + $author = ''; + $category = []; + $content = ''; + $title = ''; + $timestamp = ''; + $url = ''; + $article_uri = ''; + $source_type = ''; + if ($this->useWireAPI) { + $uid = $story['story']['usn']; + $article_uri = $story['template_action']['api_path']; + $title = $story['story']['hed']; + $url = $story['template_action']['url']; + } else { + $uid = $story['id']; + $url = self::URI . $story['canonical_url']; + $title = $story['title']; + $article_uri = $story['canonical_url']; + $source_type = $story['source']['name']; + if (isset($story['related_stories'])) { + $this->addRelatedStories($story['related_stories']); + } + } + + // Some article cause unexpected behaviour like redirect to another site not API. + // Attempt to check article source type to avoid this. + if (!$this->useWireAPI && $source_type != 'Package') { // Only Reuters PF api have this, Wire don't. + $author = $this->handleAuthorName($story['authors']); + $timestamp = $story['published_time']; + $image_placeholder = ''; + if (isset($story['thumbnail'])) { + $image_placeholder = $this->handleImage([$story['thumbnail']]); + } + $content = $story['description'] . $image_placeholder; + $category = [$story['primary_section']['name']]; + } else { + $content_detail = $this->getArticle($article_uri); + $description = $content_detail['content']; + $description = defaultLinkTo($description, $this->getURI()); + + $author = $content_detail['author']; + $images = $content_detail['images']; + $category = $content_detail['category']; + $content = "$description $images"; + $timestamp = $content_detail['published_at']; + } + + $this->addStories($title, $content, $timestamp, $author, $url, $category); + } + } } |