diff options
author | 2023-03-11 23:26:22 +0100 | |
---|---|---|
committer | 2023-03-11 23:26:22 +0100 | |
commit | 19fc2dc10019f0ec777a67cf895a4b62538c157d (patch) | |
tree | 2604e00975d31d90bc610faf34ad2f829de5525d | |
parent | 2c94791bcda06a3bd82a30ca024a03696686c1d2 (diff) | |
download | rss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.tar.gz rss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.tar.zst rss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.zip |
[GatesNotesBridge] Fix bridge (fix #3294) (#3305)
-rw-r--r-- | bridges/GatesNotesBridge.php | 74 |
1 files changed, 60 insertions, 14 deletions
```diff
diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php
index 8c988fcb..c7b7c37f 100644
--- a/bridges/GatesNotesBridge.php
+++ b/bridges/GatesNotesBridge.php
@@ -1,6 +1,6 @@
 <?php
 
-class GatesNotesBridge extends FeedExpander
+class GatesNotesBridge extends BridgeAbstract
 {
     const MAINTAINER = 'corenting';
     const NAME = 'Gates Notes';
@@ -8,14 +8,51 @@ class GatesNotesBridge extends FeedExpander
     const DESCRIPTION = 'Returns the newest articles.';
     const CACHE_TIMEOUT = 21600; // 6h
 
-    protected function parseItem($item)
+    public function collectData()
+    {
+        $params = [
+            'validYearsString' => 'all',
+            'setNumber' => '0',
+            'sortByVideo' => 'all',
+            'sortByTopic' => 'all'
+        ];
+        $api_endpoint = '/api/TGNWebAPI/Get_Filtered_Article_Set?';
+        $apiUrl = self::URI . $api_endpoint . http_build_query($params);
+
+        $rawContent = getContents($apiUrl);
+        $cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1));
+        $cleanedContent = str_replace('\"', '"', $cleanedContent);
+
+        // The content is actually a json between quotes with \r\n inserted
+        $json = json_decode($cleanedContent);
+
+        foreach ($json as $article) {
+            $item = [];
+
+            $articleUri = self::URI . '/' . $article->{'_system_'}->name;
+
+            $item['uri'] = $articleUri;
+            $item['title'] = $article->headline;
+            $item['content'] = self::getItemContent($articleUri);
+            $item['timestamp'] = strtotime($article->date);
+
+            $this->items[] = $item;
+        }
+    }
+
+    protected function getItemContent($articleUri)
     {
-        $item = parent::parseItem($item);
+        // We need to change the headers as the normal desktop website
+        // use canvas-based image carousels for some pictures
+        $headers = [
+            'User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+        ];
+        $article_html = getSimpleHTMLDOMCached($articleUri, 86400, $headers);
 
-        $article_html = getSimpleHTMLDOMCached($item['uri']);
+        $content = '';
         if (!$article_html) {
-            $item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
-            return $item;
+            $content .= '<p><em>Could not request ' . $this->getName() . ': ' . $articleUri . '</em></p>';
+            return $content;
         }
         $article_html = defaultLinkTo($article_html, $this->getURI());
 
@@ -23,6 +60,20 @@ class GatesNotesBridge extends FeedExpander
         $hero_image = '<img src=' . $article_html->find('img.article_top_DMT_Image', 0)->getAttribute('data-src') . '>';
 
         $article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
+
+        // Remove the menu bar on some articles (PDF download etc.)
+        foreach ($article_body->find('.TGN_MenuHolder') as $found) {
+            $found->remove();
+        }
+
+        // For the carousels pictures, we still to remove the lazy-loading and force the real picture
+        foreach ($article_body->find('canvas') as $found) {
+            $found->remove();
+        }
+        foreach ($article_body->find('.TGN_PE_C_Img') as $found) {
+            $found->setAttribute('src', $found->getAttribute('data-src'));
+        }
+
         // Convert iframe of Youtube videos to link
         foreach ($article_body->find('iframe') as $found) {
             $iframeUrl = $found->getAttribute('src');
@@ -32,6 +83,7 @@ class GatesNotesBridge extends FeedExpander
                 $found->outertext = '<p><a href="' . $iframeUrl . '">' . $text . '</a></p>';
             }
         }
+
         // Remove <link> CSS ressources
         foreach ($article_body->find('link') as $found) {
             $linkedRessourceUrl = $found->getAttribute('href');
@@ -42,14 +94,8 @@ class GatesNotesBridge extends FeedExpander
         }
 
         $article_body = sanitize($article_body->innertext);
-        $item['content'] = $top_description . $hero_image . $article_body;
-
-        return $item;
-    }
+        $content = $top_description . $hero_image . $article_body;
 
-    public function collectData()
-    {
-        $feed = static::URI . '/rss';
-        $this->collectExpandableDatas($feed);
+        return $content;
     }
 }
```