aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Corentin Garcia <corenting@gmail.com> 2023-03-11 23:26:22 +0100
committerGravatar GitHub <noreply@github.com> 2023-03-11 23:26:22 +0100
commit19fc2dc10019f0ec777a67cf895a4b62538c157d (patch)
tree2604e00975d31d90bc610faf34ad2f829de5525d
parent2c94791bcda06a3bd82a30ca024a03696686c1d2 (diff)
downloadrss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.tar.gz
rss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.tar.zst
rss-bridge-19fc2dc10019f0ec777a67cf895a4b62538c157d.zip
[GatesNotesBridge] Fix bridge (fix #3294) (#3305)
-rw-r--r--bridges/GatesNotesBridge.php74
1 files changed, 60 insertions, 14 deletions
diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php
index 8c988fcb..c7b7c37f 100644
--- a/bridges/GatesNotesBridge.php
+++ b/bridges/GatesNotesBridge.php
@@ -1,6 +1,6 @@
<?php
-class GatesNotesBridge extends FeedExpander
+class GatesNotesBridge extends BridgeAbstract
{
const MAINTAINER = 'corenting';
const NAME = 'Gates Notes';
@@ -8,14 +8,51 @@ class GatesNotesBridge extends FeedExpander
const DESCRIPTION = 'Returns the newest articles.';
const CACHE_TIMEOUT = 21600; // 6h
- protected function parseItem($item)
+ /**
+  * Fetch the article list from the Gates Notes API and add one feed item per article.
+  *
+  * Each item's content is built by getItemContent() from the article page.
+  *
+  * @throws \Exception When the API response cannot be decoded into a set of articles.
+  */
+ public function collectData()
+ {
+     $params = [
+         'validYearsString' => 'all',
+         'setNumber' => '0',
+         'sortByVideo' => 'all',
+         'sortByTopic' => 'all'
+     ];
+     $apiEndpoint = '/api/TGNWebAPI/Get_Filtered_Article_Set?';
+     $apiUrl = self::URI . $apiEndpoint . http_build_query($params);
+
+     $rawContent = getContents($apiUrl);
+
+     // The content is actually a json between quotes with \r\n inserted.
+     // substr() drops the surrounding quotes. The single-quoted '\r\n' and '\"'
+     // are the literal backslash sequences found in the payload, not control
+     // characters, so they must stay single-quoted.
+     $cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1));
+     $cleanedContent = str_replace('\"', '"', $cleanedContent);
+
+     $json = json_decode($cleanedContent);
+
+     // Fail loudly instead of iterating over null (or a scalar) and silently
+     // producing an empty feed when the payload is not what we expect.
+     if (!is_array($json) && !is_object($json)) {
+         throw new \Exception(
+             'Unexpected response from the Gates Notes API (' . json_last_error_msg() . ')'
+         );
+     }
+
+     foreach ($json as $article) {
+         $articleUri = self::URI . '/' . $article->{'_system_'}->name;
+
+         $item = [];
+         $item['uri'] = $articleUri;
+         $item['title'] = $article->headline;
+         $item['content'] = $this->getItemContent($articleUri);
+         $item['timestamp'] = strtotime($article->date);
+
+         $this->items[] = $item;
+     }
+ }
+
+ protected function getItemContent($articleUri)
{
- $item = parent::parseItem($item);
+ // We need to change the headers as the normal desktop website
+ // uses canvas-based image carousels for some pictures
+ $headers = [
+ 'User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+ ];
+ $article_html = getSimpleHTMLDOMCached($articleUri, 86400, $headers);
- $article_html = getSimpleHTMLDOMCached($item['uri']);
+ $content = '';
if (!$article_html) {
- $item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
- return $item;
+ $content .= '<p><em>Could not request ' . $this->getName() . ': ' . $articleUri . '</em></p>';
+ return $content;
}
$article_html = defaultLinkTo($article_html, $this->getURI());
@@ -23,6 +60,20 @@ class GatesNotesBridge extends FeedExpander
$hero_image = '<img src=' . $article_html->find('img.article_top_DMT_Image', 0)->getAttribute('data-src') . '>';
$article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
+
+ // Remove the menu bar on some articles (PDF download etc.)
+ foreach ($article_body->find('.TGN_MenuHolder') as $found) {
+ $found->remove();
+ }
+
+ // For the carousel pictures, we still need to remove the lazy-loading and force the real picture
+ foreach ($article_body->find('canvas') as $found) {
+ $found->remove();
+ }
+ foreach ($article_body->find('.TGN_PE_C_Img') as $found) {
+ $found->setAttribute('src', $found->getAttribute('data-src'));
+ }
+
// Convert iframe of Youtube videos to link
foreach ($article_body->find('iframe') as $found) {
$iframeUrl = $found->getAttribute('src');
@@ -32,6 +83,7 @@ class GatesNotesBridge extends FeedExpander
$found->outertext = '<p><a href="' . $iframeUrl . '">' . $text . '</a></p>';
}
}
+
// Remove <link> CSS ressources
foreach ($article_body->find('link') as $found) {
$linkedRessourceUrl = $found->getAttribute('href');
@@ -42,14 +94,8 @@ class GatesNotesBridge extends FeedExpander
}
$article_body = sanitize($article_body->innertext);
- $item['content'] = $top_description . $hero_image . $article_body;
-
- return $item;
- }
+ $content = $top_description . $hero_image . $article_body;
- public function collectData()
- {
- $feed = static::URI . '/rss';
- $this->collectExpandableDatas($feed);
+ return $content;
}
}