aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Scott Colby <scolby33@users.noreply.github.com> 2023-09-15 17:41:08 -0400
committerGravatar GitHub <noreply@github.com> 2023-09-15 23:41:08 +0200
commitcf7e3eea5612f3466f297883395723e5b325fd1e (patch)
tree5977b8082006c0bd56d9ba9b1322f35789092d64
parent3b91b1d260f8fc9fd725b9797a3a149e85d408d0 (diff)
downloadrss-bridge-cf7e3eea5612f3466f297883395723e5b325fd1e.tar.gz
rss-bridge-cf7e3eea5612f3466f297883395723e5b325fd1e.tar.zst
rss-bridge-cf7e3eea5612f3466f297883395723e5b325fd1e.zip
Add DeutscheWelle FeedExpander bridge. (#3673)
* [DeutscheWelle] Add DeutscheWelle FeedExpander bridge. * [DeutscheWelle] Fix linting errors.
-rw-r--r--bridges/DeutscheWelleBridge.php143
1 files changed, 143 insertions, 0 deletions
diff --git a/bridges/DeutscheWelleBridge.php b/bridges/DeutscheWelleBridge.php
new file mode 100644
index 00000000..2e10d670
--- /dev/null
+++ b/bridges/DeutscheWelleBridge.php
@@ -0,0 +1,143 @@
+<?php
+
+class DeutscheWelleBridge extends FeedExpander
+{
+ const MAINTAINER = 'No maintainer';
+ const NAME = 'Deutsche Welle Bridge';
+ const URI = 'https://www.dw.com';
+ const DESCRIPTION = 'Returns the full articles instead of only the intro';
+ const CACHE_TIMEOUT = 3600;
+ const PARAMETERS = [[
+ 'feed' => [
+ 'name' => 'feed',
+ 'type' => 'list',
+ 'values' => [
+ 'All Top Stories and News Updates'
+ => 'http://rss.dw.com/atom/rss-en-all',
+ 'Top Stories'
+ => 'http://rss.dw.com/atom/rss-en-top',
+ 'Germany'
+ => 'http://rss.dw.com/atom/rss-en-ger',
+ 'World'
+ => 'http://rss.dw.com/atom/rss-en-world',
+ 'Europe'
+ => 'http://rss.dw.com/atom/rss-en-eu',
+ 'Business'
+ => 'http://rss.dw.com/atom/rss-en-bus',
+ 'Science'
+ => 'http://rss.dw.com/atom/rss_en_science',
+ 'Environment'
+ => 'http://rss.dw.com/atom/rss_en_environment',
+ 'Culture & Lifestyle'
+ => 'http://rss.dw.com/atom/rss-en-cul',
+ 'Sports'
+ => 'http://rss.dw.de/atom/rss-en-sports',
+ 'Visit Germany'
+ => 'http://rss.dw.com/atom/rss-en-visitgermany',
+ 'Asia'
+ => 'http://rss.dw.com/atom/rss-en-asia',
+ 'Deutsche Welle Gesamt'
+ => 'http://rss.dw.com/atom/rss-de-all',
+ 'Themen des Tages'
+ => 'http://rss.dw.com/atom/rss-de-top',
+ 'Nachrichten'
+ => 'http://rss.dw.com/atom/rss-de-news',
+ 'Wissenschaft'
+ => 'http://rss.dw.com/atom/rss-de-wissenschaft',
+ 'Sport'
+ => 'http://rss.dw.com/atom/rss-de-sport',
+ 'Deutschland entdecken'
+ => 'http://rss.dw.com/atom/rss-de-deutschlandentdecken',
+ 'Presse'
+ => 'http://rss.dw.com/atom/presse',
+ 'Politik'
+ => 'http://rss.dw.com/atom/rss_de_politik',
+ 'Wirtschaft'
+ => 'http://rss.dw.com/atom/rss-de-eco',
+ 'Kultur & Leben'
+ => 'http://rss.dw.com/atom/rss-de-cul',
+ 'Kultur & Leben: Buch'
+ => 'http://rss.dw.com/atom/rss-de-cul-buch',
+ 'Kultur & Leben: Film'
+ => 'http://rss.dw.com/atom/rss-de-cul-film',
+ 'Kultur & Leben: Musik'
+ => 'http://rss.dw.com/atom/rss-de-cul-musik',
+ ]
+ ]
+ ]];
+
+ public function collectData()
+ {
+ $this->collectExpandableDatas($this->getInput('feed'));
+ }
+
+ protected function parseItem($item)
+ {
+ $item = parent::parseItem($item);
+
+ $parsedUrl = parse_url($item['uri']);
+ unset($parsedUrl['query']);
+ $url = $this->unparseUrl($parsedUrl);
+
+ $page = getSimpleHTMLDOM($url);
+ $page = defaultLinkTo($page, $url);
+
+ $article = $page->find('article', 0);
+
+ // author
+ $author = $article->find('.author-link > span', 0);
+ if ($author) {
+ $item['author'] = $author->text();
+ }
+
+ $teaser = $article->find('.teaser-text', 0);
+ if (!is_null($teaser)) {
+ $item['content'] = $teaser->outertext();
+ } else {
+ $item['content'] = '';
+ }
+
+ // remove unneeded elements
+ foreach (
+ $article->find(
+ 'header, .advertisement, [data-tracking-name="sharing-icons-inline"], a.external-link > svg, picture > source, .vjs-wrapper, .dw-widget, footer'
+ ) as $bad
+ ) {
+ $bad->remove();
+ }
+ // reload html as remove() is buggy
+ $article = str_get_html($article->outertext());
+
+ // remove width and height values from img tags
+ foreach ($article->find('img') as $img) {
+ $img->width = null;
+ $img->height = null;
+ }
+
+ // replace lazy-loaded images
+ foreach ($article->find('figure.placeholder-image') as $figure) {
+ $img = $figure->find('img', 0);
+ $img->src = str_replace('${formatId}', '906', $img->getAttribute('data-url'));
+ $img->style = null;
+ }
+
+ $item['content'] .= $article->save();
+
+ return $item;
+ }
+
+ // https://www.php.net/manual/en/function.parse-url.php#106731
+ private function unparseUrl($parsed_url)
+ {
+ $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
+ $host = isset($parsed_url['host']) ? $parsed_url['host'] : '';
+ $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
+ $user = isset($parsed_url['user']) ? $parsed_url['user'] : '';
+ $pass = isset($parsed_url['pass']) ? $parsed_url['pass'] : '';
+ $pass = ($user || $pass) ? "$pass@" : '';
+ $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
+ $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
+ $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';
+ return "$scheme$user$pass$host$port$path$query$fragment";
+ }
+}