aboutsummaryrefslogtreecommitdiff
path: root/bridges/EconomistBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/EconomistBridge.php')
-rw-r--r--bridges/EconomistBridge.php270
1 files changed, 138 insertions, 132 deletions
diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php
index 973711a5..79314a0d 100644
--- a/bridges/EconomistBridge.php
+++ b/bridges/EconomistBridge.php
@@ -1,143 +1,149 @@
<?php
-class EconomistBridge extends FeedExpander {
- const MAINTAINER = 'bockiii';
- const NAME = 'Economist Bridge';
- const URI = 'https://www.economist.com/';
- const CACHE_TIMEOUT = 3600; //1hour
- const DESCRIPTION = 'Returns the latest articles for the selected category';
+class EconomistBridge extends FeedExpander
+{
+ const MAINTAINER = 'bockiii';
+ const NAME = 'Economist Bridge';
+ const URI = 'https://www.economist.com/';
+ const CACHE_TIMEOUT = 3600; //1hour
+ const DESCRIPTION = 'Returns the latest articles for the selected category';
- const PARAMETERS = array(
- 'global' => array(
- 'limit' => array(
- 'name' => 'Feed Item Limit',
- 'required' => true,
- 'type' => 'number',
- 'defaultValue' => 10,
- 'title' => 'Maximum number of returned feed items. Maximum 30, default 10'
- )
- ),
- 'Topics' => array(
- 'topic' => array(
- 'name' => 'Topics',
- 'type' => 'list',
- 'title' => 'Select a Topic',
- 'defaultValue' => 'latest',
- 'values' => array(
- 'Latest' => 'latest',
- 'The world this week' => 'the-world-this-week',
- 'Letters' => 'letters',
- 'Leaders' => 'leaders',
- 'Briefings' => 'briefing',
- 'Special reports' => 'special-report',
- 'Britain' => 'britain',
- 'Europe' => 'europe',
- 'United States' => 'united-states',
- 'The Americas' => 'the-americas',
- 'Middle East and Africa' => 'middle-east-and-africa',
- 'Asia' => 'asia',
- 'China' => 'china',
- 'International' => 'international',
- 'Business' => 'business',
- 'Finance and economics' => 'finance-and-economics',
- 'Science and technology' => 'science-and-technology',
- 'Books and arts' => 'books-and-arts',
- 'Obituaries' => 'obituary',
- 'Graphic detail' => 'graphic-detail',
- 'Indicators' => 'economic-and-financial-indicators',
- )
- )
- ),
- 'Blogs' => array(
- 'blog' => array(
- 'name' => 'Blogs',
- 'type' => 'list',
- 'title' => 'Select a Blog',
- 'values' => array(
- 'Bagehots notebook' => 'bagehots-notebook',
- 'Bartleby' => 'bartleby',
- 'Buttonwoods notebook' => 'buttonwoods-notebook',
- 'Charlemagnes notebook' => 'charlemagnes-notebook',
- 'Democracy in America' => 'democracy-in-america',
- 'Erasmus' => 'erasmus',
- 'Free exchange' => 'free-exchange',
- 'Game theory' => 'game-theory',
- 'Gulliver' => 'gulliver',
- 'Kaffeeklatsch' => 'kaffeeklatsch',
- 'Prospero' => 'prospero',
- 'The Economist Explains' => 'the-economist-explains',
- )
- )
- )
- );
+ const PARAMETERS = [
+ 'global' => [
+ 'limit' => [
+ 'name' => 'Feed Item Limit',
+ 'required' => true,
+ 'type' => 'number',
+ 'defaultValue' => 10,
+ 'title' => 'Maximum number of returned feed items. Maximum 30, default 10'
+ ]
+ ],
+ 'Topics' => [
+ 'topic' => [
+ 'name' => 'Topics',
+ 'type' => 'list',
+ 'title' => 'Select a Topic',
+ 'defaultValue' => 'latest',
+ 'values' => [
+ 'Latest' => 'latest',
+ 'The world this week' => 'the-world-this-week',
+ 'Letters' => 'letters',
+ 'Leaders' => 'leaders',
+ 'Briefings' => 'briefing',
+ 'Special reports' => 'special-report',
+ 'Britain' => 'britain',
+ 'Europe' => 'europe',
+ 'United States' => 'united-states',
+ 'The Americas' => 'the-americas',
+ 'Middle East and Africa' => 'middle-east-and-africa',
+ 'Asia' => 'asia',
+ 'China' => 'china',
+ 'International' => 'international',
+ 'Business' => 'business',
+ 'Finance and economics' => 'finance-and-economics',
+ 'Science and technology' => 'science-and-technology',
+ 'Books and arts' => 'books-and-arts',
+ 'Obituaries' => 'obituary',
+ 'Graphic detail' => 'graphic-detail',
+ 'Indicators' => 'economic-and-financial-indicators',
+ ]
+ ]
+ ],
+ 'Blogs' => [
+ 'blog' => [
+ 'name' => 'Blogs',
+ 'type' => 'list',
+ 'title' => 'Select a Blog',
+ 'values' => [
+ 'Bagehots notebook' => 'bagehots-notebook',
+ 'Bartleby' => 'bartleby',
+ 'Buttonwoods notebook' => 'buttonwoods-notebook',
+ 'Charlemagnes notebook' => 'charlemagnes-notebook',
+ 'Democracy in America' => 'democracy-in-america',
+ 'Erasmus' => 'erasmus',
+ 'Free exchange' => 'free-exchange',
+ 'Game theory' => 'game-theory',
+ 'Gulliver' => 'gulliver',
+ 'Kaffeeklatsch' => 'kaffeeklatsch',
+ 'Prospero' => 'prospero',
+ 'The Economist Explains' => 'the-economist-explains',
+ ]
+ ]
+ ]
+ ];
- public function collectData(){
- // get if topics or blogs were selected and store the selected category
- switch ($this->queriedContext) {
- case 'Topics':
- $category = $this->getInput('topic');
- break;
- case 'Blogs':
- $category = $this->getInput('blog');
- break;
- default:
- $category = 'latest';
- }
- // limit the returned articles to 30 at max
- if ((int)$this->getInput('limit') <= 30) {
- $limit = (int)$this->getInput('limit');
- } else {
- $limit = 30;
- }
+ public function collectData()
+ {
+ // get if topics or blogs were selected and store the selected category
+ switch ($this->queriedContext) {
+ case 'Topics':
+ $category = $this->getInput('topic');
+ break;
+ case 'Blogs':
+ $category = $this->getInput('blog');
+ break;
+ default:
+ $category = 'latest';
+ }
+ // limit the returned articles to 30 at max
+ if ((int)$this->getInput('limit') <= 30) {
+ $limit = (int)$this->getInput('limit');
+ } else {
+ $limit = 30;
+ }
- $this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
- }
+ $this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
+ }
- protected function parseItem($feedItem){
- $item = parent::parseItem($feedItem);
- $article = getSimpleHTMLDOM($item['uri']);
- // before the article can be added, it needs to be cleaned up, thus, the extra function
- // We also need to distinguish between old style and new style articles
- if ($article->find('article', 0)->getAttribute('data-test-id') == 'Article') {
- $contentNode = 'div.layout-article-body';
- $imgNode = 'div.article__lead-image';
- $categoryNode = 'span.article__subheadline';
- } elseif ($article->find('article', 0)->getAttribute('data-test-id') === 'NewArticle') {
- $contentNode = 'section';
- $imgNode = 'figure.css-12eysrk.e3y6nua0';
- $categoryNode = 'span.ern1uyf0';
- } else {
- return;
- }
+ protected function parseItem($feedItem)
+ {
+ $item = parent::parseItem($feedItem);
+ $article = getSimpleHTMLDOM($item['uri']);
+ // before the article can be added, it needs to be cleaned up, thus, the extra function
+ // We also need to distinguish between old style and new style articles
+ if ($article->find('article', 0)->getAttribute('data-test-id') == 'Article') {
+ $contentNode = 'div.layout-article-body';
+ $imgNode = 'div.article__lead-image';
+ $categoryNode = 'span.article__subheadline';
+ } elseif ($article->find('article', 0)->getAttribute('data-test-id') === 'NewArticle') {
+ $contentNode = 'section';
+ $imgNode = 'figure.css-12eysrk.e3y6nua0';
+ $categoryNode = 'span.ern1uyf0';
+ } else {
+ return;
+ }
- $item['content'] = $this->cleanContent($article, $contentNode);
- // only the article lead image is retained if it's there
- if (!is_null($article->find($imgNode, 0))) {
- $item['enclosures'][] = $article->find($imgNode, 0)->find('img', 0)->getAttribute('src');
- } else {
- $item['enclosures'][] = '';
- }
- // add the subheadline as category. This will create a link in new articles
- // and a text in old articles
- $item['categories'][] = $article->find($categoryNode, 0)->innertext;
+ $item['content'] = $this->cleanContent($article, $contentNode);
+ // only the article lead image is retained if it's there
+ if (!is_null($article->find($imgNode, 0))) {
+ $item['enclosures'][] = $article->find($imgNode, 0)->find('img', 0)->getAttribute('src');
+ } else {
+ $item['enclosures'][] = '';
+ }
+ // add the subheadline as category. This will create a link in new articles
+ // and a text in old articles
+ $item['categories'][] = $article->find($categoryNode, 0)->innertext;
- return $item;
- }
+ return $item;
+ }
- private function cleanContent($article, $contentNode){
- // the actual article is in this div
- $content = $article->find($contentNode, 0)->innertext;
- // clean the article content. Remove all div's since the text is in paragraph elements
- foreach (array(
- '<div '
- ) as $tag_start) {
- $content = stripRecursiveHTMLSection($content, 'div', $tag_start);
- }
- // now remove embedded iframes. The podcast postings contain these for example
- $content = preg_replace('/<iframe.*?\/iframe>/i', '', $content);
- // fix the relative links
- $content = defaultLinkTo($content, $this->getURI());
+ private function cleanContent($article, $contentNode)
+ {
+ // the actual article is in this div
+ $content = $article->find($contentNode, 0)->innertext;
+ // clean the article content. Remove all div's since the text is in paragraph elements
+ foreach (
+ [
+ '<div '
+ ] as $tag_start
+ ) {
+ $content = stripRecursiveHTMLSection($content, 'div', $tag_start);
+ }
+ // now remove embedded iframes. The podcast postings contain these for example
+ $content = preg_replace('/<iframe.*?\/iframe>/i', '', $content);
+ // fix the relative links
+ $content = defaultLinkTo($content, $this->getURI());
- return $content;
- }
+ return $content;
+ }
}