aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bridges/CommonDreamsBridge.php9
-rw-r--r--bridges/TheHackerNewsBridge.php120
-rw-r--r--bridges/WikiLeaksBridge.php6
3 files changed, 73 insertions, 62 deletions
diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php
index ea21b436..99580499 100644
--- a/bridges/CommonDreamsBridge.php
+++ b/bridges/CommonDreamsBridge.php
@@ -21,10 +21,11 @@ class CommonDreamsBridge extends FeedExpander
private function extractContent($url)
{
- $html3 = getSimpleHTMLDOMCached($url);
- $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
- $html3->clear();
- unset($html3);
+ $dom = getSimpleHTMLDOMCached($url);
+ $summary = $dom->find('div.node__body', 0);
+ $text = $summary->innertext;
+ $dom->clear();
+ unset($dom);
return $text;
}
}
diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php
index 98b8f840..0636bb46 100644
--- a/bridges/TheHackerNewsBridge.php
+++ b/bridges/TheHackerNewsBridge.php
@@ -13,72 +13,80 @@ class TheHackerNewsBridge extends BridgeAbstract
$limit = 0;
foreach ($html->find('div.body-post') as $element) {
- if ($limit < 5) {
- $article_url = $element->find('a.story-link', 0)->href;
- $article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext);
+ if ($limit >= 5) {
+ break;
+ }
+
+ $article_author = null;
+ $icon_user = $element->find('i.icon-user', 0);
+ if ($icon_user) {
+ $article_author = trim($icon_user->parent()->plaintext);
$article_author = str_replace('&#59396;', '', $article_author);
- $article_title = $element->find('h2.home-title', 0)->plaintext;
+ }
+ $article_title = $element->find('h2.home-title', 0)->plaintext;
- $article_timestamp = time();
- //Date without time
- $calendar = $element->find('i.icon-calendar', 0);
- if ($calendar) {
- $article_timestamp = strtotime(
- extractFromDelimiters(
- $calendar->parent()->outertext,
- '</i>',
- '<span>'
- )
- );
- }
+ $article_timestamp = time();
+ //Date without time
+ $calendar = $element->find('i.icon-calendar', 0);
+ if ($calendar) {
+ $article_timestamp = strtotime(
+ extractFromDelimiters(
+ $calendar->parent()->outertext,
+ '</i>',
+ '<span>'
+ )
+ );
+ }
- //Article thumbnail in lazy-loading image
- if (is_object($element->find('img[data-echo]', 0))) {
- $article_thumbnail = [
+ //Article thumbnail in lazy-loading image
+ if (is_object($element->find('img[data-echo]', 0))) {
+ $article_thumbnail = [
+ extractFromDelimiters(
+ $element->find('img[data-echo]', 0)->outertext,
+ "data-echo='",
+ "'"
+ )
+ ];
+ } else {
+ $article_thumbnail = [];
+ }
+
+ $article_url = $element->find('a.story-link', 0)->href;
+ $article = getSimpleHTMLDOMCached($article_url);
+ if ($article) {
+ //Article body
+ $var = $article->find('div.articlebody', 0);
+ if ($var) {
+ $contents = $var->innertext;
+ $contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
+ $contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
+ $contents = stripWithDelimiters($contents, '<script', '</script>');
+ }
+ //Date with time
+ if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
+ $article_timestamp = strtotime(
extractFromDelimiters(
- $element->find('img[data-echo]', 0)->outertext,
- "data-echo='",
+ $article->find('meta[itemprop=dateModified]', 0)->outertext,
+ "content='",
"'"
)
- ];
- } else {
- $article_thumbnail = [];
- }
-
- $article = getSimpleHTMLDOMCached($article_url);
- if ($article) {
- //Article body
- $var = $article->find('div.articlebody', 0);
- if ($var) {
- $contents = $var->innertext;
- $contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
- $contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
- $contents = stripWithDelimiters($contents, '<script', '</script>');
- }
- //Date with time
- if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
- $article_timestamp = strtotime(
- extractFromDelimiters(
- $article->find('meta[itemprop=dateModified]', 0)->outertext,
- "content='",
- "'"
- )
- );
- }
- } else {
- $contents = 'Could not request TheHackerNews: ' . $article_url;
+ );
}
+ } else {
+ $contents = 'Could not request TheHackerNews: ' . $article_url;
+ }
- $item = [];
- $item['uri'] = $article_url;
- $item['title'] = $article_title;
+ $item = [];
+ $item['uri'] = $article_url;
+ $item['title'] = $article_title;
+ if ($article_author) {
$item['author'] = $article_author;
- $item['enclosures'] = $article_thumbnail;
- $item['timestamp'] = $article_timestamp;
- $item['content'] = trim($contents ?? '');
- $this->items[] = $item;
- $limit++;
}
+ $item['enclosures'] = $article_thumbnail;
+ $item['timestamp'] = $article_timestamp;
+ $item['content'] = trim($contents ?? '');
+ $this->items[] = $item;
+ $limit++;
}
}
}
diff --git a/bridges/WikiLeaksBridge.php b/bridges/WikiLeaksBridge.php
index 512b1c30..08144688 100644
--- a/bridges/WikiLeaksBridge.php
+++ b/bridges/WikiLeaksBridge.php
@@ -93,8 +93,10 @@ class WikiLeaksBridge extends BridgeAbstract
$item['title'] = $article->find('h3', 0)->plaintext;
$item['uri'] = static::URI . $article->find('h3 a', 0)->href;
$item['content'] = $article->find('div.introduction', 0)->plaintext;
- $item['timestamp'] = strtotime($article->find('div.timestamp', 0)->plaintext);
-
+ $timestamp = $article->find('div.timestamp', 0);
+ if ($timestamp) {
+ $item['timestamp'] = strtotime($timestamp->plaintext);
+ }
$this->items[] = $item;
}
}