about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bridges/TapasBridge.php 7
-rw-r--r-- lib/FeedExpander.php 5
-rw-r--r-- lib/FeedParser.php 15
3 files changed, 20 insertions, 7 deletions
diff --git a/bridges/TapasBridge.php b/bridges/TapasBridge.php
index 19995a23..ea6a7ff6 100644
--- a/bridges/TapasBridge.php
+++ b/bridges/TapasBridge.php
@@ -40,7 +40,7 @@ class TapasBridge extends FeedExpander
$this->id = $html->find('meta[property$=":url"]', 0)->content;
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
}
- $this->collectExpandableDatas($this->getURI());
+ $this->collectExpandableDatas($this->getURI(), 10);
}
protected function parseItem(array $item)
@@ -55,9 +55,8 @@ class TapasBridge extends FeedExpander
if ($this->getInput('extend_content')) {
$html = getSimpleHTMLDOM($item['uri']);
- if (!$item['content']) {
- $item['content'] = '';
- }
+ $item['content'] = $item['content'] ?? '';
+
if ($html->find('article.main__body', 0)) {
foreach ($html->find('article', 0)->find('img') as $line) {
$item['content'] .= '<img src="' . $line->{'data-src'} . '">';
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index f9cff900..361df4d9 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract
if ($xmlString === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
+ // prepare/massage the xml to make it more acceptable
+ $badStrings = [
+ '&raquo;',
+ ];
+ $xmlString = str_replace($badStrings, '', $xmlString);
$feedParser = new FeedParser();
$this->feed = $feedParser->parseFeed($xmlString);
$items = array_slice($this->feed['items'], 0, $maxItems);
diff --git a/lib/FeedParser.php b/lib/FeedParser.php
index 0a5b4679..7c8a5232 100644
--- a/lib/FeedParser.php
+++ b/lib/FeedParser.php
@@ -11,7 +11,10 @@ final class FeedParser
$xmlErrors = libxml_get_errors();
libxml_use_internal_errors(false);
if ($xml === false) {
- throw new \Exception('Unable to parse xml');
+ if ($xmlErrors) {
+ $firstXmlErrorMessage = $xmlErrors[0]->message;
+ }
+ throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? ''));
}
$feed = [
'title' => null,
@@ -123,7 +126,6 @@ final class FeedParser
{
// Primary data is compatible to 0.91 with some additional data
$item = $this->parseRss091Item($feedItem);
-
$namespaces = $feedItem->getNamespaces(true);
if (isset($namespaces['dc'])) {
$dc = $feedItem->children($namespaces['dc']);
@@ -192,7 +194,14 @@ final class FeedParser
public function parseRss091Item(\SimpleXMLElement $feedItem): array
{
- $item = [];
+ $item = [
+ 'uri' => null,
+ 'title' => null,
+ 'content' => null,
+ 'timestamp' => null,
+ 'author' => null,
+ 'enclosures' => [],
+ ];
if (isset($feedItem->link)) {
// todo: trim uri
$item['uri'] = (string)$feedItem->link;