diff options
author | 2023-10-13 02:31:09 +0200 | |
---|---|---|
committer | 2023-10-13 02:31:09 +0200 | |
commit | 49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3 (patch) | |
tree | 1c0d7fb18be8a2ac3d4a0086b8a007d3e74b799a /lib | |
parent | 2880524dfc7685985fde8429c1dcb85387f4ba14 (diff) | |
download | rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.tar.gz rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.tar.zst rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.zip |
refactor: more feed parsing tweaks (#3748)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/FeedExpander.php | 5 | ||||
-rw-r--r-- | lib/FeedParser.php | 15 |
2 files changed, 17 insertions, 3 deletions
```diff
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index f9cff900..361df4d9 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract
         if ($xmlString === '') {
             throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
         }
+        // prepare/massage the xml to make it more acceptable
+        $badStrings = [
+            '»',
+        ];
+        $xmlString = str_replace($badStrings, '', $xmlString);
         $feedParser = new FeedParser();
         $this->feed = $feedParser->parseFeed($xmlString);
         $items = array_slice($this->feed['items'], 0, $maxItems);
diff --git a/lib/FeedParser.php b/lib/FeedParser.php
index 0a5b4679..7c8a5232 100644
--- a/lib/FeedParser.php
+++ b/lib/FeedParser.php
@@ -11,7 +11,10 @@ final class FeedParser
         $xmlErrors = libxml_get_errors();
         libxml_use_internal_errors(false);
         if ($xml === false) {
-            throw new \Exception('Unable to parse xml');
+            if ($xmlErrors) {
+                $firstXmlErrorMessage = $xmlErrors[0]->message;
+            }
+            throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? ''));
         }
         $feed = [
             'title' => null,
@@ -123,7 +126,6 @@ final class FeedParser
     {
         // Primary data is compatible to 0.91 with some additional data
         $item = $this->parseRss091Item($feedItem);
-
         $namespaces = $feedItem->getNamespaces(true);
         if (isset($namespaces['dc'])) {
             $dc = $feedItem->children($namespaces['dc']);
@@ -192,7 +194,14 @@ final class FeedParser
 
     public function parseRss091Item(\SimpleXMLElement $feedItem): array
     {
-        $item = [];
+        $item = [
+            'uri' => null,
+            'title' => null,
+            'content' => null,
+            'timestamp' => null,
+            'author' => null,
+            'enclosures' => [],
+        ];
         if (isset($feedItem->link)) {
             // todo: trim uri
             $item['uri'] = (string)$feedItem->link;
```