about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Dag <me@dvikan.no> 2023-10-13 02:31:09 +0200
committer: GitHub <noreply@github.com> 2023-10-13 02:31:09 +0200
commit: 49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3 (patch)
tree: 1c0d7fb18be8a2ac3d4a0086b8a007d3e74b799a /lib
parent: 2880524dfc7685985fde8429c1dcb85387f4ba14 (diff)
download: rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.tar.gz
rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.tar.zst
rss-bridge-49d9dafaecdb1e63ba1fe966f0e73c7f228fa5c3.zip
refactor: more feed parsing tweaks (#3748)
Diffstat (limited to 'lib')
-rw-r--r-- lib/FeedExpander.php | 5
-rw-r--r-- lib/FeedParser.php | 15
2 files changed, 17 insertions, 3 deletions
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index f9cff900..361df4d9 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract
if ($xmlString === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
+ // prepare/massage the xml to make it more acceptable
+ $badStrings = [
+ '&raquo;',
+ ];
+ $xmlString = str_replace($badStrings, '', $xmlString);
$feedParser = new FeedParser();
$this->feed = $feedParser->parseFeed($xmlString);
$items = array_slice($this->feed['items'], 0, $maxItems);
diff --git a/lib/FeedParser.php b/lib/FeedParser.php
index 0a5b4679..7c8a5232 100644
--- a/lib/FeedParser.php
+++ b/lib/FeedParser.php
@@ -11,7 +11,10 @@ final class FeedParser
$xmlErrors = libxml_get_errors();
libxml_use_internal_errors(false);
if ($xml === false) {
- throw new \Exception('Unable to parse xml');
+ if ($xmlErrors) {
+ $firstXmlErrorMessage = $xmlErrors[0]->message;
+ }
+ throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? ''));
}
$feed = [
'title' => null,
@@ -123,7 +126,6 @@ final class FeedParser
{
// Primary data is compatible to 0.91 with some additional data
$item = $this->parseRss091Item($feedItem);
-
$namespaces = $feedItem->getNamespaces(true);
if (isset($namespaces['dc'])) {
$dc = $feedItem->children($namespaces['dc']);
@@ -192,7 +194,14 @@ final class FeedParser
public function parseRss091Item(\SimpleXMLElement $feedItem): array
{
- $item = [];
+ $item = [
+ 'uri' => null,
+ 'title' => null,
+ 'content' => null,
+ 'timestamp' => null,
+ 'author' => null,
+ 'enclosures' => [],
+ ];
if (isset($feedItem->link)) {
// todo: trim uri
$item['uri'] = (string)$feedItem->link;