diff options
author | 2023-10-16 02:58:03 +0200 | |
---|---|---|
committer | 2023-10-16 02:58:03 +0200 | |
commit | ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee (patch) | |
tree | 6cae063ad6895898e9bb1901a4e57f7611fc2bef /lib | |
parent | 408c2e5e918dd94716281dfce4bb5b6882cc829e (diff) | |
download | rss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.tar.gz rss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.tar.zst rss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.zip |
feat: preserve and reproduce podcast feeds (itunes rss module) (#3759)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/FeedParser.php | 100 | ||||
-rw-r--r-- | lib/FormatAbstract.php | 2 |
2 files changed, 69 insertions, 33 deletions
diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 1393f5f5..2d982de1 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -3,11 +3,13 @@ declare(strict_types=1); /** - * Very basic and naive feed parser that srapes out rss 0.91, 1.0, 2.0 and atom 1.0. + * Very basic and naive feed parser. * - * Emit arrays meant to be used inside rss-bridge. + * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. * - * The feed item structure is identical to that of FeedItem + * Produce arrays meant to be used inside rss-bridge. + * + * The item structure is tweaked so that works with FeedItem */ final class FeedParser { @@ -85,9 +87,7 @@ final class FeedParser public function parseAtomItem(\SimpleXMLElement $feedItem): array { - // Some ATOM entries also contain RSS 2.0 fields $item = $this->parseRss2Item($feedItem); - if (isset($feedItem->id)) { $item['uri'] = (string)$feedItem->id; } @@ -131,8 +131,35 @@ final class FeedParser public function parseRss2Item(\SimpleXMLElement $feedItem): array { - // Primary data is compatible to 0.91 with some additional data - $item = $this->parseRss091Item($feedItem); + $item = [ + 'uri' => '', + 'title' => '', + 'content' => '', + 'timestamp' => '', + 'author' => '', + //'uid' => null, + //'categories' => [], + //'enclosures' => [], + ]; + + foreach ($feedItem as $k => $v) { + $hasChildren = count($v) !== 0; + if (!$hasChildren) { + $item[$k] = (string) $v; + } + } + + if (isset($feedItem->link)) { + // todo: trim uri + $item['uri'] = (string)$feedItem->link; + } + if (isset($feedItem->title)) { + $item['title'] = html_entity_decode((string)$feedItem->title); + } + if (isset($feedItem->description)) { + $item['content'] = (string)$feedItem->description; + } + $namespaces = $feedItem->getNamespaces(true); if (isset($namespaces['dc'])) { $dc = $feedItem->children($namespaces['dc']); @@ -140,7 +167,24 @@ final class FeedParser if (isset($namespaces['media'])) { $media = $feedItem->children($namespaces['media']); } - + foreach ($namespaces as $namespaceName => $namespaceUrl) { + if (in_array($namespaceName, ['', 'content', 'media'])) { + continue; + } + $module = $feedItem->children($namespaceUrl); + $item[$namespaceName] = []; + foreach ($module as $moduleKey => $moduleValue) { + $item[$namespaceName][$moduleKey] = (string) $moduleValue; + } + } + if (isset($namespaces['itunes'])) { + $enclosure = $feedItem->enclosure; + $item['enclosure'] = [ + 'url' => (string) $enclosure['url'], + 'length' => (string) $enclosure['length'], + 'type' => (string) $enclosure['type'], + ]; + } if (isset($feedItem->guid)) { // Pluck out a url from guid foreach ($feedItem->guid->attributes() as $attribute => $value) { @@ -185,29 +229,12 @@ final class FeedParser public function parseRss1Item(\SimpleXMLElement $feedItem): array { - // 1.0 adds optional elements around the 0.91 standard - $item = $this->parseRss091Item($feedItem); - $namespaces = $feedItem->getNamespaces(true); - if (isset($namespaces['dc'])) { - $dc = $feedItem->children($namespaces['dc']); - if (isset($dc->date)) { - $item['timestamp'] = strtotime((string)$dc->date); - } - if (isset($dc->creator)) { - $item['author'] = (string)$dc->creator; - } - } - return $item; - } - - public function parseRss091Item(\SimpleXMLElement $feedItem): array - { $item = [ - 'uri' => null, - 'title' => null, - 'content' => null, - 'timestamp' => null, - 'author' => null, + 'uri' => '', + 'title' => '', + 'content' => '', + 'timestamp' => '', + 'author' => '', //'uid' => null, //'categories' => [], //'enclosures' => [], @@ -219,12 +246,19 @@ final class FeedParser if (isset($feedItem->title)) { $item['title'] = html_entity_decode((string)$feedItem->title); } - // rss 0.91 doesn't support timestamps - // rss 0.91 doesn't support authors - // rss 0.91 doesn't support enclosures if (isset($feedItem->description)) { $item['content'] = (string)$feedItem->description; } + $namespaces = $feedItem->getNamespaces(true); + if (isset($namespaces['dc'])) { + $dc = $feedItem->children($namespaces['dc']); + if (isset($dc->date)) { + $item['timestamp'] = strtotime((string)$dc->date); + } + if (isset($dc->creator)) { + $item['author'] = (string)$dc->creator; + } + } return $item; } } diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index b05a5764..c76d1e42 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -2,6 +2,8 @@ abstract class FormatAbstract { + public const ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd'; + const MIME_TYPE = 'text/plain'; protected string $charset = 'UTF-8'; |