aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorGravatar Dag <me@dvikan.no> 2023-10-16 02:58:03 +0200
committerGravatar GitHub <noreply@github.com> 2023-10-16 02:58:03 +0200
commitef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee (patch)
tree6cae063ad6895898e9bb1901a4e57f7611fc2bef /lib
parent408c2e5e918dd94716281dfce4bb5b6882cc829e (diff)
downloadrss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.tar.gz
rss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.tar.zst
rss-bridge-ef5bd83bd0d8645b1d7ae4201e7a167f82e3eaee.zip
feat: preserve and reproduce podcast feeds (itunes rss module) (#3759)
Diffstat (limited to 'lib')
-rw-r--r--lib/FeedParser.php100
-rw-r--r--lib/FormatAbstract.php2
2 files changed, 69 insertions, 33 deletions
diff --git a/lib/FeedParser.php b/lib/FeedParser.php
index 1393f5f5..2d982de1 100644
--- a/lib/FeedParser.php
+++ b/lib/FeedParser.php
@@ -3,11 +3,13 @@
declare(strict_types=1);
/**
- * Very basic and naive feed parser that srapes out rss 0.91, 1.0, 2.0 and atom 1.0.
+ * Very basic and naive feed parser.
*
- * Emit arrays meant to be used inside rss-bridge.
+ * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
- * The feed item structure is identical to that of FeedItem
+ * Produce arrays meant to be used inside rss-bridge.
+ *
+ * The item structure is tweaked so that it works with FeedItem
*/
final class FeedParser
{
@@ -85,9 +87,7 @@ final class FeedParser
public function parseAtomItem(\SimpleXMLElement $feedItem): array
{
- // Some ATOM entries also contain RSS 2.0 fields
$item = $this->parseRss2Item($feedItem);
-
if (isset($feedItem->id)) {
$item['uri'] = (string)$feedItem->id;
}
@@ -131,8 +131,35 @@ final class FeedParser
public function parseRss2Item(\SimpleXMLElement $feedItem): array
{
- // Primary data is compatible to 0.91 with some additional data
- $item = $this->parseRss091Item($feedItem);
+ $item = [
+ 'uri' => '',
+ 'title' => '',
+ 'content' => '',
+ 'timestamp' => '',
+ 'author' => '',
+ //'uid' => null,
+ //'categories' => [],
+ //'enclosures' => [],
+ ];
+
+ foreach ($feedItem as $k => $v) {
+ $hasChildren = count($v) !== 0;
+ if (!$hasChildren) {
+ $item[$k] = (string) $v;
+ }
+ }
+
+ if (isset($feedItem->link)) {
+ // todo: trim uri
+ $item['uri'] = (string)$feedItem->link;
+ }
+ if (isset($feedItem->title)) {
+ $item['title'] = html_entity_decode((string)$feedItem->title);
+ }
+ if (isset($feedItem->description)) {
+ $item['content'] = (string)$feedItem->description;
+ }
+
$namespaces = $feedItem->getNamespaces(true);
if (isset($namespaces['dc'])) {
$dc = $feedItem->children($namespaces['dc']);
@@ -140,7 +167,24 @@ final class FeedParser
if (isset($namespaces['media'])) {
$media = $feedItem->children($namespaces['media']);
}
-
+ foreach ($namespaces as $namespaceName => $namespaceUrl) {
+ if (in_array($namespaceName, ['', 'content', 'media'])) {
+ continue;
+ }
+ $module = $feedItem->children($namespaceUrl);
+ $item[$namespaceName] = [];
+ foreach ($module as $moduleKey => $moduleValue) {
+ $item[$namespaceName][$moduleKey] = (string) $moduleValue;
+ }
+ }
+ if (isset($namespaces['itunes'])) {
+ $enclosure = $feedItem->enclosure;
+ $item['enclosure'] = [
+ 'url' => (string) $enclosure['url'],
+ 'length' => (string) $enclosure['length'],
+ 'type' => (string) $enclosure['type'],
+ ];
+ }
if (isset($feedItem->guid)) {
// Pluck out a url from guid
foreach ($feedItem->guid->attributes() as $attribute => $value) {
@@ -185,29 +229,12 @@ final class FeedParser
public function parseRss1Item(\SimpleXMLElement $feedItem): array
{
- // 1.0 adds optional elements around the 0.91 standard
- $item = $this->parseRss091Item($feedItem);
- $namespaces = $feedItem->getNamespaces(true);
- if (isset($namespaces['dc'])) {
- $dc = $feedItem->children($namespaces['dc']);
- if (isset($dc->date)) {
- $item['timestamp'] = strtotime((string)$dc->date);
- }
- if (isset($dc->creator)) {
- $item['author'] = (string)$dc->creator;
- }
- }
- return $item;
- }
-
- public function parseRss091Item(\SimpleXMLElement $feedItem): array
- {
$item = [
- 'uri' => null,
- 'title' => null,
- 'content' => null,
- 'timestamp' => null,
- 'author' => null,
+ 'uri' => '',
+ 'title' => '',
+ 'content' => '',
+ 'timestamp' => '',
+ 'author' => '',
//'uid' => null,
//'categories' => [],
//'enclosures' => [],
@@ -219,12 +246,19 @@ final class FeedParser
if (isset($feedItem->title)) {
$item['title'] = html_entity_decode((string)$feedItem->title);
}
- // rss 0.91 doesn't support timestamps
- // rss 0.91 doesn't support authors
- // rss 0.91 doesn't support enclosures
if (isset($feedItem->description)) {
$item['content'] = (string)$feedItem->description;
}
+ $namespaces = $feedItem->getNamespaces(true);
+ if (isset($namespaces['dc'])) {
+ $dc = $feedItem->children($namespaces['dc']);
+ if (isset($dc->date)) {
+ $item['timestamp'] = strtotime((string)$dc->date);
+ }
+ if (isset($dc->creator)) {
+ $item['author'] = (string)$dc->creator;
+ }
+ }
return $item;
}
}
diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php
index b05a5764..c76d1e42 100644
--- a/lib/FormatAbstract.php
+++ b/lib/FormatAbstract.php
@@ -2,6 +2,8 @@
abstract class FormatAbstract
{
+ public const ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd';
+
const MIME_TYPE = 'text/plain';
protected string $charset = 'UTF-8';