diff options
Diffstat (limited to '')
-rw-r--r-- | internal/reader/atom/atom_10_adapter.go | 109 | ||||
-rw-r--r-- | internal/reader/atom/atom_10_test.go | 154 | ||||
-rw-r--r-- | internal/reader/atom/atom_common.go | 15 | ||||
-rw-r--r-- | internal/reader/json/parser_test.go | 36 | ||||
-rw-r--r-- | internal/reader/rss/adapter.go | 91 | ||||
-rw-r--r-- | internal/reader/rss/parser_test.go | 170 |
6 files changed, 447 insertions, 128 deletions
diff --git a/internal/reader/atom/atom_10_adapter.go b/internal/reader/atom/atom_10_adapter.go index c9ce5c23..a0a73623 100644 --- a/internal/reader/atom/atom_10_adapter.go +++ b/internal/reader/atom/atom_10_adapter.go @@ -158,51 +158,92 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries { uniqueEnclosuresMap := make(map[string]bool) for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() { - if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found { - uniqueEnclosuresMap[mediaThumbnail.URL] = true - entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ - URL: mediaThumbnail.URL, - MimeType: mediaThumbnail.MimeType(), - Size: mediaThumbnail.Size(), - }) - } - } - - for _, link := range atomEntry.Links { - if !strings.EqualFold(link.Rel, "enclosure") || link.Href == "" { + mediaURL := strings.TrimSpace(mediaThumbnail.URL) + if mediaURL == "" { continue } + if _, found := uniqueEnclosuresMap[mediaURL]; !found { + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media thumbnail", + slog.String("url", mediaThumbnail.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + uniqueEnclosuresMap[mediaAbsoluteURL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } + } + } - if _, found := uniqueEnclosuresMap[link.Href]; !found { - uniqueEnclosuresMap[link.Href] = true - length, _ := strconv.ParseInt(link.Length, 10, 0) - entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ - URL: link.Href, - MimeType: link.Type, - Size: length, - }) + for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") { + absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, link.Href) + if err != nil { + slog.Debug("Unable to resolve absolute URL for enclosure", + slog.String("enclosure_url", link.Href), + slog.String("entry_url", entry.URL), + slog.Any("error", err), + ) + } else { + if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; !found { + uniqueEnclosuresMap[absoluteEnclosureURL] = true + length, _ := strconv.ParseInt(link.Length, 10, 0) + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: absoluteEnclosureURL, + MimeType: link.Type, + Size: length, + }) + } } } for _, mediaContent := range atomEntry.AllMediaContents() { - if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found { - uniqueEnclosuresMap[mediaContent.URL] = true - entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ - URL: mediaContent.URL, - MimeType: mediaContent.MimeType(), - Size: mediaContent.Size(), - }) + mediaURL := strings.TrimSpace(mediaContent.URL) + if mediaURL == "" { + continue + } + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media content", + slog.String("url", mediaContent.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found { + uniqueEnclosuresMap[mediaAbsoluteURL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } } } for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() { - if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found { - uniqueEnclosuresMap[mediaPeerLink.URL] = true - entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ - URL: mediaPeerLink.URL, - MimeType: mediaPeerLink.MimeType(), - Size: mediaPeerLink.Size(), - }) + mediaURL := strings.TrimSpace(mediaPeerLink.URL) + if mediaURL == "" { + continue + } + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media peer link", + slog.String("url", mediaPeerLink.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found { + uniqueEnclosuresMap[mediaAbsoluteURL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } } } diff --git a/internal/reader/atom/atom_10_test.go b/internal/reader/atom/atom_10_test.go index 7b69f29d..cdc5b364 100644 --- a/internal/reader/atom/atom_10_test.go +++ b/internal/reader/atom/atom_10_test.go @@ -1105,7 +1105,7 @@ func TestParseEntryWithEnclosures(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } if feed.Entries[0].URL != "http://www.example.org/entries/1" { @@ -1140,6 +1140,89 @@ func TestParseEntryWithEnclosures(t *testing.T) { } } +func TestParseEntryWithRelativeEnclosureURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <id>https://www.example.org/myfeed</id> + <title>My Podcast Feed</title> + <link href="https://example.org" /> + <link rel="self" href="https://example.org/myfeed" /> + <entry> + <id>https://www.example.org/entries/1</id> + <title>Atom 1.0</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="https://www.example.org/entries/1" /> + <link rel="enclosure" + type="audio/mpeg" + title="MP3" + href=" /myaudiofile.mp3 " + length="1234" /> + </content> + </entry> + </feed>` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "https://example.org/myaudiofile.mp3" { + t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL) + } +} + +func TestParseEntryWithDuplicateEnclosureURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <feed xmlns="http://www.w3.org/2005/Atom"> + <id>http://www.example.org/myfeed</id> + <title>My Podcast Feed</title> + <link href="http://example.org" /> + <link rel="self" href="http://example.org/myfeed" /> + <entry> + <id>http://www.example.org/entries/1</id> + <title>Atom 1.0</title> + <updated>2005-07-15T12:00:00Z</updated> + <link href="http://www.example.org/entries/1" /> + <link rel="enclosure" + type="audio/mpeg" + title="MP3" + href="http://www.example.org/myaudiofile.mp3" + length="1234" /> + <link rel="enclosure" + type="audio/mpeg" + title="MP3" + href=" http://www.example.org/myaudiofile.mp3 " + length="1234" /> + </content> + </entry> + </feed>` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { + t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL) + } +} + func TestParseEntryWithoutEnclosureURL(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> @@ -1334,20 +1417,25 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) { func TestParseMediaGroup(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> - <id>http://www.example.org/myfeed</id> + <id>https://www.example.org/myfeed</id> <title>My Video Feed</title> <updated>2005-07-15T12:00:00Z</updated> - <link href="http://example.org" /> - <link rel="self" href="http://example.org/myfeed" /> + <link href="https://example.org" /> + <link rel="self" href="https://example.org/myfeed" /> <entry> - <id>http://www.example.org/entries/1</id> + <id>https://www.example.org/entries/1</id> <title>Some Video</title> <updated>2005-07-15T12:00:00Z</updated> - <link href="http://www.example.org/entries/1" /> + <link href="https://www.example.org/entries/1" /> <media:group> <media:title>Another title</media:title> <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> - <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> + <media:content url=" /v/efg " type="application/x-shockwave-flash" width="640" height="390"/> + <media:content url=" " type="application/x-shockwave-flash" width="640" height="390"/> + <media:thumbnail url="https://www.example.org/duplicate-thumbnail.jpg" width="480" height="360"/> + <media:thumbnail url="https://www.example.org/duplicate-thumbnail.jpg" width="480" height="360"/> + <media:thumbnail url=" /thumbnail2.jpg " width="480" height="360"/> + <media:thumbnail url=" " width="480" height="360"/> <media:description>Some description A website: http://example.org/</media:description> </media:group> @@ -1360,18 +1448,10 @@ A website: http://example.org/</media:description> } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } - if len(feed.Entries[0].Enclosures) != 2 { + if len(feed.Entries[0].Enclosures) != 4 { t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } @@ -1380,8 +1460,10 @@ A website: http://example.org/</media:description> mimeType string size int64 }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://www.example.org/duplicate-thumbnail.jpg", "image/*", 0}, + {"https://example.org/thumbnail2.jpg", "image/*", 0}, {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + {"https://example.org/v/efg", "application/x-shockwave-flash", 0}, } for index, enclosure := range feed.Entries[0].Enclosures { @@ -1402,19 +1484,26 @@ A website: http://example.org/</media:description> func TestParseMediaElements(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> - <id>http://www.example.org/myfeed</id> + <id>https://www.example.org/myfeed</id> <title>My Video Feed</title> <updated>2005-07-15T12:00:00Z</updated> - <link href="http://example.org" /> - <link rel="self" href="http://example.org/myfeed" /> + <link href="https://example.org" /> + <link rel="self" href="https://example.org/myfeed" /> <entry> - <id>http://www.example.org/entries/1</id> + <id>https://www.example.org/entries/1</id> <title>Some Video</title> <updated>2005-07-15T12:00:00Z</updated> - <link href="http://www.example.org/entries/1" /> + <link href="https://www.example.org/entries/1" /> <media:title>Another title</media:title> <media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/> - <media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/> + <media:content url=" /relative/media.mp4 " type="application/x-shockwave-flash" width="640" height="390"/> + <media:content url=" " type="application/x-shockwave-flash" width="640" height="390"/> + <media:thumbnail url="https://example.org/duplicated-thumbnail.jpg" width="480" height="360"/> + <media:thumbnail url=" https://example.org/duplicated-thumbnail.jpg " width="480" height="360"/> + <media:thumbnail url=" " width="480" height="360"/> + <media:peerLink type="application/x-bittorrent" href=" http://www.example.org/sampleFile.torrent " /> + <media:peerLink type="application/x-bittorrent" href=" /sampleFile2.torrent" /> + <media:peerLink type="application/x-bittorrent" href=" " /> <media:description>Some description A website: http://example.org/</media:description> </entry> @@ -1426,18 +1515,10 @@ A website: http://example.org/</media:description> } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) - } - - if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } - if len(feed.Entries[0].Enclosures) != 2 { + if len(feed.Entries[0].Enclosures) != 5 { t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } @@ -1446,8 +1527,11 @@ A website: http://example.org/</media:description> mimeType string size int64 }{ - {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://example.org/duplicated-thumbnail.jpg", "image/*", 0}, {"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0}, + {"https://example.org/relative/media.mp4", "application/x-shockwave-flash", 0}, + {"http://www.example.org/sampleFile.torrent", "application/x-bittorrent", 0}, + {"https://example.org/sampleFile2.torrent", "application/x-bittorrent", 0}, } for index, enclosure := range feed.Entries[0].Enclosures { diff --git a/internal/reader/atom/atom_common.go b/internal/reader/atom/atom_common.go index debd46f1..945c5573 100644 --- a/internal/reader/atom/atom_common.go +++ b/internal/reader/atom/atom_common.go @@ -96,6 +96,21 @@ func (a AtomLinks) firstLinkWithRelationAndType(relation string, contentTypes .. return "" } +func (a AtomLinks) findAllLinksWithRelation(relation string) []*AtomLink { + var links []*AtomLink + + for _, link := range a { + if strings.EqualFold(link.Rel, relation) { + link.Href = strings.TrimSpace(link.Href) + if link.Href != "" { + links = append(links, link) + } + } + } + + return links +} + // The "atom:category" element conveys information about a category // associated with an entry or feed. This specification assigns no // meaning to the content (if any) of this element. diff --git a/internal/reader/json/parser_test.go b/internal/reader/json/parser_test.go index 0bbd39ec..6f62831f 100644 --- a/internal/reader/json/parser_test.go +++ b/internal/reader/json/parser_test.go @@ -848,6 +848,42 @@ func TestParseFeedIcon(t *testing.T) { } } +func TestParseFeedWithRelativeAttachmentURL(t *testing.T) { + data := `{ + "version": "https://jsonfeed.org/version/1", + "title": "My Example Feed", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "items": [ + { + "id": "2", + "content_text": "This is a second item.", + "url": "https://example.org/second-item", + "attachments": [ + { + "url": " /attachment.mp3 ", + "mime_type": "audio/mpeg", + "size_in_bytes": 123456 + } + ] + } + ] + }` + + feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "https://example.org/attachment.mp3" { + t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL) + } +} + func TestParseInvalidJSON(t *testing.T) { data := `garbage` _, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data)) diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go index 48def825..07d56059 100644 --- a/internal/reader/rss/adapter.go +++ b/internal/reader/rss/adapter.go @@ -72,7 +72,7 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed { entry := model.NewEntry() entry.Date = findEntryDate(&item) entry.Content = findEntryContent(&item) - entry.Enclosures = findEntryEnclosures(&item) + entry.Enclosures = findEntryEnclosures(&item, feed.SiteURL) // Populate the entry URL. entryURL := findEntryURL(&item) @@ -245,18 +245,30 @@ func findEntryAuthor(rssItem *RSSItem) string { return strings.TrimSpace(sanitizer.StripTags(author)) } -func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList { +func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList { enclosures := make(model.EnclosureList, 0) duplicates := make(map[string]bool) for _, mediaThumbnail := range rssItem.AllMediaThumbnails() { - if _, found := duplicates[mediaThumbnail.URL]; !found { - duplicates[mediaThumbnail.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaThumbnail.URL, - MimeType: mediaThumbnail.MimeType(), - Size: mediaThumbnail.Size(), - }) + mediaURL := strings.TrimSpace(mediaThumbnail.URL) + if mediaURL == "" { + continue + } + if _, found := duplicates[mediaURL]; !found { + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media thumbnail", + slog.String("url", mediaThumbnail.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + duplicates[mediaAbsoluteURL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } } } @@ -265,15 +277,20 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList { if rssItem.FeedBurnerEnclosureLink != "" { filename := path.Base(rssItem.FeedBurnerEnclosureLink) - if strings.Contains(enclosureURL, filename) { + if strings.HasSuffix(enclosureURL, filename) { enclosureURL = rssItem.FeedBurnerEnclosureLink } } + enclosureURL = strings.TrimSpace(enclosureURL) if enclosureURL == "" { continue } + if absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, enclosureURL); err == nil { + enclosureURL = absoluteEnclosureURL + } + if _, found := duplicates[enclosureURL]; !found { duplicates[enclosureURL] = true @@ -286,24 +303,50 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList { } for _, mediaContent := range rssItem.AllMediaContents() { - if _, found := duplicates[mediaContent.URL]; !found { - duplicates[mediaContent.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaContent.URL, - MimeType: mediaContent.MimeType(), - Size: mediaContent.Size(), - }) + mediaURL := strings.TrimSpace(mediaContent.URL) + if mediaURL == "" { + continue + } + if _, found := duplicates[mediaURL]; !found { + mediaURL := strings.TrimSpace(mediaContent.URL) + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media content", + slog.String("url", mediaContent.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + duplicates[mediaAbsoluteURL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } } } for _, mediaPeerLink := range rssItem.AllMediaPeerLinks() { - if _, found := duplicates[mediaPeerLink.URL]; !found { - duplicates[mediaPeerLink.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaPeerLink.URL, - MimeType: mediaPeerLink.MimeType(), - Size: mediaPeerLink.Size(), - }) + mediaURL := strings.TrimSpace(mediaPeerLink.URL) + if mediaURL == "" { + continue + } + if _, found := duplicates[mediaURL]; !found { + mediaURL := strings.TrimSpace(mediaPeerLink.URL) + if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil { + slog.Debug("Unable to build absolute URL for media peer link", + slog.String("url", mediaPeerLink.URL), + slog.String("site_url", siteURL), + slog.Any("error", err), + ) + } else { + duplicates[mediaAbsoluteURL] = true + enclosures = append(enclosures, &model.Enclosure{ + URL: mediaAbsoluteURL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } } } diff --git a/internal/reader/rss/parser_test.go b/internal/reader/rss/parser_test.go index e3f8450f..8d84e582 100644 --- a/internal/reader/rss/parser_test.go +++ b/internal/reader/rss/parser_test.go @@ -1016,15 +1016,11 @@ func TestParseEntryWithEnclosures(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } if len(feed.Entries[0].Enclosures) != 1 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { @@ -1065,15 +1061,11 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } if len(feed.Entries[0].Enclosures) != 2 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { @@ -1093,6 +1085,39 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) { } } +func TestParseEntryWithDuplicatedEnclosureURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0"> + <channel> + <title>My Podcast Feed</title> + <link>http://example.org</link> + <item> + <title>Podcasting with RSS</title> + <link>http://www.example.org/entries/1</link> + <enclosure url="http://www.example.org/myaudiofile.mp3" type="audio/mpeg" /> + <enclosure url=" http://www.example.org/myaudiofile.mp3 " type="audio/mpeg" /> + </item> + </channel> + </rss>` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { + t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) + } +} + func TestParseEntryWithEmptyEnclosureURL(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss version="2.0"> @@ -1106,7 +1131,7 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) { <description>An overview of RSS podcasting</description> <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate> <guid isPermaLink="true">http://www.example.org/entries/1</guid> - <enclosure url="" length="0"/> + <enclosure url=" " length="0"/> </item> </channel> </rss>` @@ -1117,15 +1142,47 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + if len(feed.Entries[0].Enclosures) != 0 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } +} - if len(feed.Entries[0].Enclosures) != 0 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) +func TestParseEntryWithRelativeEnclosureURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0"> + <channel> + <title>My Podcast Feed</title> + <link>http://example.org</link> + <author>some.email@example.org</author> + <item> + <title>Podcasting with RSS</title> + <link>http://www.example.org/entries/1</link> + <description>An overview of RSS podcasting</description> + <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate> + <guid isPermaLink="true">http://www.example.org/entries/1</guid> + <enclosure url=" /files/file.mp3 "/> + </item> + </channel> + </rss>` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "http://example.org/files/file.mp3" { + t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL) } } @@ -1154,15 +1211,11 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) - } - - if feed.Entries[0].URL != "http://www.example.org/entries/1" { - t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } if len(feed.Entries[0].Enclosures) != 1 { - t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" { @@ -1178,6 +1231,42 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) { } } +func TestParseEntryWithFeedBurnerEnclosuresAndRelativeURL(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0"> + <channel> + <title>My Example Feed</title> + <link>http://example.org</link> + <item> + <title>Example Item</title> + <link>http://www.example.org/entries/1</link> + <enclosure + url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3" + length="76192460" + type="audio/mpeg" /> + <feedburner:origEnclosureLink>/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink> + </item> + </channel> + </rss>` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Enclosures) != 1 { + t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" { + t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) + } +} + func TestParseEntryWithRelativeURL(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss version="2.0"> @@ -1389,7 +1478,7 @@ func TestParseEntryWithMediaGroup(t *testing.T) { <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> <channel> <title>My Example Feed</title> - <link>http://example.org</link> + <link>https://example.org</link> <item> <title>Example Item</title> <link>http://www.example.org/entries/1</link> @@ -1400,7 +1489,9 @@ func TestParseEntryWithMediaGroup(t *testing.T) { <media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content> <media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content> <media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content> - <media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content> + <media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content> + <media:content type="application/x-bittorrent" url=" file5.torrent " fileSize="42"></media:content> + <media:content type="application/x-bittorrent" url=" " fileSize="42"></media:content> <media:rating>nonadult</media:rating> </media:group> <media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail> @@ -1453,15 +1544,19 @@ func TestParseEntryWithMediaContent(t *testing.T) { <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> <channel> <title>My Example Feed</title> - <link>http://example.org</link> + <link>https://example.org</link> <item> <title>Example Item</title> <link>http://www.example.org/entries/1</link> <media:thumbnail url="https://example.org/thumbnail.jpg" /> + <media:thumbnail url="https://example.org/thumbnail.jpg" /> + <media:thumbnail url=" thumbnail.jpg " /> + <media:thumbnail url=" " /> <media:content url="https://example.org/media1.jpg" medium="image"> <media:title type="html">Some Title for Media 1</media:title> </media:content> - <media:content url="https://example.org/media2.jpg" medium="image" /> + <media:content url=" /media2.jpg " medium="image" /> + <media:content url=" " medium="image" /> </item> </channel> </rss>` @@ -1472,9 +1567,9 @@ func TestParseEntryWithMediaContent(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } - if len(feed.Entries[0].Enclosures) != 3 { + if len(feed.Entries[0].Enclosures) != 4 { t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } @@ -1484,6 +1579,7 @@ func TestParseEntryWithMediaContent(t *testing.T) { size int64 }{ {"https://example.org/thumbnail.jpg", "image/*", 0}, + {"https://example.org/thumbnail.jpg", "image/*", 0}, {"https://example.org/media1.jpg", "image/*", 0}, {"https://example.org/media2.jpg", "image/*", 0}, } @@ -1508,11 +1604,14 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) { <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> <channel> <title>My Example Feed</title> - <link>http://example.org</link> + <link>https://website.example.org</link> <item> <title>Example Item</title> <link>http://www.example.org/entries/1</link> - <media:peerLink type="application/x-bittorrent" href="http://www.example.org/file.torrent" /> + <media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" /> + <media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" /> + <media:peerLink type="application/x-bittorrent" href=" file2.torrent " /> + <media:peerLink type="application/x-bittorrent" href=" " /> </item> </channel> </rss>` @@ -1523,10 +1622,10 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) { } if len(feed.Entries) != 1 { - t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) } - if len(feed.Entries[0].Enclosures) != 1 { + if len(feed.Entries[0].Enclosures) != 2 { t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) } @@ -1535,7 +1634,8 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) { mimeType string size int64 }{ - {"http://www.example.org/file.torrent", "application/x-bittorrent", 0}, + {"https://www.example.org/file.torrent", "application/x-bittorrent", 0}, + {"https://website.example.org/file2.torrent", "application/x-bittorrent", 0}, } for index, enclosure := range feed.Entries[0].Enclosures { |