diff options
author | 2020-06-29 18:08:19 -0700 | |
---|---|---|
committer | 2020-06-29 18:24:06 -0700 | |
commit | 1d6b0491a75687553fa9c37b68cd5f71aa6fee6e (patch) | |
tree | 94d37bdba69085fe0c45a7ece5faa56e9b330649 /reader/rss/rss.go | |
parent | c70bebb2aa686dc534da98bf059f998ba939d0f5 (diff) | |
download | v2-1d6b0491a75687553fa9c37b68cd5f71aa6fee6e.tar.gz v2-1d6b0491a75687553fa9c37b68cd5f71aa6fee6e.tar.zst v2-1d6b0491a75687553fa9c37b68cd5f71aa6fee6e.zip |
Ignore <media:title> in RSS 2.0 feeds
In the vast majority of cases, the default entry title is correct.
Ignoring <media:title> avoids overriding the default title when the two are different.
Diffstat (limited to 'reader/rss/rss.go')
-rw-r--r-- | reader/rss/rss.go | 27 |
1 files changed, 25 insertions, 2 deletions
diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 3619ec68..cbb1bd19 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -122,6 +122,12 @@ type rssAuthor struct { Inner string `xml:",innerxml"` } +type rssTitle struct { + XMLName xml.Name + Data string `xml:",chardata"` + Inner string `xml:",innerxml"` +} + type rssEnclosure struct { URL string `xml:"url,attr"` Type string `xml:"type,attr"` @@ -138,7 +144,7 @@ func (enclosure *rssEnclosure) Size() int64 { type rssItem struct { GUID string `xml:"guid"` - Title string `xml:"title"` + Title []rssTitle `xml:"title"` Links []rssLink `xml:"link"` Description string `xml:"description"` PubDate string `xml:"pubDate"` @@ -223,7 +229,24 @@ func (r *rssItem) entryHash() string { } func (r *rssItem) entryTitle() string { - return strings.TrimSpace(sanitizer.StripTags(r.Title)) + var title string + + for _, rssTitle := range r.Title { + switch rssTitle.XMLName.Space { + case "http://search.yahoo.com/mrss/": + // Ignore title in media namespace + case "http://purl.org/dc/elements/1.1/": + title = rssTitle.Data + default: + title = rssTitle.Data + } + + if title != "" { + break + } + } + + return strings.TrimSpace(sanitizer.StripTags(title)) } func (r *rssItem) entryContent() string { |