diff options
author | 2021-01-03 11:33:10 -0800 | |
---|---|---|
committer | 2021-01-03 11:44:07 -0800 | |
commit | 291bf96d15ac4fdfbb2c99e3d4de9fb9d2ee162b (patch) | |
tree | 796c7ee84550fcb2baa77d7a6187b1781dc8eb85 | |
parent | f0610bdd9c9db82b65663e754294dd474df9fc7e (diff) | |
download | v2-291bf96d15ac4fdfbb2c99e3d4de9fb9d2ee162b.tar.gz v2-291bf96d15ac4fdfbb2c99e3d4de9fb9d2ee162b.tar.zst v2-291bf96d15ac4fdfbb2c99e3d4de9fb9d2ee162b.zip |
Do not strip tags for entry title
Some technical blogs have titles like "</some-title>" or "This is some <code>source code</code>".
Miniflux was removing these elements, which prevented the title from rendering correctly.
-rw-r--r-- | reader/atom/atom_10.go | 3 | ||||
-rw-r--r-- | reader/atom/atom_10_test.go | 6 | ||||
-rw-r--r-- | reader/json/json.go | 5 | ||||
-rw-r--r-- | reader/json/parser_test.go | 29 | ||||
-rw-r--r-- | reader/rdf/rdf.go | 1 | ||||
-rw-r--r-- | reader/rss/parser_test.go | 27 | ||||
-rw-r--r-- | reader/rss/rss.go | 7 |
7 files changed, 62 insertions, 16 deletions
diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index d60c3ce4..5fb42d7b 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -15,7 +15,6 @@ import ( "miniflux.app/model" "miniflux.app/reader/date" "miniflux.app/reader/media" - "miniflux.app/reader/sanitizer" "miniflux.app/url" ) @@ -100,7 +99,7 @@ func (a *atom10Entry) Transform() *model.Entry { } func (a *atom10Entry) entryTitle() string { - return sanitizer.StripTags(a.Title.String()) + return a.Title.String() } func (a *atom10Entry) entryContent() string { diff --git a/reader/atom/atom_10_test.go b/reader/atom/atom_10_test.go index 4999aca2..519bb94b 100644 --- a/reader/atom/atom_10_test.go +++ b/reader/atom/atom_10_test.go @@ -265,7 +265,7 @@ func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != "Test “Test”" { + if feed.Entries[0].Title != "Test “Test”" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -291,7 +291,7 @@ func TestParseEntryTitleWithHTML(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != "Test Test" { + if feed.Entries[0].Title != "<code>Test</code> Test" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -317,7 +317,7 @@ func TestParseEntryTitleWithXHTML(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != "Test Test" { + if feed.Entries[0].Title != "<code>Test</code> Test" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } diff --git a/reader/json/json.go b/reader/json/json.go index 45e28888..18bbcaee 100644 --- a/reader/json/json.go +++ b/reader/json/json.go @@ -12,7 +12,6 @@ import ( "miniflux.app/logger" "miniflux.app/model" "miniflux.app/reader/date" - "miniflux.app/reader/sanitizer" "miniflux.app/url" ) @@ -123,9 +122,9 @@ func (j *jsonItem) GetHash() string { } func (j *jsonItem) GetTitle() string { - for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} { + for _, value := range []string{j.Title, j.Summary, 
j.Text, j.URL} { if value != "" { - return truncate(sanitizer.StripTags(value)) + return truncate(value) } } diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go index 93d8189a..46bfba40 100644 --- a/reader/json/parser_test.go +++ b/reader/json/parser_test.go @@ -76,7 +76,7 @@ func TestParseJsonFeed(t *testing.T) { t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL) } - if feed.Entries[1].Title != "Hello, world!" { + if feed.Entries[1].Title != "https://example.org/initial-post" { t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title) } @@ -409,6 +409,33 @@ func TestParseTruncateItemTitle(t *testing.T) { } } +func TestParseItemTitleWithXMLTags(t *testing.T) { + data := `{ + "version": "https://jsonfeed.org/version/1", + "title": "My Example Feed", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "items": [ + { + "title": "</example>" + } + ] + }` + + feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].Title != "</example>" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + func TestParseInvalidJSON(t *testing.T) { data := `garbage` _, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data)) diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index 337df206..1710897a 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -40,7 +40,6 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed { if entry.Author == "" && r.DublinCoreCreator != "" { entry.Author = strings.TrimSpace(r.DublinCoreCreator) } - entry.Author = sanitizer.StripTags(entry.Author) if entry.URL == "" { entry.URL = feed.SiteURL diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 64d1e456..891b2e1e 100644 --- a/reader/rss/parser_test.go +++ 
b/reader/rss/parser_test.go @@ -951,7 +951,7 @@ func TestParseInvalidXml(t *testing.T) { } } -func TestParseWithHTMLEntity(t *testing.T) { +func TestParseFeedTitleWithHTMLEntity(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> <channel> @@ -970,7 +970,30 @@ func TestParseWithHTMLEntity(t *testing.T) { } } -func TestParseWithInvalidCharacterEntity(t *testing.T) { +func TestParseItemTitleWithHTMLEntity(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> + <channel> + <link>https://example.org/</link> + <title>Example</title> + <item> + <title></example></title> + <link>http://www.example.org/entries/1</link> + </item> + </channel> + </rss>` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != "</example>" { + t.Errorf(`Incorrect title, got: %q`, feed.Title) + } +} + +func TestParseFeedLinkWithInvalidCharacterEntity(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> <channel> diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 51d52ce6..196b9d15 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -62,7 +62,6 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed { if entry.Author == "" { entry.Author = r.feedAuthor() } - entry.Author = sanitizer.StripTags(entry.Author) if entry.URL == "" { entry.URL = feed.SiteURL @@ -111,7 +110,7 @@ func (r rssFeed) feedAuthor() string { case r.Webmaster != "": author = r.Webmaster } - return strings.TrimSpace(author) + return sanitizer.StripTags(strings.TrimSpace(author)) } type rssLink struct { @@ -227,7 +226,7 @@ func (r *rssItem) entryAuthor() string { author = r.DublinCoreCreator } - return strings.TrimSpace(author) + return 
sanitizer.StripTags(strings.TrimSpace(author)) } func (r *rssItem) entryHash() string { @@ -258,7 +257,7 @@ func (r *rssItem) entryTitle() string { } } - return strings.TrimSpace(sanitizer.StripTags(title)) + return strings.TrimSpace(title) } func (r *rssItem) entryContent() string { |