about | summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: Frédéric Guillot <f@miniflux.net> 2023-09-08 16:50:06 -0700
committer: Frédéric Guillot <f@miniflux.net> 2023-09-08 17:39:49 -0700
commit: 36f013670efa627883f8de3d03ff93b7b119baff (patch)
tree: 8e403c592f906a290ebeb756bf699bbbb9e5694f /internal
parent: 344a237af87e07c51ca73e3b6f1c23598613996d (diff)
download: v2-36f013670efa627883f8de3d03ff93b7b119baff.tar.gz
download: v2-36f013670efa627883f8de3d03ff93b7b119baff.tar.zst
download: v2-36f013670efa627883f8de3d03ff93b7b119baff.zip
Strip HTML tags from DublinCore Creator tags
Diffstat (limited to 'internal')
-rw-r--r-- internal/reader/dublincore/dublincore.go (renamed from internal/reader/rdf/dublincore.go) | 20
-rw-r--r-- internal/reader/rdf/parser_test.go | 28
-rw-r--r-- internal/reader/rdf/rdf.go | 9
-rw-r--r-- internal/reader/rss/dublincore.go | 11
-rw-r--r-- internal/reader/rss/rss.go | 5
5 files changed, 53 insertions, 20 deletions
diff --git a/internal/reader/rdf/dublincore.go b/internal/reader/dublincore/dublincore.go
index 6d0112c3..e2e2607c 100644
--- a/internal/reader/rdf/dublincore.go
+++ b/internal/reader/dublincore/dublincore.go
@@ -1,16 +1,30 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
-package rdf // import "miniflux.app/v2/internal/reader/rdf"
+package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
+
+import (
+ "strings"
+
+ "miniflux.app/v2/internal/reader/sanitizer"
+)
// DublinCoreFeedElement represents Dublin Core feed XML elements.
type DublinCoreFeedElement struct {
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}
-// DublinCoreEntryElement represents Dublin Core entry XML elements.
-type DublinCoreEntryElement struct {
+func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
+ return strings.TrimSpace(sanitizer.StripTags(feed.DublinCoreCreator))
+}
+
+// DublinCoreItemElement represents Dublin Core entry XML elements.
+type DublinCoreItemElement struct {
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}
+
+func (item *DublinCoreItemElement) GetSanitizedCreator() string {
+ return strings.TrimSpace(sanitizer.StripTags(item.DublinCoreCreator))
+}
diff --git a/internal/reader/rdf/parser_test.go b/internal/reader/rdf/parser_test.go
index 52565956..67b8c569 100644
--- a/internal/reader/rdf/parser_test.go
+++ b/internal/reader/rdf/parser_test.go
@@ -349,6 +349,34 @@ func TestParseItemWithDublicCoreDate(t *testing.T) {
}
}
+func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
+ data := `<?xml version="1.0" encoding="utf-8"?>
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+ <channel>
+ <title>Example</title>
+ <link>http://example.org</link>
+ </channel>
+
+ <item>
+ <title>Title</title>
+ <description>Test</description>
+ <link>http://example.org/test.html</link>
+ <dc:creator>&lt;a href=&quot;http://example.org/author1&quot;>Author 1&lt;/a&gt; (University 1), &lt;a href=&quot;http://example.org/author2&quot;>Author 2&lt;/a&gt; (University 2)</dc:creator>
+ <dc:date>2018-04-10T05:00:00+00:00</dc:date>
+ </item>
+ </rdf:RDF>`
+
+ feed, err := Parse("http://example.org", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
+ if feed.Entries[0].Author != expectedAuthor {
+ t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
+ }
+}
+
func TestParseItemWithoutDate(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
diff --git a/internal/reader/rdf/rdf.go b/internal/reader/rdf/rdf.go
index 6118ec20..935d0c0c 100644
--- a/internal/reader/rdf/rdf.go
+++ b/internal/reader/rdf/rdf.go
@@ -13,6 +13,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
)
@@ -22,7 +23,7 @@ type rdfFeed struct {
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
Items []rdfItem `xml:"item"`
- DublinCoreFeedElement
+ dublincore.DublinCoreFeedElement
}
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
@@ -38,7 +39,7 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
if entry.Author == "" && r.DublinCoreCreator != "" {
- entry.Author = strings.TrimSpace(r.DublinCoreCreator)
+ entry.Author = r.GetSanitizedCreator()
}
if entry.URL == "" {
@@ -60,7 +61,7 @@ type rdfItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
- DublinCoreEntryElement
+ dublincore.DublinCoreItemElement
}
func (r *rdfItem) Transform() *model.Entry {
@@ -88,7 +89,7 @@ func (r *rdfItem) entryContent() string {
}
func (r *rdfItem) entryAuthor() string {
- return strings.TrimSpace(r.DublinCoreCreator)
+ return r.GetSanitizedCreator()
}
func (r *rdfItem) entryURL() string {
diff --git a/internal/reader/rss/dublincore.go b/internal/reader/rss/dublincore.go
deleted file mode 100644
index e8a8d243..00000000
--- a/internal/reader/rss/dublincore.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-package rss // import "miniflux.app/v2/internal/reader/rss"
-
-// DublinCoreElement represents Dublin Core XML elements.
-type DublinCoreElement struct {
- DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
- DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
- DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
-}
diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go
index 93584bf0..323c6041 100644
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -15,6 +15,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/media"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
@@ -182,7 +183,7 @@ type rssItem struct {
CommentLinks []rssCommentLink `xml:"comments"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
Categories []rssCategory `xml:"category"`
- DublinCoreElement
+ dublincore.DublinCoreItemElement
FeedBurnerElement
PodcastEntryElement
media.Element
@@ -250,7 +251,7 @@ func (r *rssItem) entryAuthor() string {
}
if author == "" {
- author = r.DublinCoreCreator
+ author = r.GetSanitizedCreator()
}
return sanitizer.StripTags(strings.TrimSpace(author))