about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Frédéric Guillot <f@miniflux.net> 2024-04-23 20:08:09 -0700
committer: Frédéric Guillot <f@miniflux.net> 2024-04-23 20:31:52 -0700
commit fb075b60b5d8f74d157d12cf66e32fe9580cf3fe (patch)
tree 2539a39cd8e1b5c07a64e759cc52a28734c1255c
parent 2c4c845cd2b40cfd468868efff7d4ba350124b47 (diff)
download: v2-fb075b60b5d8f74d157d12cf66e32fe9580cf3fe.tar.gz
          v2-fb075b60b5d8f74d157d12cf66e32fe9580cf3fe.tar.zst
          v2-fb075b60b5d8f74d157d12cf66e32fe9580cf3fe.zip
reader/processor: minifier is breaking HTML entry content
-rw-r--r-- internal/reader/processor/processor.go 34
-rw-r--r-- internal/reader/processor/processor_test.go 9
2 files changed, 27 insertions, 16 deletions
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index c92550d2..fa3e53be 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -38,9 +38,6 @@ var (
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
var filteredEntries model.Entries
- minifier := minify.New()
- minifier.AddFunc("text/html", html.Minify)
-
// Process older entries first
for i := len(feed.Entries) - 1; i >= 0; i-- {
entry := feed.Entries[i]
@@ -107,11 +104,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
)
} else if content != "" {
// We replace the entry content only if the scraper doesn't return any error.
- if minifiedHTML, err := minifier.String("text/html", content); err == nil {
- entry.Content = minifiedHTML
- } else {
- entry.Content = content
- }
+ entry.Content = minifyEntryContent(content)
}
}
@@ -189,9 +182,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
- minifier := minify.New()
- minifier.AddFunc("text/html", html.Minify)
-
startTime := time.Now()
websiteURL := getUrlFromEntry(feed, entry)
@@ -223,11 +213,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
}
if content != "" {
- if minifiedHTML, err := minifier.String("text/html", content); err == nil {
- entry.Content = minifiedHTML
- } else {
- entry.Content = content
- }
+ entry.Content = minifyEntryContent(content)
if user.ShowReadingTime {
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}
@@ -439,3 +425,19 @@ func isRecentEntry(entry *model.Entry) bool {
}
return false
}
+
+func minifyEntryContent(entryContent string) string {
+ m := minify.New()
+
+ // Keep end tags and attribute quotes: these options are required to avoid breaking the HTML content.
+ m.Add("text/html", &html.Minifier{
+ KeepEndTags: true,
+ KeepQuotes: true,
+ })
+
+ if minifiedHTML, err := m.String("text/html", entryContent); err == nil { // on error, the input is returned unchanged
+ entryContent = minifiedHTML
+ }
+
+ return entryContent
+}
diff --git a/internal/reader/processor/processor_test.go b/internal/reader/processor/processor_test.go
index e99a566a..48ae2d6b 100644
--- a/internal/reader/processor/processor_test.go
+++ b/internal/reader/processor/processor_test.go
@@ -117,3 +117,12 @@ func TestIsRecentEntry(t *testing.T) {
}
}
}
+
+func TestMinifyEntryContent(t *testing.T) {
+ input := `<p> Some text with a <a href="http://example.org/"> link </a> </p>` // extra spaces inside the <p> and <a> elements
+ expected := `<p>Some text with a <a href="http://example.org/">link</a></p>` // extra spaces removed; end tags and attribute quotes still present
+ result := minifyEntryContent(input)
+ if expected != result {
+ t.Errorf(`Unexpected result, got %q`, result)
+ }
+}