From fb075b60b5d8f74d157d12cf66e32fe9580cf3fe Mon Sep 17 00:00:00 2001 From: Frédéric Guillot Date: Tue, 23 Apr 2024 20:08:09 -0700 Subject: reader/processor: minifier is breaking HTML entry content --- internal/reader/processor/processor.go | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'internal/reader/processor/processor.go') diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index c92550d2..fa3e53be 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -38,9 +38,6 @@ var ( func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) { var filteredEntries model.Entries - minifier := minify.New() - minifier.AddFunc("text/html", html.Minify) - // Process older entries first for i := len(feed.Entries) - 1; i >= 0; i-- { entry := feed.Entries[i] @@ -107,11 +104,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us ) } else if content != "" { // We replace the entry content only if the scraper doesn't return any error. - if minifiedHTML, err := minifier.String("text/html", content); err == nil { - entry.Content = minifiedHTML - } else { - entry.Content = content - } + entry.Content = minifyEntryContent(content) } } @@ -189,9 +182,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool { // ProcessEntryWebPage downloads the entry web page and apply rewrite rules. func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error { - minifier := minify.New() - minifier.AddFunc("text/html", html.Minify) - startTime := time.Now() websiteURL := getUrlFromEntry(feed, entry) @@ -223,11 +213,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) } if content != "" { - if minifiedHTML, err := minifier.String("text/html", content); err == nil { - entry.Content = minifiedHTML - } else { - entry.Content = content - } + entry.Content = minifyEntryContent(content) if user.ShowReadingTime { entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed) } @@ -439,3 +425,19 @@ func isRecentEntry(entry *model.Entry) bool { } return false } + +func minifyEntryContent(entryContent string) string { + m := minify.New() + + // Options required to avoid breaking the HTML content. + m.Add("text/html", &html.Minifier{ + KeepEndTags: true, + KeepQuotes: true, + }) + + if minifiedHTML, err := m.String("text/html", entryContent); err == nil { + entryContent = minifiedHTML + } + + return entryContent +} -- cgit v1.2.3