about | summary | refs | log | tree | commit | diff
path: root/internal/reader/encoding/encoding.go
diff options
context:
space:
mode:
author: Frédéric Guillot <f@miniflux.net> 2023-12-01 16:27:18 -0800
committer: Frédéric Guillot <f@miniflux.net> 2023-12-01 16:52:03 -0800
commit: d0f99cee1af46be932b3b6b7e343a511eaa829a1 (patch)
tree: c9af07ac29e666adef0e5ff68d01f362bfaead61 /internal/reader/encoding/encoding.go
parent: f8b40085cdc4af0ce8f36c54b5e16b700acedf65 (diff)
download: v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.tar.gz
v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.tar.zst
v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.zip
Regression: ensure all HTML documents are encoded in UTF-8
Fixes #2196
Diffstat (limited to 'internal/reader/encoding/encoding.go')
-rw-r--r-- internal/reader/encoding/encoding.go | 9
1 file changed, 7 insertions, 2 deletions
diff --git a/internal/reader/encoding/encoding.go b/internal/reader/encoding/encoding.go
index a580a050..71f93543 100644
--- a/internal/reader/encoding/encoding.go
+++ b/internal/reader/encoding/encoding.go
@@ -22,7 +22,7 @@ import (
// - Feeds with encoding specified in both places
// - Feeds with encoding specified only in XML document and not in HTTP header
// - Feeds with wrong encoding defined and already in UTF-8
-func CharsetReader(label string, input io.Reader) (io.Reader, error) {
+func CharsetReader(charsetLabel string, input io.Reader) (io.Reader, error) {
buffer, _ := io.ReadAll(input)
r := bytes.NewReader(buffer)
@@ -33,5 +33,10 @@ func CharsetReader(label string, input io.Reader) (io.Reader, error) {
}
// Transform document to UTF-8 from the specified encoding in XML prolog.
- return charset.NewReaderLabel(label, r)
+ return charset.NewReaderLabel(charsetLabel, r)
+}
+
+// CharsetReaderFromContentType is used when the encoding is not specified for the input document.
+func CharsetReaderFromContentType(contentType string, input io.Reader) (io.Reader, error) {
+ return charset.NewReader(input, contentType)
}