diff options
author | 2023-12-01 16:27:18 -0800 | |
---|---|---|
committer | 2023-12-01 16:52:03 -0800 | |
commit | d0f99cee1af46be932b3b6b7e343a511eaa829a1 (patch) | |
tree | c9af07ac29e666adef0e5ff68d01f362bfaead61 /internal/reader/encoding/encoding.go | |
parent | f8b40085cdc4af0ce8f36c54b5e16b700acedf65 (diff) | |
download | v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.tar.gz v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.tar.zst v2-d0f99cee1af46be932b3b6b7e343a511eaa829a1.zip |
Regression: ensure all HTML documents are encoded in UTF-8
Fixes #2196
Diffstat (limited to 'internal/reader/encoding/encoding.go')
-rw-r--r-- | internal/reader/encoding/encoding.go | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/internal/reader/encoding/encoding.go b/internal/reader/encoding/encoding.go
index a580a050..71f93543 100644
--- a/internal/reader/encoding/encoding.go
+++ b/internal/reader/encoding/encoding.go
@@ -22,7 +22,7 @@ import (
 // - Feeds with encoding specified in both places
 // - Feeds with encoding specified only in XML document and not in HTTP header
 // - Feeds with wrong encoding defined and already in UTF-8
-func CharsetReader(label string, input io.Reader) (io.Reader, error) {
+func CharsetReader(charsetLabel string, input io.Reader) (io.Reader, error) {
 	buffer, _ := io.ReadAll(input)
 	r := bytes.NewReader(buffer)
 
@@ -33,5 +33,10 @@ func CharsetReader(label string, input io.Reader) (io.Reader, error) {
 	}
 
 	// Transform document to UTF-8 from the specified encoding in XML prolog.
-	return charset.NewReaderLabel(label, r)
+	return charset.NewReaderLabel(charsetLabel, r)
+}
+
+// CharsetReaderFromContentType is used when the encoding is not specified for the input document.
+func CharsetReaderFromContentType(contentType string, input io.Reader) (io.Reader, error) {
+	return charset.NewReader(input, contentType)
 }
```