diff options
author | 2024-06-19 20:38:24 +0200 | |
---|---|---|
committer | 2024-06-21 14:00:40 -0700 | |
commit | ee5e18ea9f06c69dc0e37ca077f82b5f315505e8 (patch) | |
tree | 24176c9e4e3192ae272a73c8cad78e803a40bff4 | |
parent | 3ef2522c62d251a464aab5ad0e2b6fb6bafd6469 (diff) | |
download | v2-ee5e18ea9f06c69dc0e37ca077f82b5f315505e8.tar.gz v2-ee5e18ea9f06c69dc0e37ca077f82b5f315505e8.tar.zst v2-ee5e18ea9f06c69dc0e37ca077f82b5f315505e8.zip |
sanitizer: add support for HTML `hidden` attribute
This commit adjusts the `Sanitize` function to skip elements that carry
the `hidden` attribute, together with everything nested inside them, in
the same way it already skips blocked tags and their contents.
-rw-r--r-- | internal/reader/sanitizer/sanitizer.go | 19 |
-rw-r--r-- | internal/reader/sanitizer/sanitizer_test.go | 10 |
2 files changed, 20 insertions, 9 deletions
diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go index 7f063f04..962111b2 100644 --- a/internal/reader/sanitizer/sanitizer.go +++ b/internal/reader/sanitizer/sanitizer.go @@ -82,7 +82,7 @@ func Sanitize(baseURL, input string) string { var buffer strings.Builder var tagStack []string var parentTag string - blacklistedTagDepth := 0 + var blockedStack []string tokenizer := html.NewTokenizer(strings.NewReader(input)) for { @@ -98,7 +98,7 @@ func Sanitize(baseURL, input string) string { token := tokenizer.Token() switch token.Type { case html.TextToken: - if blacklistedTagDepth > 0 { + if len(blockedStack) > 0 { continue } @@ -116,7 +116,10 @@ func Sanitize(baseURL, input string) string { if isPixelTracker(tagName, token.Attr) { continue } - if isValidTag(tagName) { + + if isBlockedTag(tagName) || slices.ContainsFunc(token.Attr, func(attr html.Attribute) bool { return attr.Key == "hidden" }) { + blockedStack = append(blockedStack, tagName) + } else if len(blockedStack) == 0 && isValidTag(tagName) { attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) if hasRequiredAttributes(tagName, attrNames) { @@ -128,22 +131,20 @@ func Sanitize(baseURL, input string) string { tagStack = append(tagStack, tagName) } - } else if isBlockedTag(tagName) { - blacklistedTagDepth++ } case html.EndTagToken: tagName := token.DataAtom.String() - if isValidTag(tagName) && slices.Contains(tagStack, tagName) { + if len(blockedStack) > 0 && blockedStack[len(blockedStack)-1] == tagName { + blockedStack = blockedStack[:len(blockedStack)-1] + } else if len(blockedStack) == 0 && isValidTag(tagName) && slices.Contains(tagStack, tagName) { buffer.WriteString("</" + tagName + ">") - } else if isBlockedTag(tagName) { - blacklistedTagDepth-- } case html.SelfClosingTagToken: tagName := token.DataAtom.String() if isPixelTracker(tagName, token.Attr) { continue } - if isValidTag(tagName) { + if isValidTag(tagName) && len(blockedStack) == 0 { 
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) if hasRequiredAttributes(tagName, attrNames) { if len(attrNames) > 0 { diff --git a/internal/reader/sanitizer/sanitizer_test.go b/internal/reader/sanitizer/sanitizer_test.go index 07207a44..8289e602 100644 --- a/internal/reader/sanitizer/sanitizer_test.go +++ b/internal/reader/sanitizer/sanitizer_test.go @@ -630,3 +630,13 @@ func TestReplaceStyle(t *testing.T) { t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) } } + +func TestHiddenParagraph(t *testing.T) { + input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>` + expected := `<p>Before paragraph.</p><p>After paragraph.</p>` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} |