diff options
author | 2023-03-31 19:23:31 +0100 | |
---|---|---|
committer | 2023-04-02 17:50:19 -0700 | |
commit | 49d2596fc61d4f8575fec777d3beed751ef5337c (patch) | |
tree | 753683188a9959e0a049976d497b8c9ef9d55aba | |
parent | 9a826bbe6f28642ef0483f934bfcffa7a1dbe91f (diff) | |
download | v2-49d2596fc61d4f8575fec777d3beed751ef5337c.tar.gz v2-49d2596fc61d4f8575fec777d3beed751ef5337c.tar.zst v2-49d2596fc61d4f8575fec777d3beed751ef5337c.zip |
Basic table removal rule
-rw-r--r-- | reader/rewrite/rewrite_functions.go | 31 | ||||
-rw-r--r-- | reader/rewrite/rewriter.go | 2 | ||||
-rw-r--r-- | reader/rewrite/rewriter_test.go | 10 |
3 files changed, 43 insertions, 0 deletions
diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go
index 9e6e3e27..9ed36ca0 100644
--- a/reader/rewrite/rewrite_functions.go
+++ b/reader/rewrite/rewrite_functions.go
@@ -335,3 +335,34 @@ func parseMarkdown(entryContent string) string {
 
 	return sb.String()
 }
+
+func removeTables(entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	var table *goquery.Selection
+
+	for {
+		table = doc.Find("table").First()
+
+		if table.Length() == 0 {
+			break
+		}
+
+		td := table.Find("td").First()
+
+		if td.Length() == 0 {
+			break
+		}
+
+		tdHtml, _ := td.Html()
+
+		table.Parent().AppendHtml(tdHtml)
+		table.Remove()
+	}
+
+	output, _ := doc.Find("body").First().Html()
+	return output
+}
diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go
index aa2dacc9..961a47ec 100644
--- a/reader/rewrite/rewriter.go
+++ b/reader/rewrite/rewriter.go
@@ -110,6 +110,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
 		}
 	case "parse_markdown":
 		entryContent = parseMarkdown(entryContent)
+	case "remove_tables":
+		entryContent = removeTables(entryContent)
 	}
 
 	return entryContent
diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go
index fab7da26..8f8aeb0a 100644
--- a/reader/rewrite/rewriter_test.go
+++ b/reader/rewrite/rewriter_test.go
@@ -325,3 +325,13 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
 		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
 	}
 }
+
+func TestRewriteRemoveTables(t *testing.T) {
+	content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
+	expected := `<p>Test</p>Hello World!`
+	output := Rewriter("https://example.org/article", content, `remove_tables`)
+
+	if expected != output {
+		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	}
+}