aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- reader/rewrite/rewrite_functions.go 31
-rw-r--r-- reader/rewrite/rewriter.go 2
-rw-r--r-- reader/rewrite/rewriter_test.go 10
3 files changed, 43 insertions, 0 deletions
diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go
index 9e6e3e27..9ed36ca0 100644
--- a/reader/rewrite/rewrite_functions.go
+++ b/reader/rewrite/rewrite_functions.go
@@ -335,3 +335,34 @@ func parseMarkdown(entryContent string) string {
return sb.String()
}
+
+// removeTables strips table markup from entryContent while keeping the
+// content of the table cells.
+//
+// Every <table> is replaced, at the position it occupied, by the inner HTML
+// of its own <td>/<th> cells in document order. Nested tables are unwrapped
+// on subsequent passes, so no <table> element remains in the result. A table
+// without any cell is simply removed.
+//
+// The input is returned unchanged if it cannot be parsed or if the resulting
+// document cannot be serialized.
+func removeTables(entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	// Each pass removes exactly one <table> element, so the loop terminates
+	// once the document contains no table anymore.
+	for {
+		table := doc.Find("table").First()
+		if table.Length() == 0 {
+			break
+		}
+
+		// Only take the cells that belong to this table. Cells of nested
+		// tables are already contained in an outer cell's HTML; collecting
+		// them here as well would duplicate their content.
+		cells := table.Find("td, th").FilterFunction(func(_ int, cell *goquery.Selection) bool {
+			return cell.Closest("table").IsSelection(table)
+		})
+
+		var sb strings.Builder
+		cells.Each(func(_ int, cell *goquery.Selection) {
+			cellHTML, err := cell.Html()
+			if err != nil {
+				// Skip a cell that cannot be rendered instead of aborting
+				// the whole rewrite.
+				return
+			}
+			// Separate cells with a space so that plain text from adjacent
+			// cells does not get glued together.
+			if sb.Len() > 0 {
+				sb.WriteByte(' ')
+			}
+			sb.WriteString(cellHTML)
+		})
+
+		// Replace in place (rather than appending to the parent) so the
+		// content keeps its position relative to the table's siblings.
+		table.ReplaceWithHtml(sb.String())
+	}
+
+	output, err := doc.Find("body").First().Html()
+	if err != nil {
+		return entryContent
+	}
+	return output
+}
diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go
index aa2dacc9..961a47ec 100644
--- a/reader/rewrite/rewriter.go
+++ b/reader/rewrite/rewriter.go
@@ -110,6 +110,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
}
case "parse_markdown":
entryContent = parseMarkdown(entryContent)
+ case "remove_tables":
+ entryContent = removeTables(entryContent)
}
return entryContent
diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go
index fab7da26..8f8aeb0a 100644
--- a/reader/rewrite/rewriter_test.go
+++ b/reader/rewrite/rewriter_test.go
@@ -325,3 +325,13 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
}
}
+
+// TestRewriteRemoveTables runs the "remove_tables" rule on a table nested
+// inside another table's cell and expects only the cell contents to remain.
+func TestRewriteRemoveTables(t *testing.T) {
+	const (
+		input = `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
+		want  = `<p>Test</p>Hello World!`
+	)
+
+	got := Rewriter("https://example.org/article", input, `remove_tables`)
+	if got == want {
+		return
+	}
+
+	t.Errorf(`Not expected output: got "%s" instead of "%s"`, got, want)
+}