diff options
author | 2018-04-26 16:51:07 -0400 | |
---|---|---|
committer | 2018-04-27 17:57:22 -0700 | |
commit | 322b265d7aec7731f7fa703c9a74ceb61ae73f3f (patch) | |
tree | 01373329a179dd163ce434739bccd6e127ca25a5 | |
parent | 920dda79b7d66f30a05aaa03e3fe8dc74aab515b (diff) | |
download | v2-322b265d7aec7731f7fa703c9a74ceb61ae73f3f.tar.gz v2-322b265d7aec7731f7fa703c9a74ceb61ae73f3f.tar.zst v2-322b265d7aec7731f7fa703c9a74ceb61ae73f3f.zip |
Scrape parent element for iframe
Current behavior: if you have an `iframe` scraper rule, `scrapContent`
tries to return the inner HTML of the `iframe`, which turns up blank.
New behavior: like `img` elements, if an `iframe` is matched by a scraper rule,
the parent element's inner HTML (i.e. the `iframe` itself) is returned.
-rw-r--r-- | reader/scraper/scraper.go | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
index 4f36d962..b90cc14d 100644
--- a/reader/scraper/scraper.go
+++ b/reader/scraper/scraper.go
@@ -72,7 +72,7 @@ func scrapContent(page io.Reader, rules string) (string, error) {
 		var content string
 
 		// For some inline elements, we get the parent.
-		if s.Is("img") {
+		if s.Is("img") || s.Is("iframe") {
 			content, _ = s.Parent().Html()
 		} else {
 			content, _ = s.Html()