diff options
author | 2024-02-29 04:01:17 +0100 | |
---|---|---|
committer | 2024-02-28 20:03:14 -0800 | |
commit | 4db138d4b87c988eed6dbe2fc72cf1a13d393d8b (patch) | |
tree | 45497efe3ba5ab039b82186b1cf28de37e9207b0 | |
parent | f12d5131b01a771e9dbd63705064f4b26f5a77d0 (diff) | |
download | v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.tar.gz v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.tar.zst v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.zip |
Minor internal/reader/readability/readability.go speedup
- Don't use a capturing group in `divToPElementsRegexp`
- Remove a duplicate condition
- Replace a regex with a fixed-comparison and a `Contains`
-rw-r--r-- | internal/reader/readability/readability.go | 11 |
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go
index ec127bca..443f2138 100644
--- a/internal/reader/readability/readability.go
+++ b/internal/reader/readability/readability.go
@@ -21,8 +21,7 @@ const (
 )
 
 var (
-	divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
-	sentenceRegexp = regexp.MustCompile(`\.( |$)`)
+	divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
 
 	blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
 	okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
@@ -114,9 +113,11 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
 			content := s.Text()
 			contentLength := len(content)
 
-			if contentLength >= 80 && linkDensity < .25 {
-				append = true
-			} else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
+			if contentLength >= 80 {
+				if linkDensity < .25 {
+					append = true
+				}
+			} else if linkDensity == 0 && (content[len(content)-1] == '.' || strings.Contains(content, ". ")) {
 				append = true
 			}
 		}
```