aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jvoisin <julien.voisin@dustri.org> 2024-02-29 04:01:17 +0100
committer Frédéric Guillot <f@miniflux.net> 2024-02-28 20:03:14 -0800
commit 4db138d4b87c988eed6dbe2fc72cf1a13d393d8b (patch)
tree 45497efe3ba5ab039b82186b1cf28de37e9207b0
parent f12d5131b01a771e9dbd63705064f4b26f5a77d0 (diff)
download v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.tar.gz
download v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.tar.zst
download v2-4db138d4b87c988eed6dbe2fc72cf1a13d393d8b.zip
Minor internal/reader/readability/readability.go speedup
- Don't use a capturing group in `divToPElementsRegexp`
- Remove a duplicate condition
- Replace a regex with a fixed-comparison and a `Contains`
-rw-r--r-- internal/reader/readability/readability.go 11
1 file changed, 6 insertions, 5 deletions
diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go
index ec127bca..443f2138 100644
--- a/internal/reader/readability/readability.go
+++ b/internal/reader/readability/readability.go
@@ -21,8 +21,7 @@ const (
)
var (
- divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
- sentenceRegexp = regexp.MustCompile(`\.( |$)`)
+ divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
@@ -114,9 +113,11 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
content := s.Text()
contentLength := len(content)
- if contentLength >= 80 && linkDensity < .25 {
- append = true
- } else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
+ if contentLength >= 80 {
+ if linkDensity < .25 {
+ append = true
+ }
+ } else if linkDensity == 0 && (content[len(content)-1] == '.' || strings.Contains(content, ". ")) {
append = true
}
}