aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <f@miniflux.net> 2023-10-06 20:57:53 -0700
committerGravatar Frédéric Guillot <f@miniflux.net> 2023-10-06 22:04:31 -0700
commit7b541af2536ce8337758390c5e5b51db3eb75cef (patch)
tree6475fbc466dde2e7c83726269ad89ae60d560753 /internal
parent09e9b0361dbca3881fb34964325d5b4656e861c9 (diff)
downloadv2-7b541af2536ce8337758390c5e5b51db3eb75cef.tar.gz
v2-7b541af2536ce8337758390c5e5b51db3eb75cef.tar.zst
v2-7b541af2536ce8337758390c5e5b51db3eb75cef.zip
Replace github.com/rylans/getlang with github.com/abadojack/whatlanggo
github.com/rylans/getlang doesn't seem to be updated anymore
Diffstat (limited to 'internal')
-rw-r--r--internal/reader/processor/processor.go24
-rw-r--r--internal/reader/readingtime/readingtime.go31
-rw-r--r--internal/reader/readingtime/readingtime_test.go61
3 files changed, 95 insertions, 21 deletions
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index c6514bf6..2fa85a7c 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -7,25 +7,22 @@ import (
"errors"
"fmt"
"log/slog"
- "math"
"regexp"
"strconv"
- "strings"
"time"
- "unicode/utf8"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/http/client"
"miniflux.app/v2/internal/metric"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/browser"
+ "miniflux.app/v2/internal/reader/readingtime"
"miniflux.app/v2/internal/reader/rewrite"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/reader/scraper"
"miniflux.app/v2/internal/storage"
"github.com/PuerkitoBio/goquery"
- "github.com/rylans/getlang"
)
var (
@@ -174,7 +171,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
if content != "" {
entry.Content = content
- entry.ReadingTime = calculateReadingTime(content, user)
+ entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}
rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
@@ -252,7 +249,7 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
}
// Handle YT error case and non-YT entries.
if entry.ReadingTime == 0 {
- entry.ReadingTime = calculateReadingTime(entry.Content, user)
+ entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}
}
@@ -360,18 +357,3 @@ func parseISO8601(from string) (time.Duration, error) {
return d, nil
}
-
-func calculateReadingTime(content string, user *model.User) int {
- sanitizedContent := sanitizer.StripTags(content)
- languageInfo := getlang.FromString(sanitizedContent)
-
- var timeToReadInt int
- if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
- timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(user.CJKReadingSpeed)))
- } else {
- nbOfWords := len(strings.Fields(sanitizedContent))
- timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(user.DefaultReadingSpeed)))
- }
-
- return timeToReadInt
-}
diff --git a/internal/reader/readingtime/readingtime.go b/internal/reader/readingtime/readingtime.go
new file mode 100644
index 00000000..faf78471
--- /dev/null
+++ b/internal/reader/readingtime/readingtime.go
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package readingtime provides a function to estimate the reading time of an article.
+package readingtime
+
+import (
+ "math"
+ "strings"
+ "unicode/utf8"
+
+ "miniflux.app/v2/internal/reader/sanitizer"
+
+ "github.com/abadojack/whatlanggo"
+)
+
+// EstimateReadingTime returns the estimated reading time of an article in minute.
+func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
+ sanitizedContent := sanitizer.StripTags(content)
+ langInfo := whatlanggo.Detect(sanitizedContent)
+
+ var timeToReadInt int
+ if langInfo.IsReliable() && (langInfo.Lang == whatlanggo.Jpn || langInfo.Lang == whatlanggo.Cmn || langInfo.Lang == whatlanggo.Kor) {
+ timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
+ } else {
+ nbOfWords := len(strings.Fields(sanitizedContent))
+ timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(defaultReadingSpeed)))
+ }
+
+ return timeToReadInt
+}
diff --git a/internal/reader/readingtime/readingtime_test.go b/internal/reader/readingtime/readingtime_test.go
new file mode 100644
index 00000000..4915c7cb
--- /dev/null
+++ b/internal/reader/readingtime/readingtime_test.go
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package readingtime
+
+import "testing"
+
+func TestEstimateReadingTimeInEnglish(t *testing.T) {
+ sampleText := `
+ In turpis lacus, sollicitudin non accumsan sed, suscipit eget magna. Morbi id
+ neque enim. Aenean ac lacus consectetur, accumsan elit ac, suscipit dui. Donec
+ congue mi et nisl bibendum, venenatis fringilla orci tristique. Nullam ullamcorper
+ cursus justo, ac iaculis ante euismod a. Fusce dapibus lacus arcu, consectetur
+ porttitor odio finibus ac. Integer dictum faucibus egestas. Etiam magna diam, placerat
+ sed velit vitae, lobortis accumsan nisi. Sed viverra dui in odio commodo dapibus.
+ Sed pulvinar metus finibus, hendrerit diam eu, faucibus lectus. Mauris est tellus,
+ convallis et velit sit amet, convallis sagittis nunc. Quisque at ex leo. Donec eget leo
+ vel nibh porta molestie. Aenean pellentesque purus non laoreet aliquam.
+
+ In feugiat eget arcu nec sodales. Nunc rutrum felis in tellus venenatis, sit
+ amet tincidunt augue varius. Nunc nec dignissim quam. In euismod gravida rhoncus.
+ Vivamus eget nibh sed diam malesuada facilisis. Donec ac convallis elit. Fusce
+ fermentum tincidunt est. Nunc viverra, eros in gravida convallis, ex augue vehicula
+ magna, sed tincidunt metus sem et mauris. In pretium purus odio, a auctor tellus
+ ornare vel. Donec ac dolor pulvinar, placerat elit eget, ultrices nisi. Donec
+ tincidunt magna eget pretium sodales. In urna lorem, consectetur in fringilla eget,
+ rutrum et erat. Proin fringilla, lectus eget commodo consequat, est massa lacinia
+ lorem, ut ultricies nunc erat id sapien.
+
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce fermentum id
+ sem sed commodo. Ut eget mauris eu lectus mollis aliquam. Fusce convallis, quam
+ vel volutpat aliquet, nunc sem rhoncus magna, a iaculis enim ex nec neque.
+ Suspendisse vel imperdiet leo. Quisque ultrices semper commodo. Pellentesque nec libero et
+ mauris gravida porta vitae id nunc. Fusce sed sem sed augue gravida ultricies at nec
+ turpis. Sed semper eu urna sit amet malesuada. Suspendisse blandit condimentum elit,
+ in scelerisque tellus convallis eu. Nunc eleifend sem et mauris vestibulum
+ mattis. Praesent ultricies pellentesque eros non posuere.
+ `
+
+ readingTime := EstimateReadingTime(sampleText, 200, 500)
+ if readingTime != 2 {
+ t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
+ }
+}
+
+func TestEstimateReadingTimeInChinese(t *testing.T) {
+ sampleText := `
+ 労問委格名町違載式新青脂通由。割止書円画民京般著治登門画拡下。有国同観教田美森素説砂者徴多。上治速相支存色分繰年活元事集遣逆山。身消年森発世財間世変悲原記潟旅好手真今。現通浪口特愛始信川節身方一表著購。郁不使権草定内防並要更一条露加。載交源図訴際属年券重供健三洗。事北残却女鮎朝分要廷込宣政愛無投事。
+
+ 問警技亮参沼洗請米物模人。誰探重午局新戦報投性病庭。典向載問千著書故表視新権最石車音端乏大。白僚三掲局係仕表広無旧見要最裁。額寄済生年余講前本次載隊劇。権成観始応泉早高拓了経地本稼室目犯井出。暮載必広傷内校岡公南散広転行別釈。康運行関本掲隠泉傷退報告。独変年換差取予口男旅挑講禁姿。出芳工類胸管払時済潟髪内豊。
+
+ 康浴部問玲玉追球化就店岡問画路投。施先太業阪能敏所陸不供探掲方用。手右演社援発示竹育対橋除際愛功旬転好使公。利時改本項輸属嘆員複携者地剤。天政朝戸祝言月接住世黙極者議編連。囲淑覧重弾必治物健賄開頂外称豊開名銀戸院。政稿調励廃演手生告題営味董演何南峰貨。学横公得行提大品回猿齢利込家前役把煎。天代者内身慢作業署間地日。
+
+ 中個興本広坂態掲神中能等無滞長対。号処月画界意気様党目購栃欠歌暮。一耳供意盛四俊健必財下画例本判著堺要北王。宮大攻人水一備治首闘振円分建前趣校。目少供午見掲岡安画入情薦続土世始。診読格七久改急目斉実配正。性止月模多様更社発掲雪奇芸量全兵経負。予転済反問止下生買再無旅的。模治明以共会必華浅知館版領送。
+ `
+
+ readingTime := EstimateReadingTime(sampleText, 200, 500)
+ if readingTime != 2 {
+ t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
+ }
+}