aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Frédéric Guillot <f@miniflux.net> 2023-10-06 20:57:53 -0700
committerGravatar Frédéric Guillot <f@miniflux.net> 2023-10-06 22:04:31 -0700
commit7b541af2536ce8337758390c5e5b51db3eb75cef (patch)
tree6475fbc466dde2e7c83726269ad89ae60d560753
parent09e9b0361dbca3881fb34964325d5b4656e861c9 (diff)
downloadv2-7b541af2536ce8337758390c5e5b51db3eb75cef.tar.gz
v2-7b541af2536ce8337758390c5e5b51db3eb75cef.tar.zst
v2-7b541af2536ce8337758390c5e5b51db3eb75cef.zip
Replace github.com/rylans/getlang with github.com/abadojack/whatlanggo
github.com/rylans/getlang doesn't seem to be updated anymore
Diffstat (limited to '')
-rw-r--r--go.mod3
-rw-r--r--go.sum11
-rw-r--r--internal/reader/processor/processor.go24
-rw-r--r--internal/reader/readingtime/readingtime.go31
-rw-r--r--internal/reader/readingtime/readingtime_test.go61
5 files changed, 103 insertions, 27 deletions
diff --git a/go.mod b/go.mod
index ec90e70e..e6256ecb 100644
--- a/go.mod
+++ b/go.mod
@@ -4,12 +4,12 @@ module miniflux.app/v2
require (
github.com/PuerkitoBio/goquery v1.8.1
+ github.com/abadojack/whatlanggo v1.0.1
github.com/coreos/go-oidc/v3 v3.6.0
github.com/gorilla/mux v1.8.0
github.com/lib/pq v1.10.9
github.com/mccutchen/go-httpbin/v2 v2.11.1
github.com/prometheus/client_golang v1.17.0
- github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0
github.com/tdewolff/minify/v2 v2.12.9
github.com/yuin/goldmark v1.5.6
golang.org/x/crypto v0.14.0
@@ -29,6 +29,7 @@ require (
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
+ github.com/stretchr/testify v1.8.4 // indirect
github.com/tdewolff/parse/v2 v2.6.8 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
diff --git a/go.sum b/go.sum
index 21d0762f..8df4025a 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
+github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
+github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -40,12 +42,10 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
-github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0 h1:qSaU9YAEIxk/ozcmY1hiauktAYTpbwYIrPdQ0L2E8UM=
-github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0/go.mod h1:3vfmZI6aJd5Rb9W2TQ0Nmupl+qem21R05+hmCscI0Bk=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
-github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tdewolff/minify/v2 v2.12.9 h1:dvn5MtmuQ/DFMwqf5j8QhEVpPX6fi3WGImhv8RUB4zA=
github.com/tdewolff/minify/v2 v2.12.9/go.mod h1:qOqdlDfL+7v0/fyymB+OP497nIxJYSvX4MQWA8OoiXU=
github.com/tdewolff/parse/v2 v2.6.8 h1:mhNZXYCx//xG7Yq2e/kVLNZw4YfYmeHbhx+Zc0OvFMA=
@@ -110,7 +110,8 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8=
mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE=
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index c6514bf6..2fa85a7c 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -7,25 +7,22 @@ import (
"errors"
"fmt"
"log/slog"
- "math"
"regexp"
"strconv"
- "strings"
"time"
- "unicode/utf8"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/http/client"
"miniflux.app/v2/internal/metric"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/browser"
+ "miniflux.app/v2/internal/reader/readingtime"
"miniflux.app/v2/internal/reader/rewrite"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/reader/scraper"
"miniflux.app/v2/internal/storage"
"github.com/PuerkitoBio/goquery"
- "github.com/rylans/getlang"
)
var (
@@ -174,7 +171,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
if content != "" {
entry.Content = content
- entry.ReadingTime = calculateReadingTime(content, user)
+ entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}
rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
@@ -252,7 +249,7 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
}
// Handle YT error case and non-YT entries.
if entry.ReadingTime == 0 {
- entry.ReadingTime = calculateReadingTime(entry.Content, user)
+ entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}
}
@@ -360,18 +357,3 @@ func parseISO8601(from string) (time.Duration, error) {
return d, nil
}
-
-func calculateReadingTime(content string, user *model.User) int {
- sanitizedContent := sanitizer.StripTags(content)
- languageInfo := getlang.FromString(sanitizedContent)
-
- var timeToReadInt int
- if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
- timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(user.CJKReadingSpeed)))
- } else {
- nbOfWords := len(strings.Fields(sanitizedContent))
- timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(user.DefaultReadingSpeed)))
- }
-
- return timeToReadInt
-}
diff --git a/internal/reader/readingtime/readingtime.go b/internal/reader/readingtime/readingtime.go
new file mode 100644
index 00000000..faf78471
--- /dev/null
+++ b/internal/reader/readingtime/readingtime.go
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package readingtime provides a function to estimate the reading time of an article.
+package readingtime
+
+import (
+ "math"
+ "strings"
+ "unicode/utf8"
+
+ "miniflux.app/v2/internal/reader/sanitizer"
+
+ "github.com/abadojack/whatlanggo"
+)
+
+// EstimateReadingTime returns the estimated reading time of an article in minutes.
+func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
+ sanitizedContent := sanitizer.StripTags(content)
+ langInfo := whatlanggo.Detect(sanitizedContent)
+
+ var timeToReadInt int
+ if langInfo.IsReliable() && (langInfo.Lang == whatlanggo.Jpn || langInfo.Lang == whatlanggo.Cmn || langInfo.Lang == whatlanggo.Kor) {
+ timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
+ } else {
+ nbOfWords := len(strings.Fields(sanitizedContent))
+ timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(defaultReadingSpeed)))
+ }
+
+ return timeToReadInt
+}
diff --git a/internal/reader/readingtime/readingtime_test.go b/internal/reader/readingtime/readingtime_test.go
new file mode 100644
index 00000000..4915c7cb
--- /dev/null
+++ b/internal/reader/readingtime/readingtime_test.go
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package readingtime
+
+import "testing"
+
+func TestEstimateReadingTimeInEnglish(t *testing.T) {
+ sampleText := `
+ In turpis lacus, sollicitudin non accumsan sed, suscipit eget magna. Morbi id
+ neque enim. Aenean ac lacus consectetur, accumsan elit ac, suscipit dui. Donec
+ congue mi et nisl bibendum, venenatis fringilla orci tristique. Nullam ullamcorper
+ cursus justo, ac iaculis ante euismod a. Fusce dapibus lacus arcu, consectetur
+ porttitor odio finibus ac. Integer dictum faucibus egestas. Etiam magna diam, placerat
+ sed velit vitae, lobortis accumsan nisi. Sed viverra dui in odio commodo dapibus.
+ Sed pulvinar metus finibus, hendrerit diam eu, faucibus lectus. Mauris est tellus,
+ convallis et velit sit amet, convallis sagittis nunc. Quisque at ex leo. Donec eget leo
+ vel nibh porta molestie. Aenean pellentesque purus non laoreet aliquam.
+
+ In feugiat eget arcu nec sodales. Nunc rutrum felis in tellus venenatis, sit
+ amet tincidunt augue varius. Nunc nec dignissim quam. In euismod gravida rhoncus.
+ Vivamus eget nibh sed diam malesuada facilisis. Donec ac convallis elit. Fusce
+ fermentum tincidunt est. Nunc viverra, eros in gravida convallis, ex augue vehicula
+ magna, sed tincidunt metus sem et mauris. In pretium purus odio, a auctor tellus
+ ornare vel. Donec ac dolor pulvinar, placerat elit eget, ultrices nisi. Donec
+ tincidunt magna eget pretium sodales. In urna lorem, consectetur in fringilla eget,
+ rutrum et erat. Proin fringilla, lectus eget commodo consequat, est massa lacinia
+ lorem, ut ultricies nunc erat id sapien.
+
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce fermentum id
+ sem sed commodo. Ut eget mauris eu lectus mollis aliquam. Fusce convallis, quam
+ vel volutpat aliquet, nunc sem rhoncus magna, a iaculis enim ex nec neque.
+ Suspendisse vel imperdiet leo. Quisque ultrices semper commodo. Pellentesque nec libero et
+ mauris gravida porta vitae id nunc. Fusce sed sem sed augue gravida ultricies at nec
+ turpis. Sed semper eu urna sit amet malesuada. Suspendisse blandit condimentum elit,
+ in scelerisque tellus convallis eu. Nunc eleifend sem et mauris vestibulum
+ mattis. Praesent ultricies pellentesque eros non posuere.
+ `
+
+ readingTime := EstimateReadingTime(sampleText, 200, 500)
+ if readingTime != 2 {
+ t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
+ }
+}
+
+func TestEstimateReadingTimeInChinese(t *testing.T) {
+ sampleText := `
+ 労問委格名町違載式新青脂通由。割止書円画民京般著治登門画拡下。有国同観教田美森素説砂者徴多。上治速相支存色分繰年活元事集遣逆山。身消年森発世財間世変悲原記潟旅好手真今。現通浪口特愛始信川節身方一表著購。郁不使権草定内防並要更一条露加。載交源図訴際属年券重供健三洗。事北残却女鮎朝分要廷込宣政愛無投事。
+
+ 問警技亮参沼洗請米物模人。誰探重午局新戦報投性病庭。典向載問千著書故表視新権最石車音端乏大。白僚三掲局係仕表広無旧見要最裁。額寄済生年余講前本次載隊劇。権成観始応泉早高拓了経地本稼室目犯井出。暮載必広傷内校岡公南散広転行別釈。康運行関本掲隠泉傷退報告。独変年換差取予口男旅挑講禁姿。出芳工類胸管払時済潟髪内豊。
+
+ 康浴部問玲玉追球化就店岡問画路投。施先太業阪能敏所陸不供探掲方用。手右演社援発示竹育対橋除際愛功旬転好使公。利時改本項輸属嘆員複携者地剤。天政朝戸祝言月接住世黙極者議編連。囲淑覧重弾必治物健賄開頂外称豊開名銀戸院。政稿調励廃演手生告題営味董演何南峰貨。学横公得行提大品回猿齢利込家前役把煎。天代者内身慢作業署間地日。
+
+ 中個興本広坂態掲神中能等無滞長対。号処月画界意気様党目購栃欠歌暮。一耳供意盛四俊健必財下画例本判著堺要北王。宮大攻人水一備治首闘振円分建前趣校。目少供午見掲岡安画入情薦続土世始。診読格七久改急目斉実配正。性止月模多様更社発掲雪奇芸量全兵経負。予転済反問止下生買再無旅的。模治明以共会必華浅知館版領送。
+ `
+
+ readingTime := EstimateReadingTime(sampleText, 200, 500)
+ if readingTime != 2 {
+ t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
+ }
+}