Diffstat (limited to 'internal/reader/processor/processor.go')
 internal/reader/processor/processor.go | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------
 1 file changed, 56 insertions(+), 34 deletions(-)
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index 2fa85a7c..6b9af305 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -12,10 +12,9 @@ import (
 	"time"
 
 	"miniflux.app/v2/internal/config"
-	"miniflux.app/v2/internal/http/client"
 	"miniflux.app/v2/internal/metric"
 	"miniflux.app/v2/internal/model"
-	"miniflux.app/v2/internal/reader/browser"
+	"miniflux.app/v2/internal/reader/fetcher"
 	"miniflux.app/v2/internal/reader/readingtime"
 	"miniflux.app/v2/internal/reader/rewrite"
 	"miniflux.app/v2/internal/reader/sanitizer"
@@ -52,7 +51,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 			continue
 		}
 
-		url := getUrlFromEntry(feed, entry)
+		websiteURL := getUrlFromEntry(feed, entry)
 		entryIsNew := !store.EntryURLExists(feed.ID, entry.URL)
 		if feed.Crawler && (entryIsNew || forceRefresh) {
 			slog.Debug("Scraping entry",
@@ -64,13 +63,19 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 			)
 
 			startTime := time.Now()
-			content, scraperErr := scraper.Fetch(
-				url,
+
+			requestBuilder := fetcher.NewRequestBuilder()
+			requestBuilder.WithUserAgent(feed.UserAgent)
+			requestBuilder.WithCookie(feed.Cookie)
+			requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+			requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+			requestBuilder.UseProxy(feed.FetchViaProxy)
+			requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates)
+
+			content, scraperErr := scraper.ScrapeWebsite(
+				requestBuilder,
+				websiteURL,
 				feed.ScraperRules,
-				feed.UserAgent,
-				feed.Cookie,
-				feed.AllowSelfSignedCertificates,
-				feed.FetchViaProxy,
 			)
 
 			if config.Opts.HasMetricsCollector() {
@@ -96,10 +101,10 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
 			}
 		}
 
-		rewrite.Rewriter(url, entry, feed.RewriteRules)
+		rewrite.Rewriter(websiteURL, entry, feed.RewriteRules)
 
 		// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
-		entry.Content = sanitizer.Sanitize(url, entry.Content)
+		entry.Content = sanitizer.Sanitize(websiteURL, entry.Content)
 
 		updateEntryReadingTime(store, feed, entry, entryIsNew, user)
 		filteredEntries = append(filteredEntries, entry)
@@ -146,15 +151,20 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
 // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
 func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
 	startTime := time.Now()
-	url := getUrlFromEntry(feed, entry)
-
-	content, scraperErr := scraper.Fetch(
-		url,
-		entry.Feed.ScraperRules,
-		entry.Feed.UserAgent,
-		entry.Feed.Cookie,
-		feed.AllowSelfSignedCertificates,
-		feed.FetchViaProxy,
+	websiteURL := getUrlFromEntry(feed, entry)
+
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithUserAgent(feed.UserAgent)
+	requestBuilder.WithCookie(feed.Cookie)
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+	requestBuilder.UseProxy(feed.FetchViaProxy)
+	requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates)
+
+	content, scraperErr := scraper.ScrapeWebsite(
+		requestBuilder,
+		websiteURL,
+		feed.ScraperRules,
 	)
 
 	if config.Opts.HasMetricsCollector() {
@@ -174,8 +184,8 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
 		entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
 	}
 
-	rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
-	entry.Content = sanitizer.Sanitize(url, entry.Content)
+	rewrite.Rewriter(websiteURL, entry, entry.Feed.RewriteRules)
+	entry.Content = sanitizer.Sanitize(websiteURL, entry.Content)
 
 	return nil
 }
@@ -270,14 +280,20 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
 	return matches != nil
 }
 
-func fetchYouTubeWatchTime(url string) (int, error) {
-	clt := client.NewClientWithConfig(url, config.Opts)
-	response, browserErr := browser.Exec(clt)
-	if browserErr != nil {
-		return 0, browserErr
+func fetchYouTubeWatchTime(websiteURL string) (int, error) {
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
+	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
+	defer responseHandler.Close()
+
+	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
+		slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
+		return 0, localizedError.Error()
 	}
 
-	doc, docErr := goquery.NewDocumentFromReader(response.Body)
+	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
 	if docErr != nil {
 		return 0, docErr
 	}
@@ -295,14 +311,20 @@ func fetchYouTubeWatchTime(url string) (int, error) {
 	return int(dur.Minutes()), nil
 }
 
-func fetchOdyseeWatchTime(url string) (int, error) {
-	clt := client.NewClientWithConfig(url, config.Opts)
-	response, browserErr := browser.Exec(clt)
-	if browserErr != nil {
-		return 0, browserErr
+func fetchOdyseeWatchTime(websiteURL string) (int, error) {
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
+	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
+	defer responseHandler.Close()
+
+	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
+		slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
+		return 0, localizedError.Error()
 	}
 
-	doc, docErr := goquery.NewDocumentFromReader(response.Body)
+	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
 	if docErr != nil {
 		return 0, docErr
 	}
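
The call sites in this diff replace the old client/browser pair with a request-builder pattern: options are attached to a fetcher.RequestBuilder one statement at a time, and the request is only executed at the end. Below is a minimal sketch of a builder consistent with those call sites. It is an illustration only, not the real internal/reader/fetcher implementation: the field names, the assumption that WithTimeout takes seconds as an int, and ExecuteRequest returning (*http.Response, error) are all inferred from how the diff uses the API.

// Sketch only: a request builder shaped like the fetcher calls in this diff.
// Everything below is inferred from the call sites; the real package may differ.
package fetcher

import (
	"crypto/tls"
	"net/http"
	"net/url"
	"time"
)

type RequestBuilder struct {
	headers         http.Header
	clientProxyURL  string
	useClientProxy  bool
	clientTimeout   int // seconds; assumes config.Opts.HTTPClientTimeout() returns an int
	ignoreTLSErrors bool
}

func NewRequestBuilder() *RequestBuilder {
	return &RequestBuilder{headers: make(http.Header)}
}

func (r *RequestBuilder) WithUserAgent(userAgent string) {
	if userAgent != "" {
		r.headers.Set("User-Agent", userAgent)
	}
}

func (r *RequestBuilder) WithCookie(cookie string) {
	if cookie != "" {
		r.headers.Set("Cookie", cookie)
	}
}

func (r *RequestBuilder) WithTimeout(timeout int)    { r.clientTimeout = timeout }
func (r *RequestBuilder) WithProxy(proxyURL string)  { r.clientProxyURL = proxyURL }
func (r *RequestBuilder) UseProxy(value bool)        { r.useClientProxy = value }
func (r *RequestBuilder) IgnoreTLSErrors(value bool) { r.ignoreTLSErrors = value }

// ExecuteRequest performs the GET request. Returning (*http.Response, error)
// lets callers feed the pair straight into NewResponseHandler, as the diff does.
func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
	transport := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: r.ignoreTLSErrors},
	}
	if r.useClientProxy && r.clientProxyURL != "" {
		if proxyURL, err := url.Parse(r.clientProxyURL); err == nil {
			transport.Proxy = http.ProxyURL(proxyURL)
		}
	}
	client := &http.Client{
		Timeout:   time.Duration(r.clientTimeout) * time.Second,
		Transport: transport,
	}

	request, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header = r.headers
	return client.Do(request)
}

The upshot of this pattern is that per-feed options (user agent, cookie, proxy, TLS handling) are configured once on the builder instead of being threaded through every function signature, which is exactly what the shrinking parameter list of scraper.ScrapeWebsite shows.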
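
After this change, fetchYouTubeWatchTime and fetchOdyseeWatchTime share an identical fetch-and-parse preamble. A hypothetical helper like the one below could fold that duplication; fetchDocument is not part of this diff, and it assumes, per the call sites above, that LocalizedError().Error() yields a plain error and that Body() returns an io.Reader capped at the configured maximum body size.

// Hypothetical helper, not in this diff: the shared preamble of the two
// watch-time fetchers, factored out. Assumes the fetcher API as used above.
import (
	"github.com/PuerkitoBio/goquery"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/reader/fetcher"
)

func fetchDocument(websiteURL string) (*goquery.Document, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		return nil, localizedError.Error()
	}

	// goquery consumes the whole reader here, so closing the handler via
	// defer after parsing is safe.
	return goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
}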