 internal/database/migrations.go        |  6 ++++++
 internal/reader/processor/processor.go | 16 ++++++++--------
 internal/storage/entry.go              | 14 ++++++--------
 3 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/internal/database/migrations.go b/internal/database/migrations.go
index d40e5d2f..fa3c3972 100644
--- a/internal/database/migrations.go
+++ b/internal/database/migrations.go
@@ -882,4 +882,10 @@ var migrations = []func(tx *sql.Tx) error{
_, err = tx.Exec(sql)
return err
},
+ func(tx *sql.Tx) (err error) {
+ // Entry URLs can exceed the btree maximum index row size.
+ // Entry existence checks now go through the entries_feed_id_status_hash_idx index instead.
+ _, err = tx.Exec(`DROP INDEX entries_feed_url_idx`)
+ return err
+ },
}
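
The migration above drops entries_feed_url_idx because very long entry URLs can overflow the btree index row size limit, and existence checks are expected to go through the hash-based index named in the comment. As a rough illustration only, the sketch below shows the kind of migration that could define such a covering index; the exact column list of entries_feed_id_status_hash_idx is an assumption inferred from its name, not taken from this diff.

func(tx *sql.Tx) (err error) {
	// Hypothetical definition of the index the new existence check relies on;
	// the column order (feed_id, status, hash) is inferred from the index name.
	_, err = tx.Exec(`CREATE INDEX entries_feed_id_status_hash_idx ON entries USING btree(feed_id, status, hash)`)
	return err
},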
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index 913ae0b3..ab4448ef 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -42,8 +42,9 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
slog.Debug("Processing entry",
slog.Int64("user_id", user.ID),
- slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
+ slog.String("entry_hash", entry.Hash),
+ slog.String("entry_title", entry.Title),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
)
@@ -52,14 +53,18 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
}
websiteURL := getUrlFromEntry(feed, entry)
- entryIsNew := !store.EntryURLExists(feed.ID, entry.URL)
+ entryIsNew := store.IsNewEntry(feed.ID, entry.Hash)
if feed.Crawler && (entryIsNew || forceRefresh) {
slog.Debug("Scraping entry",
slog.Int64("user_id", user.ID),
- slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
+ slog.String("entry_hash", entry.Hash),
+ slog.String("entry_title", entry.Title),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
+ slog.Bool("entry_is_new", entryIsNew),
+ slog.Bool("force_refresh", forceRefresh),
+ slog.String("website_url", websiteURL),
)
startTime := time.Now()
@@ -90,7 +95,6 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
if scraperErr != nil {
slog.Warn("Unable to scrape entry",
slog.Int64("user_id", user.ID),
- slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
@@ -134,7 +138,6 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
if compiledBlocklist.MatchString(entry.URL) || compiledBlocklist.MatchString(entry.Title) || compiledBlocklist.MatchString(entry.Author) || containsBlockedTag {
slog.Debug("Blocking entry based on rule",
- slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
@@ -165,7 +168,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
if compiledKeeplist.MatchString(entry.URL) || compiledKeeplist.MatchString(entry.Title) || compiledKeeplist.MatchString(entry.Author) || containsAllowedTag {
slog.Debug("Allow entry based on rule",
- slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
@@ -230,7 +232,6 @@ func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
re := regexp.MustCompile(parts[1])
url = re.ReplaceAllString(entry.URL, parts[2])
slog.Debug("Rewriting entry URL",
- slog.Int64("entry_id", entry.ID),
slog.String("original_entry_url", entry.URL),
slog.String("rewritten_entry_url", url),
slog.Int64("feed_id", feed.ID),
@@ -238,7 +239,6 @@ func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
)
} else {
slog.Debug("Cannot find search and replace terms for replace rule",
- slog.Int64("entry_id", entry.ID),
slog.String("original_entry_url", entry.URL),
slog.String("rewritten_entry_url", url),
slog.Int64("feed_id", feed.ID),
diff --git a/internal/storage/entry.go b/internal/storage/entry.go
index 1a7cc6d7..867338f7 100644
--- a/internal/storage/entry.go
+++ b/internal/storage/entry.go
@@ -225,6 +225,12 @@ func (s *Storage) entryExists(tx *sql.Tx, entry *model.Entry) (bool, error) {
return result, nil
}
+func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool {
+ var result bool
+ s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result)
+ return !result
+}
+
// GetReadTime fetches the read time of an entry based on its hash, and the feed id and user id from the feed.
// It's intended to be used on entries objects created by parsing a feed as they don't contain much information.
// The feed param helps to scope the search to a specific user and feed in order to avoid hash clashes.
@@ -575,14 +581,6 @@ func (s *Storage) MarkCategoryAsRead(userID, categoryID int64, before time.Time)
return nil
}
-// EntryURLExists returns true if an entry with this URL already exists.
-func (s *Storage) EntryURLExists(feedID int64, entryURL string) bool {
- var result bool
- query := `SELECT true FROM entries WHERE feed_id=$1 AND url=$2`
- s.db.QueryRow(query, feedID, entryURL).Scan(&result)
- return result
-}
-
// EntryShareCode returns the share code of the provided entry.
// It generates a new one if not already defined.
func (s *Storage) EntryShareCode(userID int64, entryID int64) (shareCode string, err error) {
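
In entry.go, IsNewEntry replaces EntryURLExists with a hash lookup and inverts the result: QueryRow().Scan() leaves result at its zero value (false) when no row matches, so unseen entries come back as new, and any query error is silently treated the same way, just as the removed function did. A minimal error-aware variant might look like the sketch below; it is an illustration against the same entries table, not code from this commit.

// isNewEntryChecked is a hypothetical variant that surfaces query errors
// instead of folding them into "the entry is new".
func (s *Storage) isNewEntryChecked(feedID int64, entryHash string) (bool, error) {
	var exists bool
	err := s.db.QueryRow(
		`SELECT EXISTS (SELECT 1 FROM entries WHERE feed_id=$1 AND hash=$2)`,
		feedID, entryHash,
	).Scan(&exists)
	if err != nil {
		return false, err
	}
	return !exists, nil
}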