aboutsummaryrefslogtreecommitdiff
path: root/internal/reader/rdf/adapter.go
blob: f90ebaca6df7ab587f4da31baf317212528fe74a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package rdf // import "miniflux.app/v2/internal/reader/rdf"

import (
	"html"
	"log/slog"
	"strings"
	"time"

	"miniflux.app/v2/internal/crypto"
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/date"
	"miniflux.app/v2/internal/reader/sanitizer"
	"miniflux.app/v2/internal/urllib"
)

// RDFAdapter wraps an RDF document and converts it into a model.Feed
// through BuildFeed.
type RDFAdapter struct {
	// rdf is the RDF document supplied to NewRDFAdapter.
	rdf *RDF
}

// NewRDFAdapter returns an adapter bound to the given RDF document.
// The pointer is stored as-is; no copy of the document is made.
func NewRDFAdapter(rdf *RDF) *RDFAdapter {
	return &RDFAdapter{rdf: rdf}
}

// BuildFeed converts the RDF document held by the adapter into a model.Feed.
//
// baseURL is trimmed and stored as the feed URL. It is also the base passed to
// urllib.AbsoluteURL for the channel link, and it becomes the feed title when
// the channel title is empty once tags are stripped.
//
// One entry is appended to the returned feed for every item of the document.
func (r *RDFAdapter) BuildFeed(baseURL string) *model.Feed {
	feed := &model.Feed{
		Title:   stripTags(r.rdf.Channel.Title),
		FeedURL: strings.TrimSpace(baseURL),
		SiteURL: strings.TrimSpace(r.rdf.Channel.Link),
	}

	// Use the trimmed feed URL so the fallback title never carries stray
	// whitespace and always matches feed.FeedURL.
	if feed.Title == "" {
		feed.Title = feed.FeedURL
	}

	// On error the trimmed channel link is kept unchanged.
	if siteURL, err := urllib.AbsoluteURL(feed.FeedURL, feed.SiteURL); err == nil {
		feed.SiteURL = siteURL
	}

	for _, item := range r.rdf.Items {
		entry := model.NewEntry()
		itemLink := strings.TrimSpace(item.Link)

		// Populate the entry URL.
		if itemLink == "" {
			entry.URL = feed.SiteURL // Fallback to the site URL if the item link is empty.
		} else if entryURL, err := urllib.AbsoluteURL(feed.SiteURL, itemLink); err == nil {
			entry.URL = entryURL
		} else {
			// Keep the trimmed link verbatim when it cannot be made absolute.
			entry.URL = itemLink
		}

		// Populate the entry title: the first non-blank candidate wins.
		for _, title := range []string{item.Title, item.DublinCoreTitle} {
			title = strings.TrimSpace(title)
			if title != "" {
				entry.Title = html.UnescapeString(title)
				break
			}
		}

		// If the entry title is empty, we use the entry URL as a fallback.
		if entry.Title == "" {
			entry.Title = entry.URL
		}

		// Populate the entry content.
		if item.DublinCoreContent != "" {
			entry.Content = item.DublinCoreContent
		} else {
			entry.Content = item.Description
		}

		// Generate the entry hash. The inputs are deliberately left untouched:
		// changing them would change the hash of entries that were already built
		// with the previous inputs.
		hashValue := itemLink
		if hashValue == "" {
			hashValue = item.Title + item.Description // Fallback to the title and description if the link is empty.
		}

		entry.Hash = crypto.Hash(hashValue)

		// Populate the entry date. The current time is kept when the item has no
		// date or when the date cannot be parsed.
		entry.Date = time.Now()
		if item.DublinCoreDate != "" {
			if itemDate, err := date.Parse(item.DublinCoreDate); err != nil {
				slog.Debug("Unable to parse date from RDF feed",
					slog.String("date", item.DublinCoreDate),
					slog.String("link", itemLink),
					slog.Any("error", err),
				)
			} else {
				entry.Date = itemDate
			}
		}

		// Populate the entry author. Candidates are compared after stripping so
		// that an item creator made only of whitespace or markup does not block
		// the fallback to the channel creator.
		for _, creator := range []string{item.DublinCoreCreator, r.rdf.Channel.DublinCoreCreator} {
			if author := stripTags(creator); author != "" {
				entry.Author = author
				break
			}
		}

		feed.Entries = append(feed.Entries, entry)
	}

	return feed
}

// stripTags passes value through sanitizer.StripTags and trims leading and
// trailing whitespace from the result.
func stripTags(value string) string {
	stripped := sanitizer.StripTags(value)
	return strings.TrimSpace(stripped)
}