diff options
Diffstat (limited to 'internal/reader/atom/atom_03.go')
-rw-r--r-- | internal/reader/atom/atom_03.go | 234 |
1 files changed, 95 insertions, 139 deletions
diff --git a/internal/reader/atom/atom_03.go b/internal/reader/atom/atom_03.go index edcb83dc..fb458e91 100644 --- a/internal/reader/atom/atom_03.go +++ b/internal/reader/atom/atom_03.go @@ -6,158 +6,114 @@ package atom // import "miniflux.app/v2/internal/reader/atom" import ( "encoding/base64" "html" - "log/slog" "strings" - "time" - - "miniflux.app/v2/internal/crypto" - "miniflux.app/v2/internal/model" - "miniflux.app/v2/internal/reader/date" - "miniflux.app/v2/internal/reader/sanitizer" - "miniflux.app/v2/internal/urllib" ) // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html -type atom03Feed struct { - ID string `xml:"id"` - Title atom03Text `xml:"title"` - Author atomPerson `xml:"author"` - Links atomLinks `xml:"link"` - Entries []atom03Entry `xml:"entry"` +type Atom03Feed struct { + Version string `xml:"version,attr"` + + // The "atom:id" element's content conveys a permanent, globally unique identifier for the feed. + // It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element, + // but MUST NOT contain more than one. The content of this element, when present, MUST be a URI. + ID string `xml:"http://purl.org/atom/ns# id"` + + // The "atom:title" element is a Content construct that conveys a human-readable title for the feed. + // atom:feed elements MUST contain exactly one atom:title element. + // If the feed describes a Web resource, its content SHOULD be the same as that resource's title. + Title Atom03Content `xml:"http://purl.org/atom/ns# title"` + + // The "atom:link" element is a Link construct that conveys a URI associated with the feed. + // The nature of the relationship as well as the link itself is determined by the element's content. + // atom:feed elements MUST contain at least one atom:link element with a rel attribute value of "alternate". 
+ // atom:feed elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value. + // atom:feed elements MAY contain additional atom:link elements beyond those described above. + Links AtomLinks `xml:"http://purl.org/atom/ns# link"` + + // The "atom:author" element is a Person construct that indicates the default author of the feed. + // atom:feed elements MUST contain exactly one atom:author element, + // UNLESS all of the atom:feed element's child atom:entry elements contain an atom:author element. + // atom:feed elements MUST NOT contain more than one atom:author element. + Author AtomPerson `xml:"http://purl.org/atom/ns# author"` + + // The "atom:entry" element represents an individual entry that is contained by the feed. + // atom:feed elements MAY contain one or more atom:entry elements. + Entries []Atom03Entry `xml:"http://purl.org/atom/ns# entry"` } -func (a *atom03Feed) Transform(baseURL string) *model.Feed { - var err error - - feed := new(model.Feed) - - feedURL := a.Links.firstLinkWithRelation("self") - feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL) - if err != nil { - feed.FeedURL = feedURL - } - - siteURL := a.Links.originalLink() - feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL) - if err != nil { - feed.SiteURL = siteURL - } - - feed.Title = a.Title.String() - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - for _, entry := range a.Entries { - item := entry.Transform() - entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL) - if err == nil { - item.URL = entryURL - } - - if item.Author == "" { - item.Author = a.Author.String() - } - - if item.Title == "" { - item.Title = sanitizer.TruncateHTML(item.Content, 100) - } - - if item.Title == "" { - item.Title = item.URL - } - - feed.Entries = append(feed.Entries, item) - } - - return feed +type Atom03Entry struct { + // The "atom:id" element's content conveys a permanent, globally unique 
identifier for the entry. + // It MUST NOT change over time, even if other representations of the entry (such as a web representation pointed to by the entry's atom:link element) are relocated. + // If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds. + ID string `xml:"id"` + + // The "atom:title" element is a Content construct that conveys a human-readable title for the entry. + // atom:entry elements MUST have exactly one "atom:title" element. + // If an entry describes a Web resource, its content SHOULD be the same as that resource's title. + Title Atom03Content `xml:"title"` + + // The "atom:modified" element is a Date construct that indicates the time that the entry was last modified. + // atom:entry elements MUST contain an atom:modified element, but MUST NOT contain more than one. + // The content of an atom:modified element MUST have a time zone whose value SHOULD be "UTC". + Modified string `xml:"modified"` + + // The "atom:issued" element is a Date construct that indicates the time that the entry was issued. + // atom:entry elements MUST contain an atom:issued element, but MUST NOT contain more than one. + // The content of an atom:issued element MAY omit a time zone. + Issued string `xml:"issued"` + + // The "atom:created" element is a Date construct that indicates the time that the entry was created. + // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one. + // The content of an atom:created element MUST have a time zone whose value SHOULD be "UTC". + // If atom:created is not present, its content MUST be considered to be the same as that of atom:modified. + Created string `xml:"created"` + + // The "atom:link" element is a Link construct that conveys a URI associated with the entry. + // The nature of the relationship as well as the link itself is determined by the element's content. 
+ // atom:entry elements MUST contain at least one atom:link element with a rel attribute value of "alternate". + // atom:entry elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value. + // atom:entry elements MAY contain additional atom:link elements beyond those described above. + Links AtomLinks `xml:"link"` + + // The "atom:summary" element is a Content construct that conveys a short summary, abstract or excerpt of the entry. + // atom:entry elements MAY contain an atom:summary element, but MUST NOT contain more than one. + Summary Atom03Content `xml:"summary"` + + // The "atom:content" element is a Content construct that conveys the content of the entry. + // atom:entry elements MAY contain one or more atom:content elements. + Content Atom03Content `xml:"content"` + + // The "atom:author" element is a Person construct that indicates the default author of the entry. + // atom:entry elements MUST contain exactly one atom:author element, + // UNLESS the atom:feed element containing them contains an atom:author element itself. + // atom:entry elements MUST NOT contain more than one atom:author element. 
+ Author AtomPerson `xml:"author"` } -type atom03Entry struct { - ID string `xml:"id"` - Title atom03Text `xml:"title"` - Modified string `xml:"modified"` - Issued string `xml:"issued"` - Created string `xml:"created"` - Links atomLinks `xml:"link"` - Summary atom03Text `xml:"summary"` - Content atom03Text `xml:"content"` - Author atomPerson `xml:"author"` -} - -func (a *atom03Entry) Transform() *model.Entry { - entry := model.NewEntry() - entry.URL = a.Links.originalLink() - entry.Date = a.entryDate() - entry.Author = a.Author.String() - entry.Hash = a.entryHash() - entry.Content = a.entryContent() - entry.Title = a.entryTitle() - return entry -} - -func (a *atom03Entry) entryTitle() string { - return sanitizer.StripTags(a.Title.String()) -} - -func (a *atom03Entry) entryContent() string { - content := a.Content.String() - if content != "" { - return content - } - - summary := a.Summary.String() - if summary != "" { - return summary - } - - return "" -} - -func (a *atom03Entry) entryDate() time.Time { - dateText := "" - for _, value := range []string{a.Issued, a.Modified, a.Created} { - if value != "" { - dateText = value - break - } - } - - if dateText != "" { - result, err := date.Parse(dateText) - if err != nil { - slog.Debug("Unable to parse date from Atom 0.3 feed", - slog.String("date", dateText), - slog.String("id", a.ID), - slog.Any("error", err), - ) - return time.Now() - } - - return result - } - - return time.Now() -} - -func (a *atom03Entry) entryHash() string { - for _, value := range []string{a.ID, a.Links.originalLink()} { - if value != "" { - return crypto.Hash(value) - } - } - - return "" -} +type Atom03Content struct { + // Content constructs MAY have a "type" attribute, whose value indicates the media type of the content. + // When present, this attribute's value MUST be a registered media type [RFC2045]. + // If not present, its value MUST be considered to be "text/plain". 
+ Type string `xml:"type,attr"` + + // Content constructs MAY have a "mode" attribute, whose value indicates the method used to encode the content. + // When present, this attribute's value MUST be listed below. + // If not present, its value MUST be considered to be "xml". + // + // "xml": A mode attribute with the value "xml" indicates that the element's content is inline xml (for example, namespace-qualified XHTML). + // + // "escaped": A mode attribute with the value "escaped" indicates that the element's content is an escaped string. + // Processors MUST unescape the element's content before considering it as content of the indicated media type. + // + // "base64": A mode attribute with the value "base64" indicates that the element's content is base64-encoded [RFC2045]. + // Processors MUST decode the element's content before considering it as content of the indicated media type. + Mode string `xml:"mode,attr"` -type atom03Text struct { - Type string `xml:"type,attr"` - Mode string `xml:"mode,attr"` CharData string `xml:",chardata"` InnerXML string `xml:",innerxml"` } -func (a *atom03Text) String() string { +func (a *Atom03Content) Content() string { content := "" switch { |