aboutsummaryrefslogtreecommitdiff
path: root/internal/http/client/response.go
blob: 7b2faca7a47e70c92f53ffa40e3a6a246c92b4d9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package client // import "miniflux.app/v2/internal/http/client"

import (
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode/utf8"

	"golang.org/x/net/html/charset"
)

// xmlEncodingRegex matches an XML declaration that carries an encoding
// attribute, for example <?xml version="1.0" encoding="ISO-8859-1"?>.
// The attribute value may be wrapped in single or double quotes. Because "."
// does not match a newline by default, the whole declaration has to sit on one
// line to be recognised. EnsureUnicodeBody runs it against the beginning of
// the body only.
var xmlEncodingRegex = regexp.MustCompile(`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`)

// Response wraps a server response.
//
// The fields are filled in by code outside this file; the notes below describe
// how the methods in this file use them.
type Response struct {
	// Body is the response payload. EnsureUnicodeBody reads it completely and
	// replaces it with a reader over the buffered bytes; BodyAsString consumes it.
	Body          io.Reader
	// StatusCode is compared against HTTP status values (304, 401, 404, 410, >= 400)
	// by the Is*/Has* helpers.
	StatusCode    int
	// EffectiveURL is only reported by String here — presumably the final URL
	// after redirects; confirm against the code that builds the Response.
	EffectiveURL  string
	// LastModified is compared verbatim with the caller's cached value in IsModified.
	LastModified  string
	// ETag is compared verbatim with the caller's cached value in IsModified.
	ETag          string
	// Expires is only reported by String here — presumably the raw Expires
	// header value; confirm against the code that builds the Response.
	Expires       string
	// ContentType is searched for "xml" and handed to the charset decoder by
	// EnsureUnicodeBody.
	ContentType   string
	// ContentLength is only reported by String here.
	ContentLength int64
}

// String returns a single-line summary of the response metadata.
//
// The body is not included. ETag and Expires are printed as-is, the other
// string fields are quoted.
func (r *Response) String() string {
	const layout = `StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`

	return fmt.Sprintf(layout, r.StatusCode, r.EffectiveURL, r.LastModified, r.ETag, r.Expires, r.ContentType, r.ContentLength)
}

// IsNotFound reports whether the status code says the resource is missing:
// 404 (Not Found) or 410 (Gone).
func (r *Response) IsNotFound() bool {
	switch r.StatusCode {
	case 404, 410:
		return true
	default:
		return false
	}
}

// IsNotAuthorized reports whether the server answered with 401, i.e. the
// resource requires authentication.
func (r *Response) IsNotAuthorized() bool {
	const statusUnauthorized = 401

	return r.StatusCode == statusUnauthorized
}

// HasServerFailure reports whether the status code represents a failure.
//
// Every status of 400 or above counts, so client errors (4xx) are treated as
// failures just like server errors (5xx).
func (r *Response) HasServerFailure() bool {
	const firstFailureStatus = 400

	return r.StatusCode >= firstFailureStatus
}

// IsModified reports whether the resource changed compared to the validators
// the caller already holds.
//
// The resource is considered unchanged when the server replied 304, or when
// the response carries a non-empty ETag or Last-Modified value identical to
// the etag / lastModified argument. Anything else counts as modified.
func (r *Response) IsModified(etag, lastModified string) bool {
	switch {
	case r.StatusCode == 304:
		return false
	case r.ETag != "" && r.ETag == etag:
		return false
	case r.LastModified != "" && r.LastModified == lastModified:
		return false
	}

	return true
}

// EnsureUnicodeBody makes sure the body is encoded in UTF-8.
//
// The body is read completely into memory and Body is replaced with a reader
// over that buffer. If the bytes are not valid UTF-8, Body is replaced with a
// reader produced by charset.NewReader, which is given the Content-Type value.
// This is used by the scraper and feed readers.
//
// Do not forget edge cases:
//
// - Feeds with encoding specified only in Content-Type header and not in XML document
// - Feeds with encoding specified in both places
// - Feeds with encoding specified only in XML document and not in HTTP header
// - Feeds with wrong encoding defined and already in UTF-8
//
// An error is returned when the body cannot be read or the charset reader
// cannot be created. In the second case Body still holds the untouched
// buffered content.
func (r *Response) EnsureUnicodeBody() error {
	buffer, err := io.ReadAll(r.Body)
	if err != nil {
		return err
	}

	r.Body = bytes.NewReader(buffer)
	if utf8.Valid(buffer) {
		// Already UTF-8 (this also covers an empty body): nothing to convert,
		// whatever encoding the headers or the document claim.
		return nil
	}

	if strings.Contains(r.ContentType, "xml") {
		// We ignore documents with encoding specified in XML prolog.
		// This is going to be handled by the XML parser.
		// Only the beginning of the document is inspected.
		const prologScanLimit = 1024

		prolog := buffer
		if len(prolog) > prologScanLimit {
			prolog = prolog[:prologScanLimit]
		}

		if xmlEncodingRegex.Match(prolog) {
			return nil
		}
	}

	// Decode from a separate reader and only swap Body on success, so that a
	// failure leaves Body readable from the start instead of nil.
	decoded, err := charset.NewReader(bytes.NewReader(buffer), r.ContentType)
	if err != nil {
		return err
	}

	r.Body = decoded
	return nil
}

// BodyAsString reads what remains of Body and returns it as a string.
//
// A nil Body yields an empty string. Reading consumes Body, so a second call
// on the same reader returns whatever is left (normally nothing).
func (r *Response) BodyAsString() string {
	if r.Body == nil {
		return ""
	}

	// The signature has no error result, so this is deliberately best-effort:
	// a read error is dropped and the bytes read before it are still returned.
	content, _ := io.ReadAll(r.Body)
	return string(content)
}