summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- go.mod 1
-rw-r--r-- go.sum 2
-rw-r--r-- internal/reader/fetcher/encoding_wrappers.go 55
-rw-r--r-- internal/reader/fetcher/request_builder.go 1
-rw-r--r-- internal/reader/fetcher/response_handler.go 23
5 files changed, 80 insertions, 2 deletions
diff --git a/go.mod b/go.mod
index a63c6c2f..59feed98 100644
--- a/go.mod
+++ b/go.mod
@@ -27,6 +27,7 @@ require (
)
require (
+ github.com/andybalholm/brotli v1.1.0
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
diff --git a/go.sum b/go.sum
index f2013f52..bcda7538 100644
--- a/go.sum
+++ b/go.sum
@@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
+github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
+github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
diff --git a/internal/reader/fetcher/encoding_wrappers.go b/internal/reader/fetcher/encoding_wrappers.go
new file mode 100644
index 00000000..41820341
--- /dev/null
+++ b/internal/reader/fetcher/encoding_wrappers.go
@@ -0,0 +1,55 @@
+package fetcher
+
+import (
+ "compress/gzip"
+ "io"
+
+ "github.com/andybalholm/brotli"
+)
+
+// brotliReadCloser pairs a Brotli-decoding reader with the body it reads
+// from, so both can be passed around as a single io.ReadCloser.
+type brotliReadCloser struct {
+ // body is the underlying compressed stream; Close is forwarded to it.
+ body io.ReadCloser
+ // brotliReader is the reader Read delegates to; NewBrotliReadCloser builds
+ // it with brotli.NewReader(body).
+ brotliReader io.Reader
+}
+
+// NewBrotliReadCloser wraps body so that Read goes through a Brotli reader
+// layered on top of it, while Close still reaches body itself.
+func NewBrotliReadCloser(body io.ReadCloser) *brotliReadCloser {
+	wrapper := new(brotliReadCloser)
+	wrapper.body = body
+	wrapper.brotliReader = brotli.NewReader(body)
+	return wrapper
+}
+
+// Read fills p by delegating to the Brotli reader and returns its result
+// unchanged.
+func (b *brotliReadCloser) Read(p []byte) (n int, err error) {
+	n, err = b.brotliReader.Read(p)
+	return n, err
+}
+
+// Close closes the wrapped body and reports its error. The Brotli reader is
+// held only as an io.Reader and is not closed separately.
+func (b *brotliReadCloser) Close() error {
+	closeErr := b.body.Close()
+	return closeErr
+}
+
+// gzipReadCloser adapts a gzip-compressed body to io.ReadCloser. The gzip
+// reader is created on the first Read rather than in the constructor, because
+// gzip.NewReader can fail and the constructor has no error return.
+type gzipReadCloser struct {
+	// body is the underlying compressed stream; Close is forwarded to it.
+	body io.ReadCloser
+	// gzipReader stays nil until gzip.NewReader has succeeded.
+	gzipReader io.Reader
+	// gzipErr is the initialization error; once set, every Read returns it.
+	gzipErr error
+}
+
+// NewGzipReadCloser wraps body without reading from it. Decompression is set
+// up lazily by the first call to Read.
+func NewGzipReadCloser(body io.ReadCloser) *gzipReadCloser {
+	return &gzipReadCloser{body: body}
+}
+
+// Read returns decompressed bytes from body. If the gzip reader cannot be
+// initialized, that error is remembered and returned by this call and every
+// later one.
+func (gz *gzipReadCloser) Read(p []byte) (n int, err error) {
+	if gz.gzipErr != nil {
+		return 0, gz.gzipErr
+	}
+	if gz.gzipReader == nil {
+		// Store the reader in the interface field only on success. On failure
+		// gzip.NewReader returns a nil *gzip.Reader; assigning that to an
+		// io.Reader field yields a non-nil interface, so the next Read would
+		// skip this branch and dereference a nil pointer.
+		reader, initErr := gzip.NewReader(gz.body)
+		if initErr != nil {
+			gz.gzipErr = initErr
+			return 0, initErr
+		}
+		gz.gzipReader = reader
+	}
+
+	return gz.gzipReader.Read(p)
+}
+
+// Close closes the wrapped body and reports its error.
+func (gz *gzipReadCloser) Close() error {
+	return gz.body.Close()
+}
diff --git a/internal/reader/fetcher/request_builder.go b/internal/reader/fetcher/request_builder.go
index e2b2258b..77c18948 100644
--- a/internal/reader/fetcher/request_builder.go
+++ b/internal/reader/fetcher/request_builder.go
@@ -169,6 +169,7 @@ func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, erro
}
req.Header = r.headers
+ req.Header.Set("Accept-Encoding", "br, gzip")
req.Header.Set("Accept", defaultAcceptHeader)
req.Header.Set("Connection", "close")
diff --git a/internal/reader/fetcher/response_handler.go b/internal/reader/fetcher/response_handler.go
index 03ab39ca..1aba5957 100644
--- a/internal/reader/fetcher/response_handler.go
+++ b/internal/reader/fetcher/response_handler.go
@@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
+ "log/slog"
"net"
"net/http"
"net/url"
@@ -71,12 +72,30 @@ func (r *ResponseHandler) Close() {
}
}
+// getReader returns the response body prepared for consumption. It logs the
+// response metadata at debug level, wraps the body in a decompressing reader
+// when Content-Encoding is exactly "br" or "gzip" (any other value leaves the
+// body as is), and caps the result at maxBodySize bytes with
+// http.MaxBytesReader, so the limit applies to whatever the selected reader
+// produces.
+//
+// NOTE(review): the Content-Encoding match is case-sensitive and handles a
+// single coding only; confirm this is sufficient for the servers being fetched.
+func (r *ResponseHandler) getReader(maxBodySize int64) io.ReadCloser {
+	contentEncoding := r.httpResponse.Header.Get("Content-Encoding")
+
+	slog.Debug("Request response",
+		slog.String("effective_url", r.EffectiveURL()),
+		slog.Int64("content_length", r.httpResponse.ContentLength),
+		slog.String("content_encoding", contentEncoding),
+		slog.String("content_type", r.httpResponse.Header.Get("Content-Type")),
+	)
+
+	var body io.ReadCloser
+	switch contentEncoding {
+	case "br":
+		body = NewBrotliReadCloser(r.httpResponse.Body)
+	case "gzip":
+		body = NewGzipReadCloser(r.httpResponse.Body)
+	default:
+		body = r.httpResponse.Body
+	}
+	return http.MaxBytesReader(nil, body, maxBodySize)
+}
+
func (r *ResponseHandler) Body(maxBodySize int64) io.ReadCloser {
- return http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize)
+ return r.getReader(maxBodySize)
}
func (r *ResponseHandler) ReadBody(maxBodySize int64) ([]byte, *locale.LocalizedErrorWrapper) {
- limitedReader := http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize)
+ limitedReader := r.getReader(maxBodySize)
buffer, err := io.ReadAll(limitedReader)
if err != nil && err != io.EOF {