summaryrefslogtreecommitdiff
path: root/internal/proxy/media_proxy.go
blob: 14de7d7e13038e0b0a52c7fdb45503423b06fe21 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package proxy // import "miniflux.app/v2/internal/proxy"

import (
	"strings"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/reader/sanitizer"
	"miniflux.app/v2/internal/urllib"

	"github.com/PuerkitoBio/goquery"
	"github.com/gorilla/mux"
)

type urlProxyRewriter func(router *mux.Router, url string) string

// ProxyRewriter replaces media URLs with internal proxy URLs.
func ProxyRewriter(router *mux.Router, data string) string {
	return genericProxyRewriter(router, ProxifyURL, data)
}

// AbsoluteProxyRewriter do the same as ProxyRewriter except it uses absolute URLs.
func AbsoluteProxyRewriter(router *mux.Router, host, data string) string {
	proxifyFunction := func(router *mux.Router, url string) string {
		return AbsoluteProxifyURL(router, host, url)
	}
	return genericProxyRewriter(router, proxifyFunction, data)
}

func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter, data string) string {
	proxyOption := config.Opts.ProxyOption()
	if proxyOption == "none" {
		return data
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		return data
	}

	for _, mediaType := range config.Opts.ProxyMediaTypes() {
		switch mediaType {
		case "image":
			doc.Find("img").Each(func(i int, img *goquery.Selection) {
				if srcAttrValue, ok := img.Attr("src"); ok {
					if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
						img.SetAttr("src", proxifyFunction(router, srcAttrValue))
					}
				}

				if srcsetAttrValue, ok := img.Attr("srcset"); ok {
					proxifySourceSet(img, router, proxifyFunction, proxyOption, srcsetAttrValue)
				}
			})

			doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
				if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
					proxifySourceSet(sourceElement, router, proxifyFunction, proxyOption, srcsetAttrValue)
				}
			})

		case "audio":
			doc.Find("audio").Each(func(i int, audio *goquery.Selection) {
				if srcAttrValue, ok := audio.Attr("src"); ok {
					if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
						audio.SetAttr("src", proxifyFunction(router, srcAttrValue))
					}
				}
			})

			doc.Find("audio source").Each(func(i int, sourceElement *goquery.Selection) {
				if srcAttrValue, ok := sourceElement.Attr("src"); ok {
					if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
						sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
					}
				}
			})

		case "video":
			doc.Find("video").Each(func(i int, video *goquery.Selection) {
				if srcAttrValue, ok := video.Attr("src"); ok {
					if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
						video.SetAttr("src", proxifyFunction(router, srcAttrValue))
					}
				}
			})

			doc.Find("video source").Each(func(i int, sourceElement *goquery.Selection) {
				if srcAttrValue, ok := sourceElement.Attr("src"); ok {
					if !isDataURL(srcAttrValue) && (proxyOption == "all" || !urllib.IsHTTPS(srcAttrValue)) {
						sourceElement.SetAttr("src", proxifyFunction(router, srcAttrValue))
					}
				}
			})
		}
	}

	output, err := doc.Find("body").First().Html()
	if err != nil {
		return data
	}

	return output
}

func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxifyFunction urlProxyRewriter, proxyOption, srcsetAttrValue string) {
	imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)

	for _, imageCandidate := range imageCandidates {
		if !isDataURL(imageCandidate.ImageURL) && (proxyOption == "all" || !urllib.IsHTTPS(imageCandidate.ImageURL)) {
			imageCandidate.ImageURL = proxifyFunction(router, imageCandidate.ImageURL)
		}
	}

	element.SetAttr("srcset", imageCandidates.String())
}

func isDataURL(s string) bool {
	return strings.HasPrefix(s, "data:")
}