1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
package scrapfly
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"github.com/ansg191/ibd-trader-backend/internal/ibd/transport"
)
// ScrapflyTransport routes HTTP requests through the Scrapfly scraping API:
// the original request's URL and headers are encoded into a Scrapfly query
// string, and the scraper's JSON response is converted back into an
// *http.Response (see Do).
type ScrapflyTransport struct {
client *http.Client // HTTP client used to reach the Scrapfly API endpoint
apiKey string // Scrapfly API key, sent as the `key` query parameter
options ScrapeOptions // scrape settings encoded into each request (see constructRawQuery)
}
// New builds a ScrapflyTransport that sends requests through the Scrapfly
// API using the given HTTP client and API key. Functional options in opts
// are applied on top of the package defaults, in order.
func New(client *http.Client, apiKey string, opts ...ScrapeOption) *ScrapflyTransport {
cfg := defaultScrapeOptions
for _, apply := range opts {
apply(&cfg)
}
t := &ScrapflyTransport{
client: client,
apiKey: apiKey,
options: cfg,
}
return t
}
// String identifies this transport implementation by name.
func (s *ScrapflyTransport) String() string {
const name = "scrapfly"
return name
}
// Do executes req by forwarding it through the Scrapfly scraping API and
// returns the upstream response reconstructed from the scraper's JSON
// payload. The original request's context, method, and body are preserved;
// its URL and headers are encoded into the Scrapfly query string.
//
// Returns transport.ErrUnsupportedRequest for GET requests carrying a
// Content-Type header (a quirk the Scrapfly API cannot represent), or any
// error from building/sending the scrape request or decoding its response.
func (s *ScrapflyTransport) Do(req *http.Request) (*http.Response, error) {
// We can't handle `Content-Type` header on GET requests.
// Weird quirk of the Scrapfly API. Reject before doing any work.
if req.Method == http.MethodGet && req.Header.Get("Content-Type") != "" {
return nil, transport.ErrUnsupportedRequest
}
// Construct scrape request URL.
scrapeUrl, err := url.Parse(s.options.baseURL)
if err != nil {
// A malformed base URL is a configuration error, but a transport
// should surface it to the caller rather than crash the process.
return nil, fmt.Errorf("parsing scrapfly base URL %q: %w", s.options.baseURL, err)
}
scrapeUrl.RawQuery = s.constructRawQuery(req.URL, req.Header)
// Construct scrape request, keeping the caller's context and body.
scrapeReq, err := http.NewRequestWithContext(req.Context(), req.Method, scrapeUrl.String(), req.Body)
if err != nil {
return nil, err
}
// Send scrape request.
resp, err := s.client.Do(scrapeReq)
if err != nil {
return nil, err
}
defer func() {
_ = resp.Body.Close()
}()
// Parse the scraper's JSON envelope.
scraperResponse := new(ScraperResponse)
if err := json.NewDecoder(resp.Body).Decode(scraperResponse); err != nil {
return nil, err
}
// Convert scraper response to http.Response.
return scraperResponse.ToHTTPResponse()
}
// Properties reports this transport's characteristics; Scrapfly is
// classified as a reliable transport.
func (s *ScrapflyTransport) Properties() transport.Properties {
return transport.PropertiesReliable
}
// constructRawQuery encodes the API key, the target URL, the configured
// scrape options, and the original request headers into the URL-encoded
// query string expected by the Scrapfly API. url.Values.Encode sorts keys,
// so the output is deterministic.
func (s *ScrapflyTransport) constructRawQuery(target *url.URL, headers http.Header) string {
q := url.Values{
"key": {s.apiKey},
"url": {target.String()},
"asp": {strconv.FormatBool(s.options.asp)},
"proxy_pool": {s.options.proxyPool.String()},
"render_js": {strconv.FormatBool(s.options.renderJS)},
"cache": {strconv.FormatBool(s.options.cache)},
}
if c := s.options.country; c != nil {
q.Set("country", *c)
}
// Scrapfly expects each header value as headers[Name][index]=value.
for name, values := range headers {
for idx, value := range values {
q.Add(fmt.Sprintf("headers[%s][%d]", name, idx), value)
}
}
return q.Encode()
}
|