package ibd

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"

	"ibd-trader/internal/database"
)
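// ErrNoAvailableCookies is returned when the cookie source has no cookie
// available for use.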
var ErrNoAvailableCookies = errors.New("no available cookies")
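// Client makes HTTP requests through the Scrapfly scraping API, applying
// client-wide scrape options and drawing cookies from a database.CookieSource.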
type Client struct {
	// HTTP client used to make requests
	client *http.Client
	// Scrapfly API key
	apiKey string
	// Client-wide scrape options
	options ScrapeOptions
	// Cookie source
	cookies database.CookieSource
	// Proxy URL for non-Scrapfly requests
	proxyUrl *url.URL
}
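// NewClient builds a Client from an HTTP client, a Scrapfly API key, a cookie
// source, and a proxy URL for non-Scrapfly requests, applying any ScrapeOption
// overrides on top of the defaults.
//
// A minimal usage sketch (the API key, cookie store, and proxy URL below are
// placeholders, not values defined in this package):
//
//	c, err := NewClient(http.DefaultClient, apiKey, cookieStore, "http://proxy.example:8080")
//	if err != nil {
//		// handle error
//	}
//	req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, "https://www.investors.com/", nil)
//	resp, err := c.Do(req)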
func NewClient(
	client *http.Client,
	apiKey string,
	cookies database.CookieSource,
	proxyUrl string,
	opts ...ScrapeOption,
) (*Client, error) {
	options := defaultScrapeOptions
	for _, opt := range opts {
		opt(&options)
	}

	pProxyUrl, err := url.Parse(proxyUrl)
	if err != nil {
		return nil, err
	}

	return &Client{
		client:   client,
		options:  options,
		apiKey:   apiKey,
		cookies:  cookies,
		proxyUrl: pProxyUrl,
	}, nil
}
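// getCookie picks a cookie to use for a request and returns its database ID
// together with its http.Cookie form. With a nil subject any available cookie
// is used; otherwise the first cookie stored for that subject is returned.
// ErrNoAvailableCookies is returned when no cookie is available.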
func (c *Client) getCookie(ctx context.Context, subject *string) (uint, *http.Cookie, error) {
	if subject == nil {
		// No subject requirement, get any cookie
		cookie, err := c.cookies.GetAnyCookie(ctx)
		if err != nil {
			return 0, nil, err
		}
		if cookie == nil {
			return 0, nil, ErrNoAvailableCookies
		}
		return cookie.ID, cookie.ToHTTPCookie(), nil
	}

	// Get cookie by subject
	cookies, err := c.cookies.GetCookies(ctx, *subject, false)
	if err != nil {
		return 0, nil, err
	}
	if len(cookies) == 0 {
		return 0, nil, ErrNoAvailableCookies
	}
	cookie := cookies[0]
	return cookie.ID, cookie.ToHTTPCookie(), nil
}
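// Do sends req through the Scrapfly scrape endpoint. Per-request ScrapeOptions
// are applied on top of the client-wide options, the target URL and request
// headers are encoded into the scrape query string, and the JSON response is
// decoded into a ScraperResponse.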
func (c *Client) Do(req *http.Request, opts ...ScrapeOption) (*ScraperResponse, error) {
	options := c.options
	for _, opt := range opts {
		opt(&options)
	}

	// Construct scrape request URL
	scrapeUrl, err := url.Parse(options.baseURL)
	if err != nil {
		panic(err)
	}
	scrapeUrl.RawQuery = c.constructRawQuery(options, req.URL, req.Header)

	// Construct scrape request
	scrapeReq, err := http.NewRequestWithContext(req.Context(), req.Method, scrapeUrl.String(), req.Body)
	if err != nil {
		return nil, err
	}

	// Send scrape request
	resp, err := c.client.Do(scrapeReq)
	if err != nil {
		return nil, err
	}
	defer func(Body io.ReadCloser) {
		_ = Body.Close()
	}(resp.Body)

	// Parse scrape response
	scraperResponse := new(ScraperResponse)
	err = json.NewDecoder(resp.Body).Decode(scraperResponse)
	if err != nil {
		return nil, err
	}
	return scraperResponse, nil
}
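// constructRawQuery encodes the Scrapfly query string: the API key, the target
// URL, the scrape options, and the original request headers as
// headers[<name>][<index>] parameters.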
func (c *Client) constructRawQuery(options ScrapeOptions, u *url.URL, headers http.Header) string {
	params := url.Values{}
	params.Set("key", c.apiKey)
	params.Set("url", u.String())
	if options.country != nil {
		params.Set("country", *options.country)
	}
	params.Set("asp", strconv.FormatBool(options.asp))
	params.Set("proxy_pool", options.proxyPool.String())
	params.Set("render_js", strconv.FormatBool(options.renderJS))
	params.Set("cache", strconv.FormatBool(options.cache))

	for k, v := range headers {
		for i, vv := range v {
			params.Add(
				fmt.Sprintf("headers[%s][%d]", k, i),
				vv,
			)
		}
	}

	return params.Encode()
}