path: root/backend/internal/ibd/client.go
blob: c1cbb8a5b7cd4ae17157a8499ae7b311a659988c
package ibd

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"

	"github.com/ansg191/ibd-trader-backend/internal/database"
)

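// ErrNoAvailableCookies is returned when the cookie source has no cookie
// available for the request.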
var ErrNoAvailableCookies = errors.New("no available cookies")

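// Client makes scraping requests through the Scrapfly API, drawing session
// cookies from a database.CookieSource when a request needs one.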
type Client struct {
	// HTTP client used to make requests
	client *http.Client
	// Scrapfly API key
	apiKey string
	// Client-wide scrape options
	options ScrapeOptions
	// Cookie source
	cookies database.CookieSource
	// Proxy URL for non-Scrapfly requests
	proxyUrl *url.URL
}

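// NewClient constructs a Client. The given ScrapeOptions are applied on top
// of defaultScrapeOptions, and an error is returned if proxyUrl does not
// parse as a URL.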
func NewClient(
	client *http.Client,
	apiKey string,
	cookies database.CookieSource,
	proxyUrl string,
	opts ...ScrapeOption,
) (*Client, error) {
	options := defaultScrapeOptions
	for _, opt := range opts {
		opt(&options)
	}

	parsedProxyURL, err := url.Parse(proxyUrl)
	if err != nil {
		return nil, fmt.Errorf("invalid proxy URL %q: %w", proxyUrl, err)
	}

	return &Client{
		client:   client,
		options:  options,
		apiKey:   apiKey,
		cookies:  cookies,
		proxyUrl: parsedProxyURL,
	}, nil
}

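// getCookie selects a cookie for a request, returning its ID and http.Cookie
// form. With a nil subject any available cookie is used; otherwise the first
// cookie stored for that subject is used. ErrNoAvailableCookies is returned
// when no cookie is available.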
func (c *Client) getCookie(ctx context.Context, subject *string) (uint, *http.Cookie, error) {
	if subject == nil {
		// No subject requirement, get any cookie
		cookie, err := c.cookies.GetAnyCookie(ctx)
		if err != nil {
			return 0, nil, err
		}
		if cookie == nil {
			return 0, nil, ErrNoAvailableCookies
		}

		return cookie.ID, cookie.ToHTTPCookie(), nil
	}

	// Get cookie by subject
	cookies, err := c.cookies.GetCookies(ctx, *subject, false)
	if err != nil {
		return 0, nil, err
	}

	if len(cookies) == 0 {
		return 0, nil, ErrNoAvailableCookies
	}

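	// Use the first cookie returned for the subject.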
	cookie := cookies[0]

	return cookie.ID, cookie.ToHTTPCookie(), nil
}

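// Do sends req through the Scrapfly scrape endpoint. The target URL, headers,
// and effective options are encoded into the endpoint's query string, and the
// JSON body of the reply is decoded into a ScraperResponse.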
func (c *Client) Do(req *http.Request, opts ...ScrapeOption) (*ScraperResponse, error) {
	options := c.options
	for _, opt := range opts {
		opt(&options)
	}

	// Construct scrape request URL
	scrapeUrl, err := url.Parse(options.baseURL)
	if err != nil {
		// A malformed base URL is a configuration error; return it to the
		// caller rather than panicking.
		return nil, fmt.Errorf("invalid scrape base URL %q: %w", options.baseURL, err)
	}
	scrapeUrl.RawQuery = c.constructRawQuery(options, req.URL, req.Header)

	// Construct scrape request
	scrapeReq, err := http.NewRequestWithContext(req.Context(), req.Method, scrapeUrl.String(), req.Body)
	if err != nil {
		return nil, err
	}

	// Send scrape request
	resp, err := c.client.Do(scrapeReq)
	if err != nil {
		return nil, err
	}
	defer func() { _ = resp.Body.Close() }()

	// Parse scrape response
	scraperResponse := new(ScraperResponse)
	err = json.NewDecoder(resp.Body).Decode(scraperResponse)
	if err != nil {
		return nil, fmt.Errorf("decoding scrape response (HTTP %d): %w", resp.StatusCode, err)
	}

	return scraperResponse, nil
}

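// constructRawQuery builds the Scrapfly query string from the API key, the
// target URL, the effective scrape options, and the original request headers.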
func (c *Client) constructRawQuery(options ScrapeOptions, u *url.URL, headers http.Header) string {
	params := url.Values{}
	params.Set("key", c.apiKey)
	params.Set("url", u.String())
	if options.country != nil {
		params.Set("country", *options.country)
	}
	params.Set("asp", strconv.FormatBool(options.asp))
	params.Set("proxy_pool", options.proxyPool.String())
	params.Set("render_js", strconv.FormatBool(options.renderJS))
	params.Set("cache", strconv.FormatBool(options.cache))

	for k, v := range headers {
		for i, vv := range v {
			params.Add(
				fmt.Sprintf("headers[%s][%d]", k, i),
				vv,
			)
		}
	}

	return params.Encode()
}
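
// Illustrative usage (a sketch, not part of this file): how a caller might
// wire up the client and issue a request. The cookie source, API key, proxy
// address, and target URL below are placeholders.
//
//	src := /* a database.CookieSource implementation */
//	c, err := NewClient(http.DefaultClient, apiKey, src, "http://proxy.example:8080")
//	if err != nil {
//		// handle error
//	}
//	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://example.com/page", nil)
//	if err != nil {
//		// handle error
//	}
//	resp, err := c.Do(req)
//	_ = resp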