package scrapfly

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"

	"github.com/ansg191/ibd-trader-backend/internal/ibd/transport"
)

// ScrapflyTransport sends HTTP requests through a scrape endpoint instead of
// contacting the target host directly. The endpoint address and scrape
// settings come from options; apiKey is sent as the "key" query parameter.
type ScrapflyTransport struct {
	client  *http.Client
	apiKey  string
	options ScrapeOptions
}

// New returns a ScrapflyTransport that issues its scrape requests with client
// and authenticates with apiKey. Options start from defaultScrapeOptions and
// each opt is applied on top, in the order given.
func New(client *http.Client, apiKey string, opts ...ScrapeOption) *ScrapflyTransport {
	options := defaultScrapeOptions
	for _, opt := range opts {
		opt(&options)
	}

	return &ScrapflyTransport{
		client:  client,
		apiKey:  apiKey,
		options: options,
	}
}

// String returns the transport's name, "scrapfly".
func (s *ScrapflyTransport) String() string {
	return "scrapfly"
}

// Do forwards req through the scrape endpoint and returns the scraped result
// converted back into an *http.Response.
//
// The original method, body and context are reused for the scrape request; the
// target URL and headers are carried in the query string (see
// constructRawQuery).
//
// Do returns transport.ErrUnsupportedRequest for a GET request that carries a
// Content-Type header, an error if the configured base URL cannot be parsed,
// and otherwise any error from building or sending the scrape request,
// decoding its JSON body, or converting the decoded response.
func (s *ScrapflyTransport) Do(req *http.Request) (*http.Response, error) {
	// We can't handle `Content-Type` header on GET requests.
	// Weird quirk of the Scrapfly API.
	if req.Method == http.MethodGet && req.Header.Get("Content-Type") != "" {
		return nil, transport.ErrUnsupportedRequest
	}

	// Construct scrape request URL. A malformed base URL is a configuration
	// problem, so report it to the caller rather than panicking.
	scrapeURL, err := url.Parse(s.options.baseURL)
	if err != nil {
		return nil, fmt.Errorf("scrapfly: parse base URL %q: %w", s.options.baseURL, err)
	}
	scrapeURL.RawQuery = s.constructRawQuery(req.URL, req.Header)

	// Construct scrape request
	scrapeReq, err := http.NewRequestWithContext(req.Context(), req.Method, scrapeURL.String(), req.Body)
	if err != nil {
		return nil, err
	}

	// Send scrape request
	resp, err := s.client.Do(scrapeReq)
	if err != nil {
		return nil, err
	}
	// A Close failure is deliberately ignored: by then the body has either
	// been decoded or decoding has already produced the error we return.
	defer func(body io.ReadCloser) {
		_ = body.Close()
	}(resp.Body)

	// Parse scrape response
	scraperResponse := new(ScraperResponse)
	if err := json.NewDecoder(resp.Body).Decode(scraperResponse); err != nil {
		return nil, err
	}

	// Convert scraper response to http.Response
	return scraperResponse.ToHTTPResponse()
}

// Properties reports this transport as transport.PropertiesReliable.
func (s *ScrapflyTransport) Properties() transport.Properties {
	return transport.PropertiesReliable
}

// constructRawQuery builds the encoded query string for a scrape request.
//
// It always sets key, url, asp, proxy_pool, render_js and cache, and sets
// country only when one is configured. Every header value is added as a
// separate parameter named headers[<name>][<index>], where index is the
// value's position within that header.
func (s *ScrapflyTransport) constructRawQuery(u *url.URL, headers http.Header) string {
	params := url.Values{}
	params.Set("key", s.apiKey)
	params.Set("url", u.String())
	if s.options.country != nil {
		params.Set("country", *s.options.country)
	}
	params.Set("asp", strconv.FormatBool(s.options.asp))
	params.Set("proxy_pool", s.options.proxyPool.String())
	params.Set("render_js", strconv.FormatBool(s.options.renderJS))
	params.Set("cache", strconv.FormatBool(s.options.cache))

	for name, values := range headers {
		for i, value := range values {
			params.Add(fmt.Sprintf("headers[%s][%d]", name, i), value)
		}
	}

	return params.Encode()
}