diff options
author | 2024-08-11 13:15:50 -0700 | |
---|---|---|
committer | 2024-08-11 13:15:50 -0700 | |
commit | 6a3c21fb0b1c126849f2bbff494403bbe901448e (patch) | |
tree | 5d7805524357c2c8a9819c39d2051a4e3633a1d5 /backend/internal/ibd | |
parent | 29c6040a51616e9e4cf6c70ee16391b2a3b238c9 (diff) | |
parent | f34b92ded11b07f78575ac62c260a380c468e5ea (diff) | |
download | ibd-trader-6a3c21fb0b1c126849f2bbff494403bbe901448e.tar.gz ibd-trader-6a3c21fb0b1c126849f2bbff494403bbe901448e.tar.zst ibd-trader-6a3c21fb0b1c126849f2bbff494403bbe901448e.zip |
Merge remote-tracking branch 'backend/main'
Diffstat (limited to 'backend/internal/ibd')
-rw-r--r-- | backend/internal/ibd/auth.go | 333 | ||||
-rw-r--r-- | backend/internal/ibd/auth_test.go | 215 | ||||
-rw-r--r-- | backend/internal/ibd/check_ibd_username.go | 68 | ||||
-rw-r--r-- | backend/internal/ibd/client.go | 97 | ||||
-rw-r--r-- | backend/internal/ibd/client_test.go | 201 | ||||
-rw-r--r-- | backend/internal/ibd/html_helpers.go | 99 | ||||
-rw-r--r-- | backend/internal/ibd/html_helpers_test.go | 79 | ||||
-rw-r--r-- | backend/internal/ibd/ibd50.go | 182 | ||||
-rw-r--r-- | backend/internal/ibd/options.go | 26 | ||||
-rw-r--r-- | backend/internal/ibd/search.go | 111 | ||||
-rw-r--r-- | backend/internal/ibd/search_test.go | 205 | ||||
-rw-r--r-- | backend/internal/ibd/stockinfo.go | 233 | ||||
-rw-r--r-- | backend/internal/ibd/transport/scrapfly/options.go | 84 | ||||
-rw-r--r-- | backend/internal/ibd/transport/scrapfly/scraper_types.go | 253 | ||||
-rw-r--r-- | backend/internal/ibd/transport/scrapfly/scrapfly.go | 103 | ||||
-rw-r--r-- | backend/internal/ibd/transport/standard.go | 41 | ||||
-rw-r--r-- | backend/internal/ibd/transport/transport.go | 66 | ||||
-rw-r--r-- | backend/internal/ibd/userinfo.go | 156 |
18 files changed, 2552 insertions, 0 deletions
diff --git a/backend/internal/ibd/auth.go b/backend/internal/ibd/auth.go new file mode 100644 index 0000000..7b82057 --- /dev/null +++ b/backend/internal/ibd/auth.go @@ -0,0 +1,333 @@ +package ibd + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + + "github.com/ansg191/ibd-trader-backend/internal/ibd/transport" + "golang.org/x/net/html" +) + +const ( + signInUrl = "https://myibd.investors.com/secure/signin.aspx?eurl=https%3A%2F%2Fwww.investors.com" + authenticateUrl = "https://sso.accounts.dowjones.com/authenticate" + postAuthUrl = "https://sso.accounts.dowjones.com/postauth/handler" + cookieName = ".ASPXAUTH" +) + +var ErrAuthCookieNotFound = errors.New("cookie not found") +var ErrBadCredentials = errors.New("bad credentials") + +func (c *Client) Authenticate( + ctx context.Context, + username, + password string, +) (*http.Cookie, error) { + cfg, err := c.getLoginPage(ctx) + if err != nil { + return nil, err + } + + token, params, err := c.sendAuthRequest(ctx, cfg, username, password) + if err != nil { + return nil, err + } + + return c.sendPostAuth(ctx, token, params) +} + +func (c *Client) getLoginPage(ctx context.Context) (*authConfig, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, signInUrl, nil) + if err != nil { + return nil, err + } + + resp, err := c.Do(req, withRequiredProps(transport.PropertiesReliable)) + if err != nil { + return nil, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode != http.StatusOK { + content, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + return nil, fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + string(content), + ) + } + + node, err := html.Parse(resp.Body) + if err != nil { + return nil, err + } + + cfg, err := extractAuthConfig(node) + if err != nil { + return nil, fmt.Errorf("failed to 
extract auth config: %w", err) + } + + return cfg, nil +} + +func (c *Client) sendAuthRequest(ctx context.Context, cfg *authConfig, username, password string) (string, string, error) { + body := authRequestBody{ + ClientId: cfg.ClientID, + RedirectUri: cfg.CallbackURL, + Tenant: "sso", + ResponseType: cfg.ExtraParams.ResponseType, + Username: username, + Password: password, + Scope: cfg.ExtraParams.Scope, + State: cfg.ExtraParams.State, + Headers: struct { + XRemoteUser string `json:"x-_remote-_user"` + }(struct{ XRemoteUser string }{ + XRemoteUser: username, + }), + XOidcProvider: "localop", + Protocol: cfg.ExtraParams.Protocol, + Nonce: cfg.ExtraParams.Nonce, + UiLocales: cfg.ExtraParams.UiLocales, + Csrf: cfg.ExtraParams.Csrf, + Intstate: cfg.ExtraParams.Intstate, + Connection: "DJldap", + } + bodyJson, err := json.Marshal(body) + if err != nil { + return "", "", fmt.Errorf("failed to marshal auth request body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, authenticateUrl, bytes.NewReader(bodyJson)) + if err != nil { + return "", "", err + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Auth0-Client", "eyJuYW1lIjoiYXV0aDAuanMtdWxwIiwidmVyc2lvbiI6IjkuMjQuMSJ9") + + resp, err := c.Do(req, + withRequiredProps(transport.PropertiesReliable), + withExpectedStatuses(http.StatusOK, http.StatusUnauthorized)) + if err != nil { + return "", "", err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode == http.StatusUnauthorized { + return "", "", ErrBadCredentials + } else if resp.StatusCode != http.StatusOK { + content, err := io.ReadAll(resp.Body) + if err != nil { + return "", "", fmt.Errorf("failed to read response body: %w", err) + } + return "", "", fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + string(content), + ) + } + + node, err := html.Parse(resp.Body) + if err != nil { + return "", "", err + } + + return extractTokenParams(node) +} + +func 
(c *Client) sendPostAuth(ctx context.Context, token, params string) (*http.Cookie, error) { + body := fmt.Sprintf("token=%s&params=%s", token, params) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, postAuthUrl, strings.NewReader(body)) + if err != nil { + return nil, err + } + + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + resp, err := c.Do(req, withRequiredProps(transport.PropertiesReliable)) + if err != nil { + return nil, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode != http.StatusOK { + content, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + return nil, fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + string(content), + ) + } + + // Extract cookie + for _, cookie := range resp.Cookies() { + if cookie.Name == cookieName { + return cookie, nil + } + } + + return nil, ErrAuthCookieNotFound +} + +func extractAuthConfig(node *html.Node) (*authConfig, error) { + // Find `root` element + root := findId(node, "root") + if root == nil { + return nil, fmt.Errorf("root element not found") + } + + // Get adjacent script element + var script *html.Node + for s := root.NextSibling; s != nil; s = s.NextSibling { + if s.Type == html.ElementNode && s.Data == "script" { + script = s + break + } + } + + if script == nil { + return nil, fmt.Errorf("script element not found") + } + + // Get script content + content := extractText(script) + + // Find `AUTH_CONFIG` variable + const authConfigVar = "const AUTH_CONFIG = '" + i := strings.Index(content, authConfigVar) + if i == -1 { + return nil, fmt.Errorf("AUTH_CONFIG not found") + } + + // Find end of `AUTH_CONFIG` variable + j := strings.Index(content[i+len(authConfigVar):], "'") + + // Extract `AUTH_CONFIG` value + authConfigJSONB64 := content[i+len(authConfigVar) : i+len(authConfigVar)+j] + + // Decode `AUTH_CONFIG` value + authConfigJSON, err := 
base64.StdEncoding.DecodeString(authConfigJSONB64) + if err != nil { + return nil, fmt.Errorf("failed to decode AUTH_CONFIG: %w", err) + } + + // Unmarshal `AUTH_CONFIG` value + var cfg authConfig + if err = json.Unmarshal(authConfigJSON, &cfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal AUTH_CONFIG: %w", err) + } + + return &cfg, nil +} + +type authConfig struct { + Auth0Domain string `json:"auth0Domain"` + CallbackURL string `json:"callbackURL"` + ClientID string `json:"clientID"` + ExtraParams struct { + Protocol string `json:"protocol"` + Scope string `json:"scope"` + ResponseType string `json:"response_type"` + Nonce string `json:"nonce"` + UiLocales string `json:"ui_locales"` + Csrf string `json:"_csrf"` + Intstate string `json:"_intstate"` + State string `json:"state"` + } `json:"extraParams"` + InternalOptions struct { + ResponseType string `json:"response_type"` + ClientId string `json:"client_id"` + Scope string `json:"scope"` + RedirectUri string `json:"redirect_uri"` + UiLocales string `json:"ui_locales"` + Eurl string `json:"eurl"` + Nonce string `json:"nonce"` + State string `json:"state"` + Resource string `json:"resource"` + Protocol string `json:"protocol"` + Client string `json:"client"` + } `json:"internalOptions"` + IsThirdPartyClient bool `json:"isThirdPartyClient"` + AuthorizationServer struct { + Url string `json:"url"` + Issuer string `json:"issuer"` + } `json:"authorizationServer"` +} + +func extractTokenParams(node *html.Node) (token string, params string, err error) { + inputs := findChildrenRecursive(node, func(node *html.Node) bool { + return node.Type == html.ElementNode && node.Data == "input" + }) + + var tokenNode, paramsNode *html.Node + for _, input := range inputs { + for _, attr := range input.Attr { + if attr.Key == "name" && attr.Val == "token" { + tokenNode = input + } else if attr.Key == "name" && attr.Val == "params" { + paramsNode = input + } + } + } + + if tokenNode == nil { + return "", "", 
fmt.Errorf("token input not found") + } + if paramsNode == nil { + return "", "", fmt.Errorf("params input not found") + } + + for _, attr := range tokenNode.Attr { + if attr.Key == "value" { + token = attr.Val + } + } + for _, attr := range paramsNode.Attr { + if attr.Key == "value" { + params = attr.Val + } + } + + return +} + +type authRequestBody struct { + ClientId string `json:"client_id"` + RedirectUri string `json:"redirect_uri"` + Tenant string `json:"tenant"` + ResponseType string `json:"response_type"` + Username string `json:"username"` + Password string `json:"password"` + Scope string `json:"scope"` + State string `json:"state"` + Headers struct { + XRemoteUser string `json:"x-_remote-_user"` + } `json:"headers"` + XOidcProvider string `json:"x-_oidc-_provider"` + Protocol string `json:"protocol"` + Nonce string `json:"nonce"` + UiLocales string `json:"ui_locales"` + Csrf string `json:"_csrf"` + Intstate string `json:"_intstate"` + Connection string `json:"connection"` +} diff --git a/backend/internal/ibd/auth_test.go b/backend/internal/ibd/auth_test.go new file mode 100644 index 0000000..157b507 --- /dev/null +++ b/backend/internal/ibd/auth_test.go @@ -0,0 +1,215 @@ +package ibd + +import ( + "context" + "encoding/json" + "net/http" + "net/url" + "strings" + "testing" + "time" + + "github.com/ansg191/ibd-trader-backend/internal/ibd/transport" + "github.com/jarcoal/httpmock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/html" +) + +const extractAuthHtml = ` +<!doctype html> +<html lang="en"> + <head> + <title>Log in · Dow Jones</title> + <meta charset="UTF-8"/> + <meta name="theme-color" content="white"/> + <meta name="viewport" content="width=device-width,initial-scale=1"/> + <meta name="description" content="Dow Jones One Identity Login page"/> + <link rel="apple-touch-icon" sizes="180x180" href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/images/apple-touch-icon.png"/> + 
<link rel="icon" type="image/png" sizes="32x32" href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/images/favicon-32x32.png"/> + <link rel="icon" type="image/png" sizes="16x16" href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/images/favicon-16x16.png"/> + <link rel="icon" type="image/png" sizes="192x192" href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/images/android-chrome-192x192.png"/> + <link rel="icon" type="image/png" sizes="512x512" href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/images/android-chrome-512x512.png"/> + <link rel="prefetch" href="https://cdn.optimizely.com/js/14856860742.js"/> + <link rel="preconnect" href="//cdn.optimizely.com"/> + <link rel="preconnect" href="//logx.optimizely.com"/> + <script type="module" crossorigin src="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/js/index.js"></script> + <link rel="modulepreload" crossorigin href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/js/vendor.js"> + <link rel="modulepreload" crossorigin href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/js/auth.js"> + <link rel="modulepreload" crossorigin href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/js/router.js"> + <link rel="stylesheet" crossorigin href="/one_identity_login_pages/login/0ce1520be322adcd762319573804f56d/assets/styles.css"> + </head> + <body> + <div id="root" vaul-drawer-wrapper="" class="root-container"></div> + <script> + const AUTH_CONFIG = 
'eyJhdXRoMERvbWFpbiI6InNzby5hY2NvdW50cy5kb3dqb25lcy5jb20iLCJjYWxsYmFja1VSTCI6Imh0dHBzOi8vbXlpYmQuaW52ZXN0b3JzLmNvbS9vaWRjL2NhbGxiYWNrIiwiY2xpZW50SUQiOiJHU1UxcEcyQnJnZDNQdjJLQm5BWjI0enZ5NXVXU0NRbiIsImV4dHJhUGFyYW1zIjp7InByb3RvY29sIjoib2F1dGgyIiwic2NvcGUiOiJvcGVuaWQgaWRwX2lkIHJvbGVzIGVtYWlsIGdpdmVuX25hbWUgZmFtaWx5X25hbWUgdXVpZCBkalVzZXJuYW1lIGRqU3RhdHVzIHRyYWNraWQgdGFncyBwcnRzIHVwZGF0ZWRfYXQgY3JlYXRlZF9hdCBvZmZsaW5lX2FjY2VzcyBkamlkIiwicmVzcG9uc2VfdHlwZSI6ImNvZGUiLCJub25jZSI6IjY0MDJmYWJiLTFiNzUtNGEyYy1hODRmLTExYWQ2MWFhZGI2YiIsInVpX2xvY2FsZXMiOiJlbi11cy14LWliZC0yMy03IiwiX2NzcmYiOiJOZFVSZ3dPQ3VYRU5URXFDcDhNV25tcGtxd3lva2JjU2E2VV9fLTVib3lWc1NzQVNWTkhLU0EiLCJfaW50c3RhdGUiOiJkZXByZWNhdGVkIiwic3RhdGUiOiJlYXJjN3E2UnE2a3lHS3h5LlltbGlxOU4xRXZvU1V0ejhDVjhuMFZBYzZWc1V4RElSTTRTcmxtSWJXMmsifSwiaW50ZXJuYWxPcHRpb25zIjp7InJlc3BvbnNlX3R5cGUiOiJjb2RlIiwiY2xpZW50X2lkIjoiR1NVMXBHMkJyZ2QzUHYyS0JuQVoyNHp2eTV1V1NDUW4iLCJzY29wZSI6Im9wZW5pZCBpZHBfaWQgcm9sZXMgZW1haWwgZ2l2ZW5fbmFtZSBmYW1pbHlfbmFtZSB1dWlkIGRqVXNlcm5hbWUgZGpTdGF0dXMgdHJhY2tpZCB0YWdzIHBydHMgdXBkYXRlZF9hdCBjcmVhdGVkX2F0IG9mZmxpbmVfYWNjZXNzIGRqaWQiLCJyZWRpcmVjdF91cmkiOiJodHRwczovL215aWJkLmludmVzdG9ycy5jb20vb2lkYy9jYWxsYmFjayIsInVpX2xvY2FsZXMiOiJlbi11cy14LWliZC0yMy03IiwiZXVybCI6Imh0dHBzOi8vd3d3LmludmVzdG9ycy5jb20iLCJub25jZSI6IjY0MDJmYWJiLTFiNzUtNGEyYy1hODRmLTExYWQ2MWFhZGI2YiIsInN0YXRlIjoiZWFyYzdxNlJxNmt5R0t4eS5ZbWxpcTlOMUV2b1NVdHo4Q1Y4bjBWQWM2VnNVeERJUk00U3JsbUliVzJrIiwicmVzb3VyY2UiOiJodHRwcyUzQSUyRiUyRnd3dy5pbnZlc3RvcnMuY29tIiwicHJvdG9jb2wiOiJvYXV0aDIiLCJjbGllbnQiOiJHU1UxcEcyQnJnZDNQdjJLQm5BWjI0enZ5NXVXU0NRbiJ9LCJpc1RoaXJkUGFydHlDbGllbnQiOmZhbHNlLCJhdXRob3JpemF0aW9uU2VydmVyIjp7InVybCI6Imh0dHBzOi8vc3NvLmFjY291bnRzLmRvd2pvbmVzLmNvbSIsImlzc3VlciI6Imh0dHBzOi8vc3NvLmFjY291bnRzLmRvd2pvbmVzLmNvbS8ifX0=' + const ENV_CONFIG = 'production' + + window.sessionStorage.setItem('auth-config', AUTH_CONFIG) + window.sessionStorage.setItem('env-config', ENV_CONFIG) + </script> + <script src="https://cdn.optimizely.com/js/14856860742.js" 
crossorigin="anonymous"></script> + <script type="text/javascript" src="https://dcdd29eaa743c493e732-7dc0216bc6cc2f4ed239035dfc17235b.ssl.cf3.rackcdn.com/tags/wsj/hokbottom.js"></script> + <script type="text/javascript" src="/R8As7u5b/init.js"></script> + </body> +</html> +` + +func Test_extractAuthConfig(t *testing.T) { + t.Parallel() + expectedJSON := ` +{ + "auth0Domain": "sso.accounts.dowjones.com", + "callbackURL": "https://myibd.investors.com/oidc/callback", + "clientID": "GSU1pG2Brgd3Pv2KBnAZ24zvy5uWSCQn", + "extraParams": { + "protocol": "oauth2", + "scope": "openid idp_id roles email given_name family_name uuid djUsername djStatus trackid tags prts updated_at created_at offline_access djid", + "response_type": "code", + "nonce": "6402fabb-1b75-4a2c-a84f-11ad61aadb6b", + "ui_locales": "en-us-x-ibd-23-7", + "_csrf": "NdURgwOCuXENTEqCp8MWnmpkqwyokbcSa6U__-5boyVsSsASVNHKSA", + "_intstate": "deprecated", + "state": "earc7q6Rq6kyGKxy.Ymliq9N1EvoSUtz8CV8n0VAc6VsUxDIRM4SrlmIbW2k" + }, + "internalOptions": { + "response_type": "code", + "client_id": "GSU1pG2Brgd3Pv2KBnAZ24zvy5uWSCQn", + "scope": "openid idp_id roles email given_name family_name uuid djUsername djStatus trackid tags prts updated_at created_at offline_access djid", + "redirect_uri": "https://myibd.investors.com/oidc/callback", + "ui_locales": "en-us-x-ibd-23-7", + "eurl": "https://www.investors.com", + "nonce": "6402fabb-1b75-4a2c-a84f-11ad61aadb6b", + "state": "earc7q6Rq6kyGKxy.Ymliq9N1EvoSUtz8CV8n0VAc6VsUxDIRM4SrlmIbW2k", + "resource": "https%3A%2F%2Fwww.investors.com", + "protocol": "oauth2", + "client": "GSU1pG2Brgd3Pv2KBnAZ24zvy5uWSCQn" + }, + "isThirdPartyClient": false, + "authorizationServer": { + "url": "https://sso.accounts.dowjones.com", + "issuer": "https://sso.accounts.dowjones.com/" + } +}` + var expectedCfg authConfig + err := json.Unmarshal([]byte(expectedJSON), &expectedCfg) + require.NoError(t, err) + + node, err := html.Parse(strings.NewReader(extractAuthHtml)) + require.NoError(t, 
err) + + cfg, err := extractAuthConfig(node) + require.NoError(t, err) + require.NotNil(t, cfg) + + assert.Equal(t, expectedCfg, *cfg) +} + +const extractTokenParamsHtml = ` +<form method="post" name="hiddenform" action="https://sso.accounts.dowjones.com/postauth/handler"> + <input type="hidden" name="token" value="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJkalVzZXJuYW1lIjoiYW5zZzE5MUB5YWhvby5jb20iLCJpZCI6IjAxZWFmNTE5LTA0OWItNGIyOS04ZjZhLWQyNjIyZjNiMWJjNiIsImdpdmVuX25hbWUiOiJBbnNodWwiLCJmYW1pbHlfbmFtZSI6Ikd1cHRhIiwibmFtZSI6IkFuc2h1bCBHdXB0YSIsImVtYWlsIjoiYW5zZzE5MUB5YWhvby5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiYWNjb3VudF9pZCI6Ijk5NzI5Mzc0NDIxMiIsImRqaWQiOiIwMWVhZjUxOS0wNDliLTRiMjktOGY2YS1kMjYyMmYzYjFiYzYiLCJ0cmFja2lkIjoiMWM0NGQyMTRmM2VlYTZiMzcyNDYxNDc3NDc0NDMyODJmMTRmY2ZjYmI4NmE4NmVjYTI0MDc2ZDVlMzU4ZmUzZCIsInVwZGF0ZWRfYXQiOjE3MTI3OTQxNTYsImNyZWF0ZWRfYXQiOjE3MTI3OTQxNTYsInVhdCI6MTcyMjU1MjMzOSwicm9sZXMiOlsiQkFSUk9OUy1DSEFOR0VQQVNTV09SRCIsIkZSRUVSRUctQkFTRSIsIkZSRUVSRUctSU5ESVZJRFVBTCIsIldTSi1DSEFOR0VQQVNTV09SRCIsIldTSi1BUkNISVZFIiwiV1NKLVNFTEZTRVJWIiwiSUJELUlORElWSURVQUwiLCJJQkQtSUNBIiwiSUJELUFFSSJdLCJkalN0YXR1cyI6WyJJQkRfVVNFUlMiXSwicHJ0cyI6IjIwMjQwNDEwMTcwOTE2LTA0MDAiLCJjcmVhdGVUaW1lc3RhbXAiOiIyMDI0MDQxMTAwMDkxNloiLCJzdXVpZCI6Ik1ERmxZV1kxTVRrdE1EUTVZaTAwWWpJNUxUaG1ObUV0WkRJMk1qSm1NMkl4WW1NMi50S09fM014VkVReks3dE5qTkdxUXNZMlBNbXp5cUxGRkxySnBrZGhrcDZrIiwic3ViIjoiMDFlYWY1MTktMDQ5Yi00YjI5LThmNmEtZDI2MjJmM2IxYmM2IiwiYXVkIjoiR1NVMXBHMkJyZ2QzUHYyS0JuQVoyNHp2eTV1V1NDUW4iLCJpc3MiOiJodHRwczovL3Nzby5hY2NvdW50cy5kb3dqb25lcy5jb20vIiwiaWF0IjoxNzIyNTUyMzM5MTI0LCJleHAiOjE3MjI1NTI3NzExMjR9.HVn33IFttQrG1JKEV2oElIy3mm8TJ-3GpV_jqZE81_cY22z4IMWPz7zUGz0WgOoUuQGyrYXiaNrfxD6GaoimRL6wxrH0Fy5iYC3dOEdlGfldswfgEOwSiZkBJRc2wWTVQLm93EeJ5ZZyKIXGY_ZkwcYfhrwaTAz8McBBnRmZkm0eiNJQ5YK-QZL-yFa3DxMdPPW91jLA2rjOIVnJ-I_0nMwaJ4ZwXHG2Sw4aAXxtbFqIqarKwIdOUSpRFOCSYpeWcxmbliurKlP1djrKrYgYSZxsKOHZhnbikZDtoDCAlPRlfbKOO4u36KXooDYGJ6p__s2kGCLOLLkP_QLHMNU8Jg"> + <input type="hidden" name="params" 
value="%7B%22response_type%22%3A%22code%22%2C%22client_id%22%3A%22GSU1pG2Brgd3Pv2KBnAZ24zvy5uWSCQn%22%2C%22redirect_uri%22%3A%22https%3A%2F%2Fmyibd.investors.com%2Foidc%2Fcallback%22%2C%22state%22%3A%22J-ihUYZIYzey682D.aOLszineC9qjPkM6Y6wWgFC61ABYBiuK9u48AHTFS5I%22%2C%22scope%22%3A%22openid%20idp_id%20roles%20email%20given_name%20family_name%20uuid%20djUsername%20djStatus%20trackid%20tags%20prts%20updated_at%20created_at%20offline_access%20djid%22%2C%22nonce%22%3A%22457bb517-f490-43b6-a55f-d93f90d698ad%22%7D"> + <noscript> + <p>Script is disabled. Click Submit to continue.</p> + <input type="submit" value="Submit"> + </noscript> +</form> +` +const extractTokenExpectedToken = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJkalVzZXJuYW1lIjoiYW5zZzE5MUB5YWhvby5jb20iLCJpZCI6IjAxZWFmNTE5LTA0OWItNGIyOS04ZjZhLWQyNjIyZjNiMWJjNiIsImdpdmVuX25hbWUiOiJBbnNodWwiLCJmYW1pbHlfbmFtZSI6Ikd1cHRhIiwibmFtZSI6IkFuc2h1bCBHdXB0YSIsImVtYWlsIjoiYW5zZzE5MUB5YWhvby5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiYWNjb3VudF9pZCI6Ijk5NzI5Mzc0NDIxMiIsImRqaWQiOiIwMWVhZjUxOS0wNDliLTRiMjktOGY2YS1kMjYyMmYzYjFiYzYiLCJ0cmFja2lkIjoiMWM0NGQyMTRmM2VlYTZiMzcyNDYxNDc3NDc0NDMyODJmMTRmY2ZjYmI4NmE4NmVjYTI0MDc2ZDVlMzU4ZmUzZCIsInVwZGF0ZWRfYXQiOjE3MTI3OTQxNTYsImNyZWF0ZWRfYXQiOjE3MTI3OTQxNTYsInVhdCI6MTcyMjU1MjMzOSwicm9sZXMiOlsiQkFSUk9OUy1DSEFOR0VQQVNTV09SRCIsIkZSRUVSRUctQkFTRSIsIkZSRUVSRUctSU5ESVZJRFVBTCIsIldTSi1DSEFOR0VQQVNTV09SRCIsIldTSi1BUkNISVZFIiwiV1NKLVNFTEZTRVJWIiwiSUJELUlORElWSURVQUwiLCJJQkQtSUNBIiwiSUJELUFFSSJdLCJkalN0YXR1cyI6WyJJQkRfVVNFUlMiXSwicHJ0cyI6IjIwMjQwNDEwMTcwOTE2LTA0MDAiLCJjcmVhdGVUaW1lc3RhbXAiOiIyMDI0MDQxMTAwMDkxNloiLCJzdXVpZCI6Ik1ERmxZV1kxTVRrdE1EUTVZaTAwWWpJNUxUaG1ObUV0WkRJMk1qSm1NMkl4WW1NMi50S09fM014VkVReks3dE5qTkdxUXNZMlBNbXp5cUxGRkxySnBrZGhrcDZrIiwic3ViIjoiMDFlYWY1MTktMDQ5Yi00YjI5LThmNmEtZDI2MjJmM2IxYmM2IiwiYXVkIjoiR1NVMXBHMkJyZ2QzUHYyS0JuQVoyNHp2eTV1V1NDUW4iLCJpc3MiOiJodHRwczovL3Nzby5hY2NvdW50cy5kb3dqb25lcy5jb20vIiwiaWF0IjoxNzIyNTUyMzM5MTI0LCJleHAiOjE3MjI1NTI3NzExMjR9.HVn33IFttQrG1JKEV2oElIy3mm8TJ-3GpV_jq
ZE81_cY22z4IMWPz7zUGz0WgOoUuQGyrYXiaNrfxD6GaoimRL6wxrH0Fy5iYC3dOEdlGfldswfgEOwSiZkBJRc2wWTVQLm93EeJ5ZZyKIXGY_ZkwcYfhrwaTAz8McBBnRmZkm0eiNJQ5YK-QZL-yFa3DxMdPPW91jLA2rjOIVnJ-I_0nMwaJ4ZwXHG2Sw4aAXxtbFqIqarKwIdOUSpRFOCSYpeWcxmbliurKlP1djrKrYgYSZxsKOHZhnbikZDtoDCAlPRlfbKOO4u36KXooDYGJ6p__s2kGCLOLLkP_QLHMNU8Jg" +const extractTokenExpectedParams = "%7B%22response_type%22%3A%22code%22%2C%22client_id%22%3A%22GSU1pG2Brgd3Pv2KBnAZ24zvy5uWSCQn%22%2C%22redirect_uri%22%3A%22https%3A%2F%2Fmyibd.investors.com%2Foidc%2Fcallback%22%2C%22state%22%3A%22J-ihUYZIYzey682D.aOLszineC9qjPkM6Y6wWgFC61ABYBiuK9u48AHTFS5I%22%2C%22scope%22%3A%22openid%20idp_id%20roles%20email%20given_name%20family_name%20uuid%20djUsername%20djStatus%20trackid%20tags%20prts%20updated_at%20created_at%20offline_access%20djid%22%2C%22nonce%22%3A%22457bb517-f490-43b6-a55f-d93f90d698ad%22%7D" + +func Test_extractTokenParams(t *testing.T) { + t.Parallel() + + node, err := html.Parse(strings.NewReader(extractTokenParamsHtml)) + require.NoError(t, err) + + token, params, err := extractTokenParams(node) + require.NoError(t, err) + assert.Equal(t, extractTokenExpectedToken, token) + assert.Equal(t, extractTokenExpectedParams, params) +} + +func TestClient_Authenticate(t *testing.T) { + t.Parallel() + + expectedVal := "test-cookie" + expectedExp := time.Now().Add(time.Hour).Round(time.Second).In(time.UTC) + + tp := httpmock.NewMockTransport() + tp.RegisterResponder("GET", signInUrl, + httpmock.NewStringResponder(http.StatusOK, extractAuthHtml)) + tp.RegisterResponder("POST", authenticateUrl, + func(request *http.Request) (*http.Response, error) { + var body authRequestBody + require.NoError(t, json.NewDecoder(request.Body).Decode(&body)) + assert.Equal(t, "abc", body.Username) + assert.Equal(t, "xyz", body.Password) + + return httpmock.NewStringResponse(http.StatusOK, extractTokenParamsHtml), nil + }) + tp.RegisterResponder("POST", postAuthUrl, + func(request *http.Request) (*http.Response, error) { + require.NoError(t, 
request.ParseForm()) + assert.Equal(t, extractTokenExpectedToken, request.Form.Get("token")) + + params, err := url.QueryUnescape(extractTokenExpectedParams) + require.NoError(t, err) + assert.Equal(t, params, request.Form.Get("params")) + + resp := httpmock.NewStringResponse(http.StatusOK, "OK") + cookie := &http.Cookie{Name: cookieName, Value: expectedVal, Expires: expectedExp} + resp.Header.Set("Set-Cookie", cookie.String()) + return resp, nil + }) + + client := NewClient(nil, nil, newTransport(tp)) + + cookie, err := client.Authenticate(context.Background(), "abc", "xyz") + require.NoError(t, err) + require.NotNil(t, cookie) + + assert.Equal(t, expectedVal, cookie.Value) + assert.Equal(t, expectedExp, cookie.Expires) +} + +func TestClient_Authenticate_401(t *testing.T) { + t.Parallel() + + tp := httpmock.NewMockTransport() + tp.RegisterResponder("GET", signInUrl, + httpmock.NewStringResponder(http.StatusOK, extractAuthHtml)) + tp.RegisterResponder("POST", authenticateUrl, + func(request *http.Request) (*http.Response, error) { + var body authRequestBody + require.NoError(t, json.NewDecoder(request.Body).Decode(&body)) + assert.Equal(t, "abc", body.Username) + assert.Equal(t, "xyz", body.Password) + + return httpmock.NewStringResponse(http.StatusUnauthorized, `{"name":"ValidationError","code":"ERR016","message":"Wrong username or password","description":"Wrong username or password"}`), nil + }) + + client := NewClient(nil, nil, newTransport(tp)) + + cookie, err := client.Authenticate(context.Background(), "abc", "xyz") + assert.Nil(t, cookie) + assert.ErrorIs(t, err, ErrBadCredentials) +} + +type testReliableTransport http.Client + +func newTransport(tp *httpmock.MockTransport) *testReliableTransport { + return (*testReliableTransport)(&http.Client{Transport: tp}) +} + +func (t *testReliableTransport) String() string { + return "testReliableTransport" +} + +func (t *testReliableTransport) Do(req *http.Request) (*http.Response, error) { + return 
(*http.Client)(t).Do(req) +} + +func (t *testReliableTransport) Properties() transport.Properties { + return transport.PropertiesFree | transport.PropertiesReliable +} diff --git a/backend/internal/ibd/check_ibd_username.go b/backend/internal/ibd/check_ibd_username.go new file mode 100644 index 0000000..b026151 --- /dev/null +++ b/backend/internal/ibd/check_ibd_username.go @@ -0,0 +1,68 @@ +package ibd + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +const ( + checkUsernameUrl = "https://sso.accounts.dowjones.com/getuser" +) + +func (c *Client) CheckIBDUsername(ctx context.Context, username string) (bool, error) { + cfg, err := c.getLoginPage(ctx) + if err != nil { + return false, err + } + + return c.checkIBDUsername(ctx, cfg, username) +} + +func (c *Client) checkIBDUsername(ctx context.Context, cfg *authConfig, username string) (bool, error) { + body := map[string]string{ + "username": username, + "csrf": cfg.ExtraParams.Csrf, + } + bodyJson, err := json.Marshal(body) + if err != nil { + return false, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, checkUsernameUrl, bytes.NewReader(bodyJson)) + if err != nil { + return false, err + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-REMOTE-USER", username) + req.Header.Set("X-REQUEST-EDITIONID", "IBD-EN_US") + req.Header.Set("X-REQUEST-SCHEME", "https") + + resp, err := c.Do(req, withExpectedStatuses(http.StatusOK, http.StatusUnauthorized)) + if err != nil { + return false, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode == http.StatusUnauthorized { + return false, nil + } else if resp.StatusCode != http.StatusOK { + contentBytes, err := io.ReadAll(resp.Body) + if err != nil { + return false, fmt.Errorf("failed to read response body: %w", err) + } + content := string(contentBytes) + return false, fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + content, + ) 
+ } + return true, nil +} diff --git a/backend/internal/ibd/client.go b/backend/internal/ibd/client.go new file mode 100644 index 0000000..c8575e3 --- /dev/null +++ b/backend/internal/ibd/client.go @@ -0,0 +1,97 @@ +package ibd + +import ( + "context" + "errors" + "log/slog" + "net/http" + "slices" + + "github.com/ansg191/ibd-trader-backend/internal/database" + "github.com/ansg191/ibd-trader-backend/internal/ibd/transport" + "github.com/ansg191/ibd-trader-backend/internal/keys" +) + +var ErrNoAvailableCookies = errors.New("no available cookies") +var ErrNoAvailableTransports = errors.New("no available transports") + +type Client struct { + transports []transport.Transport + db database.Executor + kms keys.KeyManagementService +} + +func NewClient( + db database.Executor, + kms keys.KeyManagementService, + transports ...transport.Transport, +) *Client { + return &Client{transports, db, kms} +} + +func (c *Client) getCookie(ctx context.Context, subject *string) (uint, *http.Cookie, error) { + if subject == nil { + // No subject requirement, get any cookie + cookie, err := database.GetAnyCookie(ctx, c.db, c.kms) + if err != nil { + return 0, nil, err + } + if cookie == nil { + return 0, nil, ErrNoAvailableCookies + } + + return cookie.ID, cookie.ToHTTPCookie(), nil + } + + // Get cookie by subject + cookies, err := database.GetCookies(ctx, c.db, c.kms, *subject, false) + if err != nil { + return 0, nil, err + } + + if len(cookies) == 0 { + return 0, nil, ErrNoAvailableCookies + } + + cookie := cookies[0] + + return cookie.ID, cookie.ToHTTPCookie(), nil +} + +func (c *Client) Do(req *http.Request, opts ...optionFunc) (*http.Response, error) { + o := defaultOptions + for _, opt := range opts { + opt(&o) + } + + // Sort and filter transports by properties + transports := transport.FilterTransports(c.transports, o.requiredProps) + transport.SortTransports(transports) + + for _, tp := range transports { + resp, err := tp.Do(req) + if errors.Is(err, 
transport.ErrUnsupportedRequest) { + // Skip unsupported transport + continue + } + if err != nil { + slog.ErrorContext(req.Context(), "transport error", + "transport", tp.String(), + "error", err, + ) + continue + } + if slices.Contains(o.expectedStatuses, resp.StatusCode) { + return resp, nil + } else { + slog.ErrorContext(req.Context(), "unexpected status code", + "transport", tp.String(), + "expected", o.expectedStatuses, + "actual", resp.StatusCode, + ) + continue + } + } + + return nil, ErrNoAvailableTransports +} diff --git a/backend/internal/ibd/client_test.go b/backend/internal/ibd/client_test.go new file mode 100644 index 0000000..2368a31 --- /dev/null +++ b/backend/internal/ibd/client_test.go @@ -0,0 +1,201 @@ +package ibd + +import ( + "context" + "database/sql" + "fmt" + "log" + "math/rand/v2" + "testing" + "time" + + "github.com/ansg191/ibd-trader-backend/internal/database" + "github.com/ansg191/ibd-trader-backend/internal/keys" + _ "github.com/lib/pq" + "github.com/ory/dockertest/v3" + "github.com/ory/dockertest/v3/docker" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + db *sql.DB + maxTime = time.Date(2100, 1, 1, 0, 0, 0, 0, time.UTC) + letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +) + +func TestMain(m *testing.M) { + pool, err := dockertest.NewPool("") + if err != nil { + log.Fatalf("Could not create pool: %s", err) + } + + err = pool.Client.Ping() + if err != nil { + log.Fatalf("Could not connect to Docker: %s", err) + } + + resource, err := pool.RunWithOptions(&dockertest.RunOptions{ + Repository: "postgres", + Tag: "16", + Env: []string{ + "POSTGRES_PASSWORD=secret", + "POSTGRES_USER=ibd-client-test", + "POSTGRES_DB=ibd-client-test", + "listen_addresses='*'", + }, + Cmd: []string{ + "postgres", + "-c", + "log_statement=all", + }, + }, func(config *docker.HostConfig) { + config.AutoRemove = true + config.RestartPolicy = docker.RestartPolicy{Name: "no"} + }) + if err != 
nil { + log.Fatalf("Could not start resource: %s", err) + } + + hostAndPort := resource.GetHostPort("5432/tcp") + databaseUrl := fmt.Sprintf("postgres://ibd-client-test:secret@%s/ibd-client-test?sslmode=disable", hostAndPort) + + // Kill container after 120 seconds + _ = resource.Expire(120) + + pool.MaxWait = 120 * time.Second + if err = pool.Retry(func() error { + db, err = sql.Open("postgres", databaseUrl) + if err != nil { + return err + } + return db.Ping() + }); err != nil { + log.Fatalf("Could not connect to database: %s", err) + } + + err = database.Migrate(context.Background(), databaseUrl) + if err != nil { + log.Fatalf("Could not migrate database: %s", err) + } + + defer func() { + if err := pool.Purge(resource); err != nil { + log.Fatalf("Could not purge resource: %s", err) + } + }() + + m.Run() +} + +func randStringRunes(n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letterRunes[rand.IntN(len(letterRunes))] + } + return string(b) +} + +func addCookie(t *testing.T) (user, token string) { + t.Helper() + + // Randomly generate a user and token + user = randStringRunes(8) + token = randStringRunes(16) + + ciphertext, key, err := keys.Encrypt(context.Background(), new(kmsStub), "", []byte(token)) + require.NoError(t, err) + + tx, err := db.Begin() + require.NoError(t, err) + + var keyID uint + err = tx.QueryRow(` +INSERT INTO keys (kms_key_name, encrypted_key) + VALUES ('', $1) + RETURNING id; +`, key).Scan(&keyID) + require.NoError(t, err) + + _, err = tx.Exec(` +INSERT +INTO users (subject, encryption_key) +VALUES ($1, $2); +`, user, keyID) + require.NoError(t, err) + + _, err = tx.Exec(` +INSERT +INTO ibd_tokens (user_subject, token, encryption_key, expires_at) +VALUES ($1, $2, $3, $4);`, + user, + ciphertext, + keyID, + maxTime, + ) + require.NoError(t, err) + + err = tx.Commit() + require.NoError(t, err) + + return user, token +} + +func TestClient_getCookie(t *testing.T) { + t.Run("no cookies", func(t *testing.T) { + client := 
NewClient(db, new(kmsStub)) + + _, _, err := client.getCookie(context.Background(), nil) + assert.ErrorIs(t, err, ErrNoAvailableCookies) + }) + + t.Run("no cookies by subject", func(t *testing.T) { + client := NewClient(db, new(kmsStub)) + + subject := "test" + _, _, err := client.getCookie(context.Background(), &subject) + assert.ErrorIs(t, err, ErrNoAvailableCookies) + }) + + t.Run("get any cookie", func(t *testing.T) { + _, token := addCookie(t) + + client := NewClient(db, new(kmsStub)) + + _, cookie, err := client.getCookie(context.Background(), nil) + require.NoError(t, err) + assert.Equal(t, cookieName, cookie.Name) + assert.Equal(t, token, cookie.Value) + assert.Equal(t, "/", cookie.Path) + assert.Equal(t, maxTime, cookie.Expires) + assert.Equal(t, "investors.com", cookie.Domain) + }) + + t.Run("get cookie by subject", func(t *testing.T) { + subject, token := addCookie(t) + + client := NewClient(db, new(kmsStub)) + + _, cookie, err := client.getCookie(context.Background(), &subject) + require.NoError(t, err) + assert.Equal(t, cookieName, cookie.Name) + assert.Equal(t, token, cookie.Value) + assert.Equal(t, "/", cookie.Path) + assert.Equal(t, maxTime, cookie.Expires) + assert.Equal(t, "investors.com", cookie.Domain) + }) +} + +type kmsStub struct{} + +func (k *kmsStub) Close() error { + return nil +} + +func (k *kmsStub) Encrypt(_ context.Context, _ string, plaintext []byte) ([]byte, error) { + return plaintext, nil +} + +func (k *kmsStub) Decrypt(_ context.Context, _ string, ciphertext []byte) ([]byte, error) { + return ciphertext, nil +} diff --git a/backend/internal/ibd/html_helpers.go b/backend/internal/ibd/html_helpers.go new file mode 100644 index 0000000..0176bc5 --- /dev/null +++ b/backend/internal/ibd/html_helpers.go @@ -0,0 +1,99 @@ +package ibd + +import ( + "strings" + + "golang.org/x/net/html" +) + +func findChildren(node *html.Node, f func(node *html.Node) bool) (found []*html.Node) { + for c := node.FirstChild; c != nil; c = c.NextSibling { + 
if f(c) { + found = append(found, c) + } + } + return +} + +func findChildrenRecursive(node *html.Node, f func(node *html.Node) bool) (found []*html.Node) { + if f(node) { + found = append(found, node) + } + + for c := node.FirstChild; c != nil; c = c.NextSibling { + found = append(found, findChildrenRecursive(c, f)...) + } + + return +} + +func findClass(node *html.Node, className string) (found *html.Node) { + if isClass(node, className) { + return node + } + + for c := node.FirstChild; c != nil; c = c.NextSibling { + if found = findClass(c, className); found != nil { + return + } + } + + return +} + +func isClass(node *html.Node, className string) bool { + if node.Type == html.ElementNode { + for _, attr := range node.Attr { + if attr.Key != "class" { + continue + } + classes := strings.Fields(attr.Val) + for _, class := range classes { + if class == className { + return true + } + } + } + } + return false +} + +func extractText(node *html.Node) string { + var result strings.Builder + extractTextInner(node, &result) + return result.String() +} + +func extractTextInner(node *html.Node, result *strings.Builder) { + if node.Type == html.TextNode { + result.WriteString(node.Data) + } + for c := node.FirstChild; c != nil; c = c.NextSibling { + extractTextInner(c, result) + } +} + +func findId(node *html.Node, id string) (found *html.Node) { + if isId(node, id) { + return node + } + + for c := node.FirstChild; c != nil; c = c.NextSibling { + if found = findId(c, id); found != nil { + return + } + } + + return +} + +func isId(node *html.Node, id string) bool { + if node.Type == html.ElementNode { + for _, attr := range node.Attr { + if attr.Key == "id" && attr.Val == id { + return true + } + } + } + return false +} diff --git a/backend/internal/ibd/html_helpers_test.go b/backend/internal/ibd/html_helpers_test.go new file mode 100644 index 0000000..d251c39 --- /dev/null +++ b/backend/internal/ibd/html_helpers_test.go @@ -0,0 +1,79 @@ +package ibd + +import ( + "strings" 
+ "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/html" +) + +func Test_findClass(t *testing.T) { + t.Parallel() + tests := []struct { + name string + html string + className string + found bool + expData string + }{ + { + name: "class exists", + html: `<div class="foo"></div>`, + className: "foo", + found: true, + expData: "div", + }, + { + name: "class exists nested", + html: `<div class="foo"><a class="abc"></a></div>`, + className: "abc", + found: true, + expData: "a", + }, + { + name: "class exists multiple", + html: `<div class="foo"><a class="foo"></a></div>`, + className: "foo", + found: true, + expData: "div", + }, + { + name: "class missing", + html: `<div class="abc"><a class="xyz"></a></div>`, + className: "foo", + found: false, + expData: "", + }, + { + name: "class missing", + html: `<div id="foo"><a abc="xyz"></a></div>`, + className: "foo", + found: false, + expData: "", + }, + { + name: "class exists multiple save div", + html: `<div class="foo bar"></div>`, + className: "bar", + found: true, + expData: "div", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + node, err := html.Parse(strings.NewReader(tt.html)) + require.NoError(t, err) + + got := findClass(node, tt.className) + if !tt.found { + require.Nil(t, got) + return + } + require.NotNil(t, got) + assert.Equal(t, tt.expData, got.Data) + }) + } +} diff --git a/backend/internal/ibd/ibd50.go b/backend/internal/ibd/ibd50.go new file mode 100644 index 0000000..52e28aa --- /dev/null +++ b/backend/internal/ibd/ibd50.go @@ -0,0 +1,182 @@ +package ibd + +import ( + "context" + "encoding/json" + "errors" + "io" + "log/slog" + "net/http" + "net/url" + "strconv" + + "github.com/ansg191/ibd-trader-backend/internal/database" +) + +const ibd50Url = "https://research.investors.com/Services/SiteAjaxService.asmx/GetIBD50?sortcolumn1=%22ibd100rank%22&sortOrder1=%22asc%22&sortcolumn2=%22%22&sortOrder2=%22ASC%22" + +// 
GetIBD50 returns the IBD50 list. +func (c *Client) GetIBD50(ctx context.Context) ([]*Stock, error) { + // We cannot use the scraper here because scrapfly does not support + // Content-Type in GET requests. + req, err := http.NewRequestWithContext(ctx, http.MethodGet, ibd50Url, nil) + if err != nil { + return nil, err + } + + cookieId, cookie, err := c.getCookie(ctx, nil) + if err != nil { + return nil, err + } + req.AddCookie(cookie) + + req.Header.Add("content-type", "application/json; charset=utf-8") + // Add browser-emulating headers + req.Header.Add("accept", "*/*") + req.Header.Add("accept-language", "en-US,en;q=0.9") + req.Header.Add("newrelic", "eyJ2IjpbMCwxXSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6IjMzOTYxMDYiLCJhcCI6IjEzODU5ODMwMDEiLCJpZCI6IjM1Zjk5NmM2MzNjYTViMWYiLCJ0ciI6IjM3ZmRhZmJlOGY2YjhmYTMwYWMzOTkzOGNlMmM0OWMxIiwidGkiOjE3MjIyNzg0NTk3MjUsInRrIjoiMTAyMjY4MSJ9fQ==") + req.Header.Add("priority", "u=1, i") + req.Header.Add("referer", "https://research.investors.com/stock-lists/ibd-50/") + req.Header.Add("sec-ch-ua", "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"") + req.Header.Add("sec-ch-ua-mobile", "?0") + req.Header.Add("sec-ch-ua-platform", "\"macOS\"") + req.Header.Add("sec-fetch-dest", "empty") + req.Header.Add("sec-fetch-mode", "cors") + req.Header.Add("sec-fetch-site", "same-origin") + req.Header.Add("traceparent", "00-37fdafbe8f6b8fa30ac39938ce2c49c1-35f996c633ca5b1f-01") + req.Header.Add("tracestate", "1022681@nr=0-1-3396106-1385983001-35f996c633ca5b1f----1722278459725") + req.Header.Add("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36") + req.Header.Add("x-newrelic-id", "VwUOV1dTDhABV1FRBgQOVVUF") + req.Header.Add("x-requested-with", "XMLHttpRequest") + + resp, err := c.Do(req) + if err != nil { + return nil, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + var ibd50Resp getIBD50Response + if err = 
json.NewDecoder(resp.Body).Decode(&ibd50Resp); err != nil { + return nil, err + } + + // If there are less than 10 stocks in the IBD50 list, it's likely that authentication failed. + if len(ibd50Resp.D.ETablesDataList) < 10 { + // Report cookie failure to DB + if err = database.ReportCookieFailure(ctx, c.db, cookieId); err != nil { + slog.Error("Failed to report cookie failure", "error", err) + } + return nil, errors.New("failed to get IBD50 list") + } + + return ibd50Resp.ToStockList(), nil +} + +type Stock struct { + Rank int64 + Symbol string + Name string + + QuoteURL *url.URL +} + +type getIBD50Response struct { + D struct { + Type *string `json:"__type"` + ETablesDataList []struct { + Rank string `json:"Rank"` + Symbol string `json:"Symbol"` + CompanyName string `json:"CompanyName"` + CompRating *string `json:"CompRating"` + EPSRank *string `json:"EPSRank"` + RelSt *string `json:"RelSt"` + GrpStr *string `json:"GrpStr"` + Smr *string `json:"Smr"` + AccDis *string `json:"AccDis"` + SponRating *string `json:"SponRating"` + Price *string `json:"Price"` + PriceClose *string `json:"PriceClose"` + PriceChange *string `json:"PriceChange"` + PricePerChange *string `json:"PricePerChange"` + VolPerChange *string `json:"VolPerChange"` + DailyVol *string `json:"DailyVol"` + WeekHigh52 *string `json:"WeekHigh52"` + PerOffHigh *string `json:"PerOffHigh"` + PERatio *string `json:"PERatio"` + DivYield *string `json:"DivYield"` + LastQtrSalesPerChg *string `json:"LastQtrSalesPerChg"` + LastQtrEpsPerChg *string `json:"LastQtrEpsPerChg"` + ConsecQtrEpsGrt15 *string `json:"ConsecQtrEpsGrt15"` + CurQtrEpsEstPerChg *string `json:"CurQtrEpsEstPerChg"` + CurYrEpsEstPerChg *string `json:"CurYrEpsEstPerChg"` + PretaxMargin *string `json:"PretaxMargin"` + ROE *string `json:"ROE"` + MgmtOwnsPer *string `json:"MgmtOwnsPer"` + QuoteUrl *string `json:"QuoteUrl"` + StockCheckupUrl *string `json:"StockCheckupUrl"` + MarketsmithUrl *string `json:"MarketsmithUrl"` + LeaderboardUrl *string 
`json:"LeaderboardUrl"` + ChartAnalysisUrl *string `json:"ChartAnalysisUrl"` + Ibd100NewEntryFlag *string `json:"Ibd100NewEntryFlag"` + Ibd100UpInRankFlag *string `json:"Ibd100UpInRankFlag"` + IbdBigCap20NewEntryFlag *string `json:"IbdBigCap20NewEntryFlag"` + CompDesc *string `json:"CompDesc"` + NumberFunds *string `json:"NumberFunds"` + GlobalRank *string `json:"GlobalRank"` + EPSPriorQtr *string `json:"EPSPriorQtr"` + QtrsFundIncrease *string `json:"QtrsFundIncrease"` + } `json:"ETablesDataList"` + IBD50PdfUrl *string `json:"IBD50PdfUrl"` + CAP20PdfUrl *string `json:"CAP20PdfUrl"` + IBD50Date *string `json:"IBD50Date"` + CAP20Date *string `json:"CAP20Date"` + UpdatedDate *string `json:"UpdatedDate"` + GetAllFlags *string `json:"getAllFlags"` + Flag *int `json:"flag"` + Message *string `json:"Message"` + PaywallDesktopMarkup *string `json:"PaywallDesktopMarkup"` + PaywallMobileMarkup *string `json:"PaywallMobileMarkup"` + } `json:"d"` +} + +func (r getIBD50Response) ToStockList() (ibd []*Stock) { + ibd = make([]*Stock, 0, len(r.D.ETablesDataList)) + for _, data := range r.D.ETablesDataList { + rank, err := strconv.ParseInt(data.Rank, 10, 64) + if err != nil { + slog.Error( + "Failed to parse Rank", + "error", err, + "rank", data.Rank, + "symbol", data.Symbol, + "name", data.CompanyName, + ) + continue + } + + var quoteUrl *url.URL + if data.QuoteUrl != nil { + quoteUrl, err = url.Parse(*data.QuoteUrl) + if err != nil { + slog.Error( + "Failed to parse QuoteUrl", + "error", err, + "quoteUrl", *data.QuoteUrl, + "rank", data.Rank, + "symbol", data.Symbol, + "name", data.CompanyName, + ) + } + } + + ibd = append(ibd, &Stock{ + Rank: rank, + Symbol: data.Symbol, + Name: data.CompanyName, + QuoteURL: quoteUrl, + }) + } + return +} diff --git a/backend/internal/ibd/options.go b/backend/internal/ibd/options.go new file mode 100644 index 0000000..5c378d5 --- /dev/null +++ b/backend/internal/ibd/options.go @@ -0,0 +1,26 @@ +package ibd + +import 
"github.com/ansg191/ibd-trader-backend/internal/ibd/transport" + +type optionFunc func(*options) + +var defaultOptions = options{ + expectedStatuses: []int{200}, +} + +type options struct { + expectedStatuses []int + requiredProps transport.Properties +} + +func withExpectedStatuses(statuses ...int) optionFunc { + return func(o *options) { + o.expectedStatuses = append(o.expectedStatuses, statuses...) + } +} + +func withRequiredProps(props transport.Properties) optionFunc { + return func(o *options) { + o.requiredProps = props + } +} diff --git a/backend/internal/ibd/search.go b/backend/internal/ibd/search.go new file mode 100644 index 0000000..341b14b --- /dev/null +++ b/backend/internal/ibd/search.go @@ -0,0 +1,111 @@ +package ibd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "github.com/ansg191/ibd-trader-backend/internal/database" +) + +const ( + searchUrl = "https://ibdservices.investors.com/im/api/search" +) + +var ErrSymbolNotFound = fmt.Errorf("symbol not found") + +func (c *Client) Search(ctx context.Context, symbol string) (database.Stock, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchUrl, nil) + if err != nil { + return database.Stock{}, err + } + + _, cookie, err := c.getCookie(ctx, nil) + if err != nil { + return database.Stock{}, err + } + req.AddCookie(cookie) + + params := url.Values{} + params.Set("key", symbol) + req.URL.RawQuery = params.Encode() + + resp, err := c.Do(req) + if err != nil { + return database.Stock{}, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode != http.StatusOK { + content, err := io.ReadAll(resp.Body) + if err != nil { + return database.Stock{}, fmt.Errorf("failed to read response body: %w", err) + } + return database.Stock{}, fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + string(content), + ) + } + + var sr searchResponse + if err = json.NewDecoder(resp.Body).Decode(&sr); 
err != nil { + return database.Stock{}, err + } + + for _, stock := range sr.StockData { + if stock.Symbol == symbol { + return database.Stock{ + Symbol: stock.Symbol, + Name: stock.Company, + IBDUrl: stock.QuoteUrl, + }, nil + } + } + + return database.Stock{}, ErrSymbolNotFound +} + +type searchResponse struct { + Status int `json:"_status"` + Timestamp string `json:"_timestamp"` + StockData []struct { + Id int `json:"id"` + Symbol string `json:"symbol"` + Company string `json:"company"` + PriceDate string `json:"priceDate"` + Price float64 `json:"price"` + PreviousPrice float64 `json:"previousPrice"` + PriceChange float64 `json:"priceChange"` + PricePctChange float64 `json:"pricePctChange"` + Volume int `json:"volume"` + VolumeChange int `json:"volumeChange"` + VolumePctChange int `json:"volumePctChange"` + QuoteUrl string `json:"quoteUrl"` + } `json:"stockData"` + News []struct { + Title string `json:"title"` + Category string `json:"category"` + Body string `json:"body"` + ImageAlt string `json:"imageAlt"` + ImageUrl string `json:"imageUrl"` + NewsUrl string `json:"newsUrl"` + CategoryUrl string `json:"categoryUrl"` + PublishDate time.Time `json:"publishDate"` + PublishDateUnixts int `json:"publishDateUnixts"` + Stocks []struct { + Id int `json:"id"` + Index int `json:"index"` + Symbol string `json:"symbol"` + PricePctChange string `json:"pricePctChange"` + } `json:"stocks"` + VideoFormat bool `json:"videoFormat"` + } `json:"news"` + FullUrl string `json:"fullUrl"` +} diff --git a/backend/internal/ibd/search_test.go b/backend/internal/ibd/search_test.go new file mode 100644 index 0000000..05e93dc --- /dev/null +++ b/backend/internal/ibd/search_test.go @@ -0,0 +1,205 @@ +package ibd + +import ( + "context" + "net/http" + "testing" + + "github.com/ansg191/ibd-trader-backend/internal/ibd/transport" + "github.com/jarcoal/httpmock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const searchResponseJSON = ` +{ + "_status": 200, 
+ "_timestamp": "1722879439.724106", + "stockData": [ + { + "id": 13717, + "symbol": "AAPL", + "company": "Apple", + "priceDate": "2024-08-05T09:18:00", + "price": 212.33, + "previousPrice": 219.86, + "priceChange": -7.53, + "pricePctChange": -3.42, + "volume": 643433, + "volumeChange": -2138, + "volumePctChange": 124, + "quoteUrl": "https://research.investors.com/stock-quotes/nasdaq-apple-aapl.htm" + }, + { + "id": 79964, + "symbol": "AAPU", + "company": "Direxion AAPL Bull 2X", + "priceDate": "2024-08-05T09:18:00", + "price": 32.48, + "previousPrice": 34.9, + "priceChange": -2.42, + "pricePctChange": -6.92, + "volume": 15265, + "volumeChange": -35, + "volumePctChange": 212, + "quoteUrl": "https://research.investors.com/stock-quotes/nasdaq-direxion-aapl-bull-2x-aapu.htm" + }, + { + "id": 80423, + "symbol": "APLY", + "company": "YieldMax AAPL Option Incm", + "priceDate": "2024-08-05T09:11:00", + "price": 17.52, + "previousPrice": 18.15, + "priceChange": -0.63, + "pricePctChange": -3.47, + "volume": 617, + "volumeChange": -2, + "volumePctChange": 97, + "quoteUrl": "https://research.investors.com/stock-quotes/nyse-yieldmax-aapl-option-incm-aply.htm" + }, + { + "id": 79962, + "symbol": "AAPD", + "company": "Direxion Dly AAPL Br 1X", + "priceDate": "2024-08-05T09:18:00", + "price": 18.11, + "previousPrice": 17.53, + "priceChange": 0.58, + "pricePctChange": 3.31, + "volume": 14572, + "volumeChange": -7, + "volumePctChange": 885, + "quoteUrl": "https://research.investors.com/stock-quotes/nasdaq-direxion-dly-aapl-br-1x-aapd.htm" + }, + { + "id": 79968, + "symbol": "AAPB", + "company": "GraniteSh 2x Lg AAPL", + "priceDate": "2024-08-05T09:16:00", + "price": 25.22, + "previousPrice": 27.25, + "priceChange": -2.03, + "pricePctChange": -7.45, + "volume": 2505, + "volumeChange": -7, + "volumePctChange": 151, + "quoteUrl": "https://research.investors.com/stock-quotes/nasdaq-granitesh-2x-lg-aapl-aapb.htm" + } + ], + "news": [ + { + "title": "Warren Buffett Dumped Berkshire 
Hathaway's Favorite Stocks — Right Before They Plunged", + "category": "News", + "body": "Berkshire Hathaway earnings rose solidly in Q2. Warren Buffett sold nearly half his Apple stock stake. Berkshire stock fell...", + "imageAlt": "", + "imageUrl": "https://www.investors.com/wp-content/uploads/2024/06/Stock-WarrenBuffettwave-01-shutt-640x360.jpg", + "newsUrl": "https://investors.com/news/berkshire-hathaway-earnings-q2-2024-warren-buffett-apple/", + "categoryUrl": "https://investors.com/category/news/", + "publishDate": "2024-08-05T15:51:57+00:00", + "publishDateUnixts": 1722858717, + "stocks": [ + { + "id": 13717, + "index": 0, + "symbol": "AAPL", + "pricePctChange": "-3.42" + } + ], + "videoFormat": false + }, + { + "title": "Nvidia Plunges On Report Of AI Chip Flaw; Is It A Buy Now?", + "category": "Research", + "body": "Nvidia will roll out its Blackwell chip at least three months later than planned.", + "imageAlt": "", + "imageUrl": "https://www.investors.com/wp-content/uploads/2024/01/Stock-Nvidia-studio-01-company-640x360.jpg", + "newsUrl": "https://investors.com/research/nvda-stock-is-nvidia-a-buy-2/", + "categoryUrl": "https://investors.com/category/research/", + "publishDate": "2024-08-05T14:59:22+00:00", + "publishDateUnixts": 1722855562, + "stocks": [ + { + "id": 38607, + "index": 0, + "symbol": "NVDA", + "pricePctChange": "-5.18" + } + ], + "videoFormat": false + }, + { + "title": "Magnificent Seven Stocks Roiled: Nvidia Plunges On AI Chip Delay; Apple, Tesla Dive", + "category": "Research", + "body": "Nvidia stock dived Monday, while Apple and Tesla also fell sharply.", + "imageAlt": "", + "imageUrl": "https://www.investors.com/wp-content/uploads/2022/08/Stock-Nvidia-RTXa5500-comp-640x360.jpg", + "newsUrl": "https://investors.com/research/magnificent-seven-stocks-to-buy-and-and-watch/", + "categoryUrl": "https://investors.com/category/research/", + "publishDate": "2024-08-05T14:51:42+00:00", + "publishDateUnixts": 1722855102, + "stocks": [ + { + 
"id": 13717, + "index": 0, + "symbol": "AAPL", + "pricePctChange": "-3.42" + } + ], + "videoFormat": false + } + ], + "fullUrl": "https://www.investors.com/search-results/?query=AAPL" +}` + +const emptySearchResponseJSON = ` +{ + "_status": 200, + "_timestamp": "1722879662.804395", + "stockData": [], + "news": [], + "fullUrl": "https://www.investors.com/search-results/?query=abcdefg" +}` + +func TestClient_Search(t *testing.T) { + tests := []struct { + name string + response string + f func(t *testing.T, client *Client) + }{ + { + name: "found", + response: searchResponseJSON, + f: func(t *testing.T, client *Client) { + u, err := client.Search(context.Background(), "AAPL") + require.NoError(t, err) + assert.Equal(t, "AAPL", u.Symbol) + assert.Equal(t, "Apple", u.Name) + assert.Equal(t, "https://research.investors.com/stock-quotes/nasdaq-apple-aapl.htm", u.IBDUrl) + }, + }, + { + name: "not found", + response: emptySearchResponseJSON, + f: func(t *testing.T, client *Client) { + _, err := client.Search(context.Background(), "abcdefg") + assert.Error(t, err) + assert.ErrorIs(t, err, ErrSymbolNotFound) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tp := httpmock.NewMockTransport() + tp.RegisterResponder("GET", searchUrl, httpmock.NewStringResponder(200, tt.response)) + + client := NewClient( + db, + new(kmsStub), + transport.NewStandardTransport(&http.Client{Transport: tp}), + ) + + tt.f(t, client) + }) + } +} diff --git a/backend/internal/ibd/stockinfo.go b/backend/internal/ibd/stockinfo.go new file mode 100644 index 0000000..1e3b96f --- /dev/null +++ b/backend/internal/ibd/stockinfo.go @@ -0,0 +1,233 @@ +package ibd + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + + "github.com/ansg191/ibd-trader-backend/internal/database" + "github.com/ansg191/ibd-trader-backend/internal/utils" + + "github.com/Rhymond/go-money" + "golang.org/x/net/html" +) + +func (c *Client) StockInfo(ctx context.Context, 
uri string) (*database.StockInfo, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, uri, nil) + if err != nil { + return nil, err + } + + _, cookie, err := c.getCookie(ctx, nil) + if err != nil { + return nil, err + } + req.AddCookie(cookie) + + // Set required query parameters + params := url.Values{} + params.Set("list", "ibd50") + params.Set("type", "weekly") + req.URL.RawQuery = params.Encode() + + resp, err := c.Do(req) + if err != nil { + return nil, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + if resp.StatusCode != http.StatusOK { + content, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + return nil, fmt.Errorf( + "unexpected status code %d: %s", + resp.StatusCode, + string(content), + ) + } + + node, err := html.Parse(resp.Body) + if err != nil { + return nil, err + } + + name, symbol, err := extractNameAndSymbol(node) + if err != nil { + return nil, fmt.Errorf("failed to extract name and symbol: %w", err) + } + chartAnalysis, err := extractChartAnalysis(node) + if err != nil { + return nil, fmt.Errorf("failed to extract chart analysis: %w", err) + } + ratings, err := extractRatings(node) + if err != nil { + return nil, fmt.Errorf("failed to extract ratings: %w", err) + } + price, err := extractPrice(node) + if err != nil { + return nil, fmt.Errorf("failed to extract price: %w", err) + } + + return &database.StockInfo{ + Symbol: symbol, + Name: name, + ChartAnalysis: chartAnalysis, + Ratings: ratings, + Price: price, + }, nil +} + +func extractNameAndSymbol(node *html.Node) (name string, symbol string, err error) { + // Find span with ID "quote-symbol" + quoteSymbolNode := findId(node, "quote-symbol") + if quoteSymbolNode == nil { + return "", "", fmt.Errorf("could not find `quote-symbol` span") + } + + // Get the text of the quote-symbol span + name = strings.TrimSpace(extractText(quoteSymbolNode)) + + // Find span with ID 
"qteSymb" + qteSymbNode := findId(node, "qteSymb") + if qteSymbNode == nil { + return "", "", fmt.Errorf("could not find `qteSymb` span") + } + + // Get the text of the qteSymb span + symbol = strings.TrimSpace(extractText(qteSymbNode)) + + // Get index of last closing parenthesis + lastParenIndex := strings.LastIndex(name, ")") + if lastParenIndex == -1 { + return + } + + // Find the last opening parenthesis before the closing parenthesis + lastOpenParenIndex := strings.LastIndex(name[:lastParenIndex], "(") + if lastOpenParenIndex == -1 { + return + } + + // Remove the parenthesis pair + name = strings.TrimSpace(name[:lastOpenParenIndex] + name[lastParenIndex+1:]) + return +} + +func extractPrice(node *html.Node) (*money.Money, error) { + // Find the div with the ID "lstPrice" + lstPriceNode := findId(node, "lstPrice") + if lstPriceNode == nil { + return nil, fmt.Errorf("could not find `lstPrice` div") + } + + // Get the text of the lstPrice div + priceStr := strings.TrimSpace(extractText(lstPriceNode)) + + // Parse the price + price, err := utils.ParseMoney(priceStr) + if err != nil { + return nil, fmt.Errorf("failed to parse price: %w", err) + } + + return price, nil +} + +func extractRatings(node *html.Node) (ratings database.Ratings, err error) { + // Find the div with class "smartContent" + smartSelectNode := findClass(node, "smartContent") + if smartSelectNode == nil { + return ratings, fmt.Errorf("could not find `smartContent` div") + } + + // Iterate over children, looking for "smartRating" divs + for c := smartSelectNode.FirstChild; c != nil; c = c.NextSibling { + if !isClass(c, "smartRating") { + continue + } + + err = processSmartRating(c, &ratings) + if err != nil { + return + } + } + return +} + +// processSmartRating extracts the rating from a "smartRating" div and updates the ratings struct. +// +// The node should look like this: +// +// <ul class="smartRating"> +// <li><a><span>Composite Rating</span></a></li> +// <li>94</li> +// ... 
+// </ul> +func processSmartRating(node *html.Node, ratings *database.Ratings) error { + // Check that the node is a ul + if node.Type != html.ElementNode || node.Data != "ul" { + return fmt.Errorf("expected ul node, got %s", node.Data) + } + + // Get all `li` children + children := findChildren(node, func(node *html.Node) bool { + return node.Type == html.ElementNode && node.Data == "li" + }) + + // Extract the rating name + ratingName := strings.TrimSpace(extractText(children[0])) + + // Extract the rating value + ratingValueStr := strings.TrimSpace(extractText(children[1])) + + switch ratingName { + case "Composite Rating": + ratingValue, err := strconv.ParseUint(ratingValueStr, 10, 8) + if err != nil { + return fmt.Errorf("failed to parse Composite Rating: %w", err) + } + ratings.Composite = uint8(ratingValue) + case "EPS Rating": + ratingValue, err := strconv.ParseUint(ratingValueStr, 10, 8) + if err != nil { + return fmt.Errorf("failed to parse EPS Rating: %w", err) + } + ratings.EPS = uint8(ratingValue) + case "RS Rating": + ratingValue, err := strconv.ParseUint(ratingValueStr, 10, 8) + if err != nil { + return fmt.Errorf("failed to parse RS Rating: %w", err) + } + ratings.RelStr = uint8(ratingValue) + case "Group RS Rating": + ratings.GroupRelStr = database.LetterRatingFromString(ratingValueStr) + case "SMR Rating": + ratings.SMR = database.LetterRatingFromString(ratingValueStr) + case "Acc/Dis Rating": + ratings.AccDis = database.LetterRatingFromString(ratingValueStr) + default: + return fmt.Errorf("unknown rating name: %s", ratingName) + } + + return nil +} + +func extractChartAnalysis(node *html.Node) (string, error) { + // Find the div with class "chartAnalysis" + chartAnalysisNode := findClass(node, "chartAnalysis") + if chartAnalysisNode == nil { + return "", fmt.Errorf("could not find `chartAnalysis` div") + } + + // Get the text of the chart analysis div + chartAnalysis := strings.TrimSpace(extractText(chartAnalysisNode)) + + return chartAnalysis, 
nil +} diff --git a/backend/internal/ibd/transport/scrapfly/options.go b/backend/internal/ibd/transport/scrapfly/options.go new file mode 100644 index 0000000..f16a4b0 --- /dev/null +++ b/backend/internal/ibd/transport/scrapfly/options.go @@ -0,0 +1,84 @@ +package scrapfly + +const BaseURL = "https://api.scrapfly.io/scrape" + +var defaultScrapeOptions = ScrapeOptions{ + baseURL: BaseURL, + country: nil, + asp: true, + proxyPool: ProxyPoolDatacenter, + renderJS: false, + cache: false, +} + +type ScrapeOption func(*ScrapeOptions) + +type ScrapeOptions struct { + baseURL string + country *string + asp bool + proxyPool ProxyPool + renderJS bool + cache bool + debug bool +} + +type ProxyPool uint8 + +const ( + ProxyPoolDatacenter ProxyPool = iota + ProxyPoolResidential +) + +func (p ProxyPool) String() string { + switch p { + case ProxyPoolDatacenter: + return "public_datacenter_pool" + case ProxyPoolResidential: + return "public_residential_pool" + default: + panic("invalid proxy pool") + } +} + +func WithCountry(country string) ScrapeOption { + return func(o *ScrapeOptions) { + o.country = &country + } +} + +func WithASP(asp bool) ScrapeOption { + return func(o *ScrapeOptions) { + o.asp = asp + } +} + +func WithProxyPool(proxyPool ProxyPool) ScrapeOption { + return func(o *ScrapeOptions) { + o.proxyPool = proxyPool + } +} + +func WithRenderJS(jsRender bool) ScrapeOption { + return func(o *ScrapeOptions) { + o.renderJS = jsRender + } +} + +func WithCache(cache bool) ScrapeOption { + return func(o *ScrapeOptions) { + o.cache = cache + } +} + +func WithDebug(debug bool) ScrapeOption { + return func(o *ScrapeOptions) { + o.debug = debug + } +} + +func WithBaseURL(baseURL string) ScrapeOption { + return func(o *ScrapeOptions) { + o.baseURL = baseURL + } +} diff --git a/backend/internal/ibd/transport/scrapfly/scraper_types.go b/backend/internal/ibd/transport/scrapfly/scraper_types.go new file mode 100644 index 0000000..f3cf651 --- /dev/null +++ 
// ---------------------------------------------------------------------------
// backend/internal/ibd/transport/scrapfly/scraper_types.go (package scrapfly)
// ---------------------------------------------------------------------------

// scraperCookieTimeLayout is the zone-less timestamp layout used for
// ScraperCookie.Expires. Values in this layout are treated as UTC.
const scraperCookieTimeLayout = "2006-01-02 15:04:05"

// ScraperResponse is the JSON envelope decoded from a scrape call. Only
// Result is consumed by the methods in this file; the remaining fields mirror
// the payload so that it decodes without loss.
type ScraperResponse struct {
	Config struct {
		Asp                         bool   `json:"asp"`
		AutoScroll                  bool   `json:"auto_scroll"`
		Body                        any    `json:"body"`
		Cache                       bool   `json:"cache"`
		CacheClear                  bool   `json:"cache_clear"`
		CacheTtl                    int    `json:"cache_ttl"`
		CorrelationId               any    `json:"correlation_id"`
		CostBudget                  any    `json:"cost_budget"`
		Country                     any    `json:"country"`
		Debug                       bool   `json:"debug"`
		Dns                         bool   `json:"dns"`
		Env                         string `json:"env"`
		Extract                     any    `json:"extract"`
		ExtractionModel             any    `json:"extraction_model"`
		ExtractionModelCustomSchema any    `json:"extraction_model_custom_schema"`
		ExtractionPrompt            any    `json:"extraction_prompt"`
		ExtractionTemplate          any    `json:"extraction_template"`
		Format                      string `json:"format"`
		Geolocation                 any    `json:"geolocation"`
		Headers                     struct {
			Cookie []string `json:"Cookie"`
		} `json:"headers"`
		JobUuid              any    `json:"job_uuid"`
		Js                   any    `json:"js"`
		JsScenario           any    `json:"js_scenario"`
		Lang                 any    `json:"lang"`
		LogEvictionDate      string `json:"log_eviction_date"`
		Method               string `json:"method"`
		Origin               string `json:"origin"`
		Os                   any    `json:"os"`
		Project              string `json:"project"`
		ProxyPool            string `json:"proxy_pool"`
		RenderJs             bool   `json:"render_js"`
		RenderingStage       string `json:"rendering_stage"`
		RenderingWait        int    `json:"rendering_wait"`
		Retry                bool   `json:"retry"`
		ScheduleName         any    `json:"schedule_name"`
		ScreenshotFlags      any    `json:"screenshot_flags"`
		ScreenshotResolution any    `json:"screenshot_resolution"`
		Screenshots          any    `json:"screenshots"`
		Session              any    `json:"session"`
		SessionStickyProxy   bool   `json:"session_sticky_proxy"`
		Ssl                  bool   `json:"ssl"`
		Tags                 any    `json:"tags"`
		Timeout              int    `json:"timeout"`
		Url                  string `json:"url"`
		UserUuid             string `json:"user_uuid"`
		Uuid                 string `json:"uuid"`
		WaitForSelector      any    `json:"wait_for_selector"`
		WebhookName          any    `json:"webhook_name"`
	} `json:"config"`
	Context struct {
		Asp                     any `json:"asp"`
		BandwidthConsumed       int `json:"bandwidth_consumed"`
		BandwidthImagesConsumed int `json:"bandwidth_images_consumed"`
		Cache                   struct {
			Entry any    `json:"entry"`
			State string `json:"state"`
		} `json:"cache"`
		Cookies []struct {
			Comment  any     `json:"comment"`
			Domain   string  `json:"domain"`
			Expires  *string `json:"expires"`
			HttpOnly bool    `json:"http_only"`
			MaxAge   any     `json:"max_age"`
			Name     string  `json:"name"`
			Path     string  `json:"path"`
			Secure   bool    `json:"secure"`
			Size     int     `json:"size"`
			Value    string  `json:"value"`
			Version  any     `json:"version"`
		} `json:"cookies"`
		Cost struct {
			Details []struct {
				Amount      int    `json:"amount"`
				Code        string `json:"code"`
				Description string `json:"description"`
			} `json:"details"`
			Total int `json:"total"`
		} `json:"cost"`
		CreatedAt   string `json:"created_at"`
		Debug       any    `json:"debug"`
		Env         string `json:"env"`
		Fingerprint string `json:"fingerprint"`
		Headers     struct {
			Cookie string `json:"Cookie"`
		} `json:"headers"`
		IsXmlHttpRequest bool     `json:"is_xml_http_request"`
		Job              any      `json:"job"`
		Lang             []string `json:"lang"`
		Os               struct {
			Distribution string `json:"distribution"`
			Name         string `json:"name"`
			Type         string `json:"type"`
			Version      string `json:"version"`
		} `json:"os"`
		Project string `json:"project"`
		Proxy   struct {
			Country  string `json:"country"`
			Identity string `json:"identity"`
			Network  string `json:"network"`
			Pool     string `json:"pool"`
		} `json:"proxy"`
		Redirects []any `json:"redirects"`
		Retry     int   `json:"retry"`
		Schedule  any   `json:"schedule"`
		Session   any   `json:"session"`
		Spider    any   `json:"spider"`
		Throttler any   `json:"throttler"`
		Uri       struct {
			BaseUrl    string `json:"base_url"`
			Fragment   any    `json:"fragment"`
			Host       string `json:"host"`
			Params     any    `json:"params"`
			Port       int    `json:"port"`
			Query      string `json:"query"`
			RootDomain string `json:"root_domain"`
			Scheme     string `json:"scheme"`
		} `json:"uri"`
		Url     string `json:"url"`
		Webhook any    `json:"webhook"`
	} `json:"context"`
	Insights any           `json:"insights"`
	Result   ScraperResult `json:"result"`
	Uuid     string        `json:"uuid"`
}

// ScraperResult is the "result" member of ScraperResponse. ToHTTPResponse
// reads StatusCode, Content, ResponseHeaders and Cookies from it.
type ScraperResult struct {
	BrowserData struct {
		JavascriptEvaluationResult any   `json:"javascript_evaluation_result"`
		JsScenario                 []any `json:"js_scenario"`
		LocalStorageData           struct {
		} `json:"local_storage_data"`
		SessionStorageData struct {
		} `json:"session_storage_data"`
		Websockets []any `json:"websockets"`
		XhrCall    any   `json:"xhr_call"`
	} `json:"browser_data"`
	Content         string            `json:"content"`
	ContentEncoding string            `json:"content_encoding"`
	ContentFormat   string            `json:"content_format"`
	ContentType     string            `json:"content_type"`
	Cookies         []ScraperCookie   `json:"cookies"`
	Data            any               `json:"data"`
	Dns             any               `json:"dns"`
	Duration        float64           `json:"duration"`
	Error           any               `json:"error"`
	ExtractedData   any               `json:"extracted_data"`
	Format          string            `json:"format"`
	Iframes         []any             `json:"iframes"`
	LogUrl          string            `json:"log_url"`
	Reason          string            `json:"reason"`
	RequestHeaders  map[string]string `json:"request_headers"`
	ResponseHeaders map[string]string `json:"response_headers"`
	Screenshots     struct {
	} `json:"screenshots"`
	Size       int    `json:"size"`
	Ssl        any    `json:"ssl"`
	Status     string `json:"status"`
	StatusCode int    `json:"status_code"`
	Success    bool   `json:"success"`
	Url        string `json:"url"`
}

// ScraperCookie is the JSON form of a cookie. Expires is either empty or a
// timestamp in scraperCookieTimeLayout.
type ScraperCookie struct {
	Name     string `json:"name"`
	Value    string `json:"value"`
	Expires  string `json:"expires"`
	Path     string `json:"path"`
	Comment  string `json:"comment"`
	Domain   string `json:"domain"`
	MaxAge   int    `json:"max_age"`
	Secure   bool   `json:"secure"`
	HttpOnly bool   `json:"http_only"`
	Version  string `json:"version"`
	Size     int    `json:"size"`
}

// ToHTTPCookie converts c to an *http.Cookie, copying Name, Value, Path,
// Domain, Secure and HttpOnly. A non-empty Expires is parsed with
// scraperCookieTimeLayout (time.Parse yields UTC for zone-less layouts); a
// value that does not match the layout produces an error. An empty Expires
// leaves the cookie's Expires at the zero time.
func (c *ScraperCookie) ToHTTPCookie() (*http.Cookie, error) {
	var expires time.Time
	if c.Expires != "" {
		var err error
		expires, err = time.Parse(scraperCookieTimeLayout, c.Expires)
		if err != nil {
			return nil, fmt.Errorf("failed to parse cookie expiration: %w", err)
		}
	}
	return &http.Cookie{
		Name:     c.Name,
		Value:    c.Value,
		Path:     c.Path,
		Domain:   c.Domain,
		Expires:  expires,
		Secure:   c.Secure,
		HttpOnly: c.HttpOnly,
	}, nil
}

// FromHTTPCookie overwrites *c with the contents of cookie. Size is set to
// len(cookie.Value); Comment and Version are cleared.
func (c *ScraperCookie) FromHTTPCookie(cookie *http.Cookie) {
	var expires string
	if !cookie.Expires.IsZero() {
		// The layout carries no zone and ToHTTPCookie parses it as UTC, so
		// normalise to UTC first; otherwise a non-UTC expiry would come back
		// shifted by its zone offset.
		expires = cookie.Expires.UTC().Format(scraperCookieTimeLayout)
	}
	*c = ScraperCookie{
		Comment:  "",
		Domain:   cookie.Domain,
		Expires:  expires,
		HttpOnly: cookie.HttpOnly,
		MaxAge:   cookie.MaxAge,
		Name:     cookie.Name,
		Path:     cookie.Path,
		Secure:   cookie.Secure,
		Size:     len(cookie.Value),
		Value:    cookie.Value,
		Version:  "",
	}
}

// ToHTTPResponse builds an *http.Response from r.Result: status code and
// status line, the content as body, ResponseHeaders as headers, and one
// Set-Cookie header per valid cookie. It returns an error when a cookie's
// Expires cannot be parsed.
func (r *ScraperResponse) ToHTTPResponse() (*http.Response, error) {
	code := r.Result.StatusCode
	resp := &http.Response{
		// "200 OK" style status line; TrimSpace covers codes without a
		// registered status text.
		Status:        strings.TrimSpace(fmt.Sprintf("%d %s", code, http.StatusText(code))),
		StatusCode:    code,
		Header:        make(http.Header),
		Body:          io.NopCloser(strings.NewReader(r.Result.Content)),
		ContentLength: int64(len(r.Result.Content)),
		Close:         true,
	}

	for k, v := range r.Result.ResponseHeaders {
		resp.Header.Set(k, v)
	}

	for i := range r.Result.Cookies {
		cookie, err := r.Result.Cookies[i].ToHTTPCookie()
		if err != nil {
			return nil, err
		}
		// http.Cookie.String returns "" for an invalid cookie name; do not
		// emit a blank Set-Cookie header for it.
		if s := cookie.String(); s != "" {
			resp.Header.Add("Set-Cookie", s)
		}
	}

	return resp, nil
}
"net/http" + "net/url" + "strconv" + + "github.com/ansg191/ibd-trader-backend/internal/ibd/transport" +) + +type ScrapflyTransport struct { + client *http.Client + apiKey string + options ScrapeOptions +} + +func New(client *http.Client, apiKey string, opts ...ScrapeOption) *ScrapflyTransport { + options := defaultScrapeOptions + for _, opt := range opts { + opt(&options) + } + + return &ScrapflyTransport{ + client: client, + apiKey: apiKey, + options: options, + } +} + +func (s *ScrapflyTransport) String() string { + return "scrapfly" +} + +func (s *ScrapflyTransport) Do(req *http.Request) (*http.Response, error) { + // Construct scrape request URL + scrapeUrl, err := url.Parse(s.options.baseURL) + if err != nil { + panic(err) + } + scrapeUrl.RawQuery = s.constructRawQuery(req.URL, req.Header) + + // We can't handle `Content-Type` header on GET requests + // Wierd quirk of the Scrapfly API + if req.Method == http.MethodGet && req.Header.Get("Content-Type") != "" { + return nil, transport.ErrUnsupportedRequest + } + + // Construct scrape request + scrapeReq, err := http.NewRequestWithContext(req.Context(), req.Method, scrapeUrl.String(), req.Body) + if err != nil { + return nil, err + } + + // Send scrape request + resp, err := s.client.Do(scrapeReq) + if err != nil { + return nil, err + } + defer func(Body io.ReadCloser) { + _ = Body.Close() + }(resp.Body) + + // Parse scrape response + scraperResponse := new(ScraperResponse) + err = json.NewDecoder(resp.Body).Decode(scraperResponse) + if err != nil { + return nil, err + } + + // Convert scraper response to http.Response + return scraperResponse.ToHTTPResponse() +} + +func (s *ScrapflyTransport) Properties() transport.Properties { + return transport.PropertiesReliable +} + +func (s *ScrapflyTransport) constructRawQuery(u *url.URL, headers http.Header) string { + params := url.Values{} + params.Set("key", s.apiKey) + params.Set("url", u.String()) + if s.options.country != nil { + params.Set("country", 
*s.options.country) + } + params.Set("asp", strconv.FormatBool(s.options.asp)) + params.Set("proxy_pool", s.options.proxyPool.String()) + params.Set("render_js", strconv.FormatBool(s.options.renderJS)) + params.Set("cache", strconv.FormatBool(s.options.cache)) + + for k, v := range headers { + for i, vv := range v { + params.Add( + fmt.Sprintf("headers[%s][%d]", k, i), + vv, + ) + } + } + + return params.Encode() +} diff --git a/backend/internal/ibd/transport/standard.go b/backend/internal/ibd/transport/standard.go new file mode 100644 index 0000000..9fa9ff9 --- /dev/null +++ b/backend/internal/ibd/transport/standard.go @@ -0,0 +1,41 @@ +package transport + +import ( + "net/http" + + "github.com/EDDYCJY/fake-useragent" +) + +type StandardTransport http.Client + +func NewStandardTransport(client *http.Client) *StandardTransport { + return (*StandardTransport)(client) +} + +func (t *StandardTransport) Do(req *http.Request) (*http.Response, error) { + addFakeHeaders(req) + return (*http.Client)(t).Do(req) +} + +func (t *StandardTransport) String() string { + return "standard" +} + +func (t *StandardTransport) Properties() Properties { + return PropertiesFree +} + +func addFakeHeaders(req *http.Request) { + req.Header.Set("User-Agent", browser.Linux()) + req.Header.Set("Sec-CH-UA", `"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"`) + req.Header.Set("Sec-CH-UA-Mobile", "?0") + req.Header.Set("Sec-CH-UA-Platform", "Linux") + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Priority", "u=0, i") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-User", "?1") + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7") +} diff --git 
a/backend/internal/ibd/transport/transport.go b/backend/internal/ibd/transport/transport.go
new file mode 100644
index 0000000..95e9ef3
--- /dev/null
+++ b/backend/internal/ibd/transport/transport.go
@@ -0,0 +1,89 @@
+package transport
+
+import (
+	"cmp"
+	"errors"
+	"fmt"
+	"net/http"
+	"slices"
+)
+
+// ErrUnsupportedRequest is returned by a Transport that cannot perform the
+// request it was given.
+var ErrUnsupportedRequest = errors.New("unsupported request")
+
+// Properties is a bit set describing what a Transport offers.
+type Properties uint8
+
+const (
+	// PropertiesFree indicates that the transport is free.
+	// This means that requests made with this transport don't cost any money.
+	PropertiesFree Properties = 1 << iota
+	// PropertiesReliable indicates that the transport is reliable.
+	// This means that requests made with this transport are guaranteed to be
+	// successful if the server is reachable.
+	PropertiesReliable
+)
+
+// IsReliable reports whether the PropertiesReliable bit is set.
+func (p Properties) IsReliable() bool {
+	return p&PropertiesReliable != 0
+}
+
+// IsFree reports whether the PropertiesFree bit is set.
+func (p Properties) IsFree() bool {
+	return p&PropertiesFree != 0
+}
+
+// Transport performs HTTP requests and describes itself through Properties.
+type Transport interface {
+	fmt.Stringer
+
+	// Do performs req and returns its response.
+	Do(req *http.Request) (*http.Response, error)
+	// Properties reports the properties of this transport.
+	Properties() Properties
+}
+
+// SortTransports stably sorts transports in place by their properties.
+//
+// The transports are sorted in the following order:
+//  1. Free and reliable transports
+//  2. Free transports
+//  3. Reliable transports
+//  4. Transports that are neither free nor reliable
+func SortTransports(transports []Transport) {
+	slices.SortStableFunc(transports, func(a, b Transport) int {
+		return cmp.Compare(sortPriority(a.Properties()), sortPriority(b.Properties()))
+	})
+}
+
+// sortPriority ranks p for SortTransports; lower values sort first.
+//
+// Properties with neither bit set rank last. A plain map lookup would give
+// them the zero value and so tie them with the best transports.
+func sortPriority(p Properties) int {
+	switch {
+	case p.IsFree() && p.IsReliable():
+		return 0
+	case p.IsFree():
+		return 1
+	case p.IsReliable():
+		return 2
+	default:
+		return 3
+	}
+}
+
+// FilterTransports returns, in their original order, the transports whose
+// properties include every bit set in props. The result is nil when no
+// transport matches.
+func FilterTransports(transport []Transport, props Properties) []Transport {
+	var filtered []Transport
+	for _, tp := range transport {
+		if tp.Properties()&props == props {
+			filtered = append(filtered, tp)
+		}
+	}
+	return filtered
+}
diff --git a/backend/internal/ibd/userinfo.go b/backend/internal/ibd/userinfo.go
new file mode 100644
index 0000000..ed61497
--- /dev/null
+++ b/backend/internal/ibd/userinfo.go
@@ -0,0 +1,174 @@
+package ibd
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+)
+
+const (
+	userInfoUrl = "https://myibd.investors.com/services/userprofile.aspx?format=json"
+)
+
+// UserInfo fetches the profile of the user identified by cookie.
+//
+// cookie is attached to a GET request for userInfoUrl. A nil cookie, a
+// transport failure, a non-200 status or an undecodable body is reported as an
+// error.
+func (c *Client) UserInfo(ctx context.Context, cookie *http.Cookie) (*UserProfile, error) {
+	// http.Request.AddCookie dereferences its argument, so reject nil here
+	// instead of panicking below.
+	if cookie == nil {
+		return nil, errors.New("user info: cookie must not be nil")
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, userInfoUrl, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.AddCookie(cookie)
+
+	resp, err := c.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer func(Body io.ReadCloser) {
+		_ = Body.Close()
+	}(resp.Body)
+
+	content, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf(
+			"unexpected status code %d: %s",
+			resp.StatusCode,
+			string(content),
+		)
+	}
+
+	profile := new(UserProfile)
+	if err = profile.UnmarshalJSON(content); err != nil {
+		return nil, err
+	}
+
+	return profile, nil
+}
+
+// UserStatus is the subscription status reported for a user.
+type UserStatus string
+
+const (
+	UserStatusUnknown    UserStatus = ""
+	UserStatusVisitor    UserStatus = "Visitor"
+	UserStatusSubscriber UserStatus = "Subscriber"
+)
+
+// UserProfile is the subset of the user profile response exposed to callers.
+type UserProfile struct {
+	DisplayName string
+	Email       string
+	FirstName   string
+	LastName    string
+	Status      UserStatus
+}
+
+// UnmarshalJSON decodes a user profile response into u.
+//
+// A trial status other than "Visitor" or "Subscriber" is logged as a warning
+// and stored as UserStatusUnknown.
+func (u *UserProfile) UnmarshalJSON(data []byte) error {
+	var resp userProfileResponse
+	if err := json.Unmarshal(data, &resp); err != nil {
+		return err
+	}
+	profile := &resp.UserProfile
+
+	u.DisplayName = profile.UserDisplayName
+	u.Email = profile.UserEmailAddress
+	u.FirstName = profile.UserFirstName
+	u.LastName = profile.UserLastName
+
+	switch status := UserStatus(profile.UserTrialStatus); status {
+	case UserStatusVisitor, UserStatusSubscriber:
+		u.Status = status
+	default:
+		slog.Warn("Unknown user status", "status", profile.UserTrialStatus)
+		u.Status = UserStatusUnknown
+	}
+
+	return nil
+}
+
+// userProfileResponse is the JSON envelope around userProfile.
+type userProfileResponse struct {
+	UserProfile userProfile `json:"userProfile"`
+}
+
+// userProfile mirrors the "userProfile" object of the user profile response.
+type userProfile struct {
+	UserSubType            string `json:"userSubType"`
+	UserId                 string `json:"userId"`
+	UserDisplayName        string `json:"userDisplayName"`
+	Countrycode            string `json:"countrycode"`
+	IsEUCountry            string `json:"isEUCountry"`
+	Log                    string `json:"log"`
+	AgeGroup               string `json:"ageGroup"`
+	Gender                 string `json:"gender"`
+	InvestingExperience    string `json:"investingExperience"`
+	NumberOfTrades         string `json:"numberOfTrades"`
+	Occupation             string `json:"occupation"`
+	TypeOfInvestments      string `json:"typeOfInvestments"`
+	UserEmailAddress       string `json:"userEmailAddress"`
+	UserEmailAddressSHA1   string `json:"userEmailAddressSHA1"`
+	UserEmailAddressSHA256 string `json:"userEmailAddressSHA256"`
+	UserEmailAddressMD5    string `json:"userEmailAddressMD5"`
+	UserFirstName          string `json:"userFirstName"`
+	UserLastName           string `json:"userLastName"`
+	UserZip                string `json:"userZip"`
+	UserTrialStatus        string `json:"userTrialStatus"`
+	UserProductsOnTrial    string `json:"userProductsOnTrial"`
+	UserProductsOwned      string `json:"userProductsOwned"`
+	UserAdTrade            string `json:"userAdTrade"`
+	UserAdTime             string `json:"userAdTime"`
+	UserAdHold             string `json:"userAdHold"`
+	UserAdJob              string `json:"userAdJob"`
+	UserAdAge              string `json:"userAdAge"`
+	UserAdOutSell          string `json:"userAdOutSell"`
+	UserVisitCount         string `json:"userVisitCount"`
+	RoleLeaderboard        bool   `json:"role_leaderboard"`
+	RoleOws                bool   `json:"role_ows"`
+	RoleIbdlive            bool   `json:"role_ibdlive"`
+	RoleFounderclub        bool   `json:"role_founderclub"`
+	RoleEibd               bool   `json:"role_eibd"`
+	RoleIcom               bool   `json:"role_icom"`
+	RoleEtables            bool   `json:"role_etables"`
+	RoleTru10              bool   `json:"role_tru10"`
+	RoleMarketsurge        bool   `json:"role_marketsurge"`
+	RoleSwingtrader        bool   `json:"role_swingtrader"`
+	RoleAdfree             bool   `json:"role_adfree"`
+	RoleMarketdiem         bool   `json:"role_marketdiem"`
+	RoleWsjPlus            bool   `json:"role_wsj_plus"`
+	RoleWsj                bool   `json:"role_wsj"`
+	RoleBarrons            bool   `json:"role_barrons"`
+	RoleMarketwatch        bool   `json:"role_marketwatch"`
+	UserAdRoles            string `json:"userAdRoles"`
+	TrialDailyPrintNeg     bool   `json:"trial_daily_print_neg"`
+	TrialDailyPrintNon     bool   `json:"trial_daily_print_non"`
+	TrialWeeklyPrintNeg    bool   `json:"trial_weekly_print_neg"`
+	TrialWeeklyPrintNon    bool   `json:"trial_weekly_print_non"`
+	TrialDailyComboNeg     bool   `json:"trial_daily_combo_neg"`
+	TrialDailyComboNon     bool   `json:"trial_daily_combo_non"`
+	TrialWeeklyComboNeg    bool   `json:"trial_weekly_combo_neg"`
+	TrialWeeklyComboNon    bool   `json:"trial_weekly_combo_non"`
+	TrialEibdNeg           bool   `json:"trial_eibd_neg"`
+	TrialEibdNon           bool   `json:"trial_eibd_non"`
+	UserVideoPreference    string `json:"userVideoPreference"`
+	UserProfessionalStatus bool   `json:"userProfessionalStatus"`
+}
|