From 5fb5d22ff22bb9c144315555010400757ea5be89 Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 22 May 2020 16:00:25 +0200 Subject: [PATCH 1/2] Load proxy list before client connections and reload periodically --- config/config.go | 4 +++ structures/structures.go | 16 ++++++------ workerclient/workerclient.go | 47 +++++++++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/config/config.go b/config/config.go index 3ad6a88..6d069c2 100644 --- a/config/config.go +++ b/config/config.go @@ -44,6 +44,8 @@ func generateClientConfigObject() { ClientConfig.WorkerServerAddress = getString("WORKER_SERVER_ADDRESS") ClientConfig.RequestMessagePrefix = getString("REQUEST_MESSAGE_PREFIX") ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL") + ClientConfig.ProxyListReloadInterval = getInt("PROXY_LIST_RELOAD_INTERVAL") + ClientConfig.ProxyListTimeout = getInt("PROXY_LIST_TIMEOUT") customSOCKS5ProxyListString := getString("CUSTOM_SOCKS5_PROXY_LIST") customSOCKS5ProxyList := strings.Split(customSOCKS5ProxyListString, ",") @@ -74,6 +76,8 @@ func initClientConfigDefaultValues() { defaultClientConfigValues["REQUEST_MESSAGE_PREFIX"] = "URL " defaultClientConfigValues["PROXY_LIST_BASE_URL"] = "" defaultClientConfigValues["CUSTOM_SOCKS5_PROXY_LIST"] = "" + defaultClientConfigValues["PROXY_LIST_RELOAD_INTERVAL"] = "30" + defaultClientConfigValues["PROXY_LIST_TIMEOUT"] = "10" } func initServerConfigDefaultValues() { diff --git a/structures/structures.go b/structures/structures.go index 8086305..3166780 100644 --- a/structures/structures.go +++ b/structures/structures.go @@ -34,11 +34,13 @@ type WorkerServerConfig struct { } type ClientConfig struct { - ConnectionsCount int - ConnectionTimeout int // In seconds - WaitingTimeout int // In seconds - WorkerServerAddress string - RequestMessagePrefix string - ProxyListBaseURL string - CustomSOCKS5ProxyList []ProxyServer + ConnectionsCount int + ConnectionTimeout int // In seconds + WaitingTimeout int // In seconds + WorkerServerAddress string + RequestMessagePrefix string + ProxyListBaseURL string + ProxyListTimeout int // In seconds + ProxyListReloadInterval int // In minutes + CustomSOCKS5ProxyList []ProxyServer } diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go index 12dc1ab..5b65947 100644 --- a/workerclient/workerclient.go +++ b/workerclient/workerclient.go @@ -3,6 +3,7 @@ package workerclient import ( "bufio" b64 "encoding/base64" + "errors" c "gitlab.com/saburly/kiviscraplib/config" "gitlab.com/saburly/kiviscraplib/structures" "golang.org/x/net/proxy" @@ -16,17 +17,30 @@ import ( "time" ) +var proxyList []structures.ProxyServer + func StartClientConnections() { rand.Seed(time.Now().Unix()) - + reloadProxyList(false) numberOfConnections := c.ClientConfig.ConnectionsCount for i := 0; i < numberOfConnections; i++ { go startSingleConnection(i) } - // TODO: Make one initial global proxy list load - // TODO: Make goroutine for periodic proxy reload and health check + go reloadProxyList(true) +} + +func reloadProxyList(async bool) { + for { + proxyList = getAllProxies() + log.Printf("[CLIENT] proxy list reloaded with %d servers : \n", len(proxyList)) + // TODO: Add proxy health check + if !async { + return + } + time.Sleep(time.Duration(c.ClientConfig.ProxyListReloadInterval) * time.Minute) + } } // startSingleConnection starts one single connection waiting for request message from load balancer server @@ -154,8 +168,6 @@ func fetchPage(url string, connectionId int) (string, error) { // getHttpClient creates client with proxy connection; Proxy is selected randomly from list of proxies func getHttpClient(connectionId int) (*http.Client, error) { - proxyList := getAllProxies() - // setup a http client httpTransport := &http.Transport{} httpClient := &http.Client{Transport: httpTransport} @@ -166,9 +178,14 @@ func getHttpClient(connectionId int) (*http.Client, error) { } // get random proxy from the list - selectedProxy := proxyList[rand.Intn(len(proxyList))] + proxyListLength := len(proxyList) + var selectedProxy structures.ProxyServer + if proxyListLength > 0 { + selectedProxy = proxyList[rand.Intn(proxyListLength)] + } - if selectedProxy.Type == "https" { + switch selectedProxy.Type { + case "https": proxyUrl, err := url.Parse("http://" + selectedProxy.Address) if err != nil { log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n", @@ -176,10 +193,8 @@ func getHttpClient(connectionId int) (*http.Client, error) { return nil, err } httpTransport.Proxy = http.ProxyURL(proxyUrl) - //myClient := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}} - } - - if selectedProxy.Type == "socks5" { + break + case "socks5": dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct) if err != nil { log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n", @@ -187,6 +202,10 @@ func getHttpClient(connectionId int) (*http.Client, error) { return nil, err } httpTransport.Dial = dialer.Dial + break + default: + log.Printf("(%d) [PROXY] Failed to select proxy\n", connectionId) + return nil, errors.New("failed to select proxy") } log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address) @@ -223,7 +242,11 @@ func getProxiesList(proxyType string) []structures.ProxyServer { proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType - resp, err := http.Get(proxyListUrl) + client := http.Client{ + Timeout: time.Duration(c.ClientConfig.ProxyListTimeout) * time.Second, + } + + resp, err := client.Get(proxyListUrl) if err != nil { log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err) return []structures.ProxyServer{} From 0772c2ba1bac540ba13cfe144444eb78f55639a1 Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 22 May 2020 16:08:59 +0200 Subject: [PATCH 2/2] Add timeout on proxy connections --- config/config.go | 2 ++ structures/structures.go | 1 + workerclient/workerclient.go | 5 ++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index 6d069c2..efe302f 100644 --- a/config/config.go +++ b/config/config.go @@ -46,6 +46,7 @@ func generateClientConfigObject() { ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL") ClientConfig.ProxyListReloadInterval = getInt("PROXY_LIST_RELOAD_INTERVAL") ClientConfig.ProxyListTimeout = getInt("PROXY_LIST_TIMEOUT") + ClientConfig.FetchTimeout = getInt("FETCH_TIMEOUT") customSOCKS5ProxyListString := getString("CUSTOM_SOCKS5_PROXY_LIST") customSOCKS5ProxyList := strings.Split(customSOCKS5ProxyListString, ",") @@ -78,6 +79,7 @@ func initClientConfigDefaultValues() { defaultClientConfigValues["CUSTOM_SOCKS5_PROXY_LIST"] = "" defaultClientConfigValues["PROXY_LIST_RELOAD_INTERVAL"] = "30" defaultClientConfigValues["PROXY_LIST_TIMEOUT"] = "10" + defaultClientConfigValues["FETCH_TIMEOUT"] = "60" } func initServerConfigDefaultValues() { diff --git a/structures/structures.go b/structures/structures.go index 3166780..10c0904 100644 --- a/structures/structures.go +++ b/structures/structures.go @@ -43,4 +43,5 @@ type ClientConfig struct { ProxyListTimeout int // In seconds ProxyListReloadInterval int // In minutes CustomSOCKS5ProxyList []ProxyServer + FetchTimeout int // In seconds } diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go index 5b65947..d041b18 100644 --- a/workerclient/workerclient.go +++ b/workerclient/workerclient.go @@ -170,7 +170,10 @@ func fetchPage(url string, connectionId int) (string, error) { func getHttpClient(connectionId int) (*http.Client, error) { // setup a http client httpTransport := &http.Transport{} - httpClient := &http.Client{Transport: httpTransport} + httpClient := &http.Client{ + Transport: httpTransport, + Timeout: time.Duration(c.ClientConfig.FetchTimeout) * time.Second, + } if len(proxyList) == 0 { log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)