From 601186352520b6f8d6210752d3655eb823afcb1d Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 7 May 2020 10:02:49 +0200 Subject: [PATCH 1/9] Implement worker client --- structures/structures.go | 5 + workerclient/workerclient.go | 205 +++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 workerclient/workerclient.go diff --git a/structures/structures.go b/structures/structures.go index deab078..ce705e5 100644 --- a/structures/structures.go +++ b/structures/structures.go @@ -15,3 +15,8 @@ type WorkerDescription struct { Ip string Req chan Request } + +type ProxyServer struct { + Type string + Address string +} diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go new file mode 100644 index 0000000..7459514 --- /dev/null +++ b/workerclient/workerclient.go @@ -0,0 +1,205 @@ +package workerclient + +import ( + "bufio" + b64 "encoding/base64" + "gitlab.com/saburly/kiviscraplib/structures" + "golang.org/x/net/proxy" + "io/ioutil" + "log" + "math/rand" + "net" + "net/http" + "strings" + "time" +) + +func StartClientConnections() { + rand.Seed(time.Now().Unix()) + + // TODO: Take number of connections from ENV + numberOfConnections := 1 + + for i := 0; i < numberOfConnections; i++ { + go startSingleConnection(i) + } + + // TODO: Make one initial global proxy list load + // TODO: Make goroutine for periodic proxy reload and health check +} + +// startSingleConnection starts one single connection waiting for request message from load balancer server +func startSingleConnection(connectionId int) { + log.Printf("(%d) Starting new client connection\n", connectionId) + + // TODO: Move initial connection timeout to the ENV + connectionTimeout := 2 + + for { + // TODO: Move server address to the ENV + const serverAddress = "127.0.0.1:1338" + conn, err := net.Dial("tcp", serverAddress) + if err != nil { + log.Printf("(%d) Cannot connect to the load balancer server on %s : %s", connectionId, serverAddress, err) + time.Sleep(time.Duration(connectionTimeout) * time.Second) + continue + } + + for { + requestMessage, err := bufio.NewReader(conn).ReadString('\n') + if err != nil { + log.Printf("(%d) Error receiving request from load balancer server : %s\n", connectionId, err) + _ = conn.Close() + time.Sleep(time.Duration(connectionTimeout) * time.Second) + break + } + + log.Printf("(%d) Received new request message : %s", connectionId, requestMessage) + + // TODO: Move prefix to the ENV + const requestMessagePrefix = "URL " + if !strings.HasPrefix(requestMessage, requestMessagePrefix) { + log.Printf("(%d) Request message has to start with %s", connectionId, requestMessagePrefix) + _ = conn.Close() + time.Sleep(time.Duration(connectionTimeout) * time.Second) + break + } + + urlToFetch := strings.TrimSuffix(strings.TrimPrefix(requestMessage, requestMessagePrefix), "\n") + + pageBody, err := fetchPage(urlToFetch, connectionId) + if err != nil { + _ = conn.Close() + time.Sleep(time.Duration(connectionTimeout) * time.Second) + break + } + + pageBodyBase64 := b64.StdEncoding.EncodeToString([]byte(pageBody)) + + _, err = conn.Write([]byte(pageBodyBase64 + "\n")) + if err != nil { + log.Printf("(%d) Cannot write to the load balancer server : %s", connectionId, err) + _ = conn.Close() + time.Sleep(time.Duration(connectionTimeout) * time.Second) + break + } + + log.Printf("(%d) Response sent. Waiting for a new job", connectionId) + } + } +} + +// fetchPage fetches page from desired url using random proxy +func fetchPage(url string, connectionId int) (string, error) { + dialer, proxyType, proxyAddr, err := getDialer() + if err != nil { + log.Printf("(%d) [PROXY] Cannot create connection to the proxy %s %s : %s\n", connectionId, proxyType, proxyAddr, err) + return "", err + } + + log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, proxyType, proxyAddr) + + // setup a http client + httpTransport := &http.Transport{} + httpClient := &http.Client{Transport: httpTransport} + + httpTransport.Dial = dialer.Dial + + // create + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Printf("(%d) [PROXY] Cannot create GET request to the %s : %s", connectionId, url, err) + return "", err + } + + // use the http client to fetch the page + resp, err2 := httpClient.Do(req) + if err2 != nil { + log.Printf("(%d) [PROXY] Cannot GET page %s : %s", connectionId, url, err2) + return "", err2 + } + + // TODO: Handle error + defer resp.Body.Close() + + pageBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Printf("(%d) [PROXY] Cannot read body of %s : %s", connectionId, url, err) + return "", err + } + + return string(pageBody), nil +} + +// getDialer selects one random proxy from the slice of all proxies +func getDialer() (proxy.Dialer, string, string, error) { + proxyList := getAllProxies() + + if len(proxyList) == 0 { + return proxy.Direct, "", "", nil // No proxy + } + + // get random proxy from the list + selectedProxy := proxyList[rand.Intn(len(proxyList))] + + if selectedProxy.Type == "https" { + // TODO: Implement https proxy + //parsedProxyURL, err := url.Parse("https://" + selectedProxy.Address) + //if err != nil { + // return nil, "", "", err + //} + // + //dialer, err := proxy.FromURL(parsedProxyURL, proxy.Direct) + //return dialer, selectedProxy.Type, selectedProxy.Address, err + } + + if selectedProxy.Type == "socks5" { + dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct) + return dialer, selectedProxy.Type, selectedProxy.Address, err + } + + return proxy.Direct, "", "", nil +} + +// getAllProxies combines all proxy types in one slice +func getAllProxies() []structures.ProxyServer { + //return append(getProxiesList("socks5"), getProxiesList("https")...) + return getProxiesList("socks5") +} + +// getProxiesList fetches list of proxies of specific type (valid types are : "socks5", "https") +func getProxiesList(proxyType string) []structures.ProxyServer { + switch proxyType { + case "https": + case "socks5": + break + default: + log.Printf("%s is not a valid proxy type", proxyType) + return []structures.ProxyServer{} + } + + proxyListUrl := "https://www.proxy-list.download/api/v1/get?type=" + proxyType + + resp, err := http.Get(proxyListUrl) + if err != nil { + log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err) + return []structures.ProxyServer{} + } + + defer resp.Body.Close() + + proxyList, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Printf("Cannot read response for a list of proxies [%s] : %s", proxyType, err) + return []structures.ProxyServer{} + } + + proxyAddresses := strings.Split(strings.TrimSuffix(string(proxyList), "\n"), "\n") + + var result []structures.ProxyServer + for _, addr := range proxyAddresses { + result = append(result, structures.ProxyServer{Type: proxyType, Address: strings.TrimSpace(addr)}) + } + + return result +} From a77b46aad64fa5885a5bb84325c01f7cf8fce666 Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 7 May 2020 10:03:03 +0200 Subject: [PATCH 2/9] Adapt worker server for base64 client response --- workerserver/workerserver.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/workerserver/workerserver.go b/workerserver/workerserver.go index 7d568af..248508b 100644 --- a/workerserver/workerserver.go +++ b/workerserver/workerserver.go @@ -2,10 +2,12 @@ package workerserver import ( "bufio" + b64 "encoding/base64" "gitlab.com/saburly/kiviscraplib/structures" "log" "math/rand" "net" + "strings" "time" ) @@ -53,6 +55,8 @@ func handleConnection(conn net.Conn) { Req: make(chan structures.Request), } + log.Printf("New worker client connected from %s\n", clientAddr) + workers <- workerDescription // add new worker to the end of the queue for { request := <-workerDescription.Req @@ -65,7 +69,7 @@ func handleConnection(conn net.Conn) { return // don't return worker to the queue - not healthy } - bufferBytes, err := bufio.NewReader(conn).ReadBytes('\n') + bufferString, err := bufio.NewReader(conn).ReadString('\n') if err != nil { log.Println("client left..") conn.Close() @@ -73,10 +77,12 @@ func handleConnection(conn net.Conn) { return // don't return worker to the queue - not healthy } + decodedPageBody, _ := b64.StdEncoding.DecodeString(strings.TrimSpace(bufferString)) + log.Printf("Recieving response from %s\n", clientAddr) response := structures.Response{ Url: request.Url, - Content: bufferBytes, + Content: decodedPageBody, Err: nil, } From 7e12d2819e8e6512e8b2850efb0b1ac5d8c79930 Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 06:32:04 +0200 Subject: [PATCH 3/9] Encode/decode request from load balancer server to the worker client --- workerclient/workerclient.go | 5 ++++- workerserver/workerserver.go | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go index 7459514..ac7682d 100644 --- a/workerclient/workerclient.go +++ b/workerclient/workerclient.go @@ -46,7 +46,7 @@ func startSingleConnection(connectionId int) { } for { - requestMessage, err := bufio.NewReader(conn).ReadString('\n') + encodedRequestMessage, err := bufio.NewReader(conn).ReadString('\n') if err != nil { log.Printf("(%d) Error receiving request from load balancer server : %s\n", connectionId, err) _ = conn.Close() @@ -54,6 +54,9 @@ func startSingleConnection(connectionId int) { break } + requestMessageBytes, _ := b64.StdEncoding.DecodeString(strings.TrimSpace(encodedRequestMessage)) + requestMessage := string(requestMessageBytes) + log.Printf("(%d) Received new request message : %s", connectionId, requestMessage) // TODO: Move prefix to the ENV diff --git a/workerserver/workerserver.go b/workerserver/workerserver.go index 248508b..b7f0c6f 100644 --- a/workerserver/workerserver.go +++ b/workerserver/workerserver.go @@ -61,7 +61,9 @@ func handleConnection(conn net.Conn) { for { request := <-workerDescription.Req - _, err := conn.Write([]byte("URL " + request.Url + "\n")) + requestBase64 := b64.StdEncoding.EncodeToString([]byte("URL " + request.Url)) + + _, err := conn.Write([]byte(requestBase64 + "\n")) if err != nil { log.Println("Cannot send to " + clientAddr) conn.Close() From 87b18c7f7e73d716265874e5cba4815c6b732e5d Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 07:50:27 +0200 Subject: [PATCH 4/9] Handle config as struct with values from ENV file --- config/config.go | 59 ++++++++++++++++++++++++++++++++++++++++ structures/structures.go | 8 ++++++ 2 files changed, 67 insertions(+) create mode 100644 config/config.go diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..cb6ac98 --- /dev/null +++ b/config/config.go @@ -0,0 +1,59 @@ +package config + +import ( + "github.com/joho/godotenv" + "gitlab.com/saburly/kiviscraplib/structures" + "log" + "os" + "strconv" +) + +var ClientConfig structures.ClientConfig +var defaultValues = make(map[string]string) + +func InitConfig() { + err := godotenv.Load() + if err != nil { + log.Fatal("Unable to load ENV variables") + } + + initDefaultValues() + generateClientConfigObject() +} + +func generateClientConfigObject() { + ClientConfig.ConnectionsCount = getInt("CLIENT_CONNECTIONS_COUNT") + ClientConfig.ConnectionTimeout = getInt("CLIENT_CONNECTION_TIMEOUT") + ClientConfig.WorkerServerAddress = getString("WORKER_SERVER_ADDRESS") + ClientConfig.RequestMessagePrefix = getString("REQUEST_MESSAGE_PREFIX") + ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL") +} + +func initDefaultValues() { + defaultValues["CLIENT_CONNECTIONS_COUNT"] = "5" + defaultValues["CLIENT_CONNECTION_TIMEOUT"] = "2" + defaultValues["WORKER_SERVER_ADDRESS"] = "127.0.0.1:1338" + defaultValues["REQUEST_MESSAGE_PREFIX"] = "URL " + defaultValues["PROXY_LIST_BASE_URL"] = "https://www.proxy-list.download/api/v1/get?type=" +} + +func getInt(key string) int { + value := os.Getenv(key) + if len(value) == 0 { + value = defaultValues[key] + } + + numericalValue, err := strconv.Atoi(value) + if err != nil { + log.Fatalf("Cannot convert ENV value for %s to the integer", key) + } + return numericalValue +} + +func getString(key string) string { + value := os.Getenv(key) + if len(value) == 0 { + return defaultValues[key] + } + return value +} diff --git a/structures/structures.go b/structures/structures.go index ce705e5..d254e1e 100644 --- a/structures/structures.go +++ b/structures/structures.go @@ -20,3 +20,11 @@ type ProxyServer struct { Type string Address string } + +type ClientConfig struct { + ConnectionsCount int + ConnectionTimeout int // In seconds + WorkerServerAddress string + RequestMessagePrefix string + ProxyListBaseURL string +} From e279dda637602eb1e230d7054df0f88ecdf3293c Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 07:51:02 +0200 Subject: [PATCH 5/9] Replace hardcoded values with config in worker client --- workerclient/workerclient.go | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go index ac7682d..720e1ee 100644 --- a/workerclient/workerclient.go +++ b/workerclient/workerclient.go @@ -3,6 +3,7 @@ package workerclient import ( "bufio" b64 "encoding/base64" + c "gitlab.com/saburly/kiviscraplib/config" "gitlab.com/saburly/kiviscraplib/structures" "golang.org/x/net/proxy" "io/ioutil" @@ -17,8 +18,7 @@ import ( func StartClientConnections() { rand.Seed(time.Now().Unix()) - // TODO: Take number of connections from ENV - numberOfConnections := 1 + numberOfConnections := c.ClientConfig.ConnectionsCount for i := 0; i < numberOfConnections; i++ { go startSingleConnection(i) @@ -32,12 +32,10 @@ func StartClientConnections() { func startSingleConnection(connectionId int) { log.Printf("(%d) Starting new client connection\n", connectionId) - // TODO: Move initial connection timeout to the ENV - connectionTimeout := 2 + connectionTimeout := c.ClientConfig.ConnectionTimeout for { - // TODO: Move server address to the ENV - const serverAddress = "127.0.0.1:1338" + serverAddress := c.ClientConfig.WorkerServerAddress conn, err := net.Dial("tcp", serverAddress) if err != nil { log.Printf("(%d) Cannot connect to the load balancer server on %s : %s", connectionId, serverAddress, err) @@ -59,8 +57,7 @@ func startSingleConnection(connectionId int) { log.Printf("(%d) Received new request message : %s", connectionId, requestMessage) - // TODO: Move prefix to the ENV - const requestMessagePrefix = "URL " + requestMessagePrefix := c.ClientConfig.RequestMessagePrefix if !strings.HasPrefix(requestMessage, requestMessagePrefix) { log.Printf("(%d) Request message has to start with %s", connectionId, requestMessagePrefix) _ = conn.Close() @@ -68,7 +65,7 @@ func startSingleConnection(connectionId int) { break } - urlToFetch := strings.TrimSuffix(strings.TrimPrefix(requestMessage, requestMessagePrefix), "\n") + urlToFetch := strings.TrimSpace(strings.TrimPrefix(requestMessage, requestMessagePrefix)) pageBody, err := fetchPage(urlToFetch, connectionId) if err != nil { @@ -181,7 +178,7 @@ func getProxiesList(proxyType string) []structures.ProxyServer { return []structures.ProxyServer{} } - proxyListUrl := "https://www.proxy-list.download/api/v1/get?type=" + proxyType + proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType resp, err := http.Get(proxyListUrl) if err != nil { @@ -197,7 +194,7 @@ func getProxiesList(proxyType string) []structures.ProxyServer { return []structures.ProxyServer{} } - proxyAddresses := strings.Split(strings.TrimSuffix(string(proxyList), "\n"), "\n") + proxyAddresses := strings.Split(strings.TrimSpace(string(proxyList)), "\n") var result []structures.ProxyServer for _, addr := range proxyAddresses { From 6d626ccecc3e0b443a6defb076cc2b4b156b0847 Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 09:51:09 +0200 Subject: [PATCH 6/9] Include https proxies --- workerclient/workerclient.go | 64 +++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/workerclient/workerclient.go b/workerclient/workerclient.go index 720e1ee..b82f4f7 100644 --- a/workerclient/workerclient.go +++ b/workerclient/workerclient.go @@ -11,6 +11,7 @@ import ( "math/rand" "net" "net/http" + "net/url" "strings" "time" ) @@ -91,31 +92,23 @@ func startSingleConnection(connectionId int) { // fetchPage fetches page from desired url using random proxy func fetchPage(url string, connectionId int) (string, error) { - dialer, proxyType, proxyAddr, err := getDialer() + // create request + req, err := http.NewRequest("GET", url, nil) if err != nil { - log.Printf("(%d) [PROXY] Cannot create connection to the proxy %s %s : %s\n", connectionId, proxyType, proxyAddr, err) + log.Printf("(%d) Cannot create GET request to the %s : %s\n", connectionId, url, err) return "", err } - log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, proxyType, proxyAddr) - - // setup a http client - httpTransport := &http.Transport{} - httpClient := &http.Client{Transport: httpTransport} - - httpTransport.Dial = dialer.Dial - - // create - req, err := http.NewRequest("GET", url, nil) + // create httpClient to execute request + httpClient, err := getHttpClient(connectionId) if err != nil { - log.Printf("(%d) [PROXY] Cannot create GET request to the %s : %s", connectionId, url, err) return "", err } // use the http client to fetch the page resp, err2 := httpClient.Do(req) if err2 != nil { - log.Printf("(%d) [PROXY] Cannot GET page %s : %s", connectionId, url, err2) + log.Printf("(%d) [PROXY] Cannot GET page %s : %s\n", connectionId, url, err2) return "", err2 } @@ -124,47 +117,58 @@ func fetchPage(url string, connectionId int) (string, error) { pageBody, err := ioutil.ReadAll(resp.Body) if err != nil { - log.Printf("(%d) [PROXY] Cannot read body of %s : %s", connectionId, url, err) + log.Printf("(%d) [PROXY] Cannot read body of %s : %s\n", connectionId, url, err) return "", err } return string(pageBody), nil } -// getDialer selects one random proxy from the slice of all proxies -func getDialer() (proxy.Dialer, string, string, error) { +// getHttpClient creates client with proxy connection; Proxy is selected randomly from list of proxies +func getHttpClient(connectionId int) (*http.Client, error) { proxyList := getAllProxies() + // setup a http client + httpTransport := &http.Transport{} + httpClient := &http.Client{Transport: httpTransport} + if len(proxyList) == 0 { - return proxy.Direct, "", "", nil // No proxy + log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId) + return httpClient, nil } // get random proxy from the list selectedProxy := proxyList[rand.Intn(len(proxyList))] if selectedProxy.Type == "https" { - // TODO: Implement https proxy - //parsedProxyURL, err := url.Parse("https://" + selectedProxy.Address) - //if err != nil { - // return nil, "", "", err - //} - // - //dialer, err := proxy.FromURL(parsedProxyURL, proxy.Direct) - //return dialer, selectedProxy.Type, selectedProxy.Address, err + proxyUrl, err := url.Parse("http://" + selectedProxy.Address) + if err != nil { + log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n", + connectionId, selectedProxy.Type, selectedProxy.Address, err) + return nil, err + } + httpTransport.Proxy = http.ProxyURL(proxyUrl) + //myClient := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}} } if selectedProxy.Type == "socks5" { dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct) - return dialer, selectedProxy.Type, selectedProxy.Address, err + if err != nil { + log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n", + connectionId, selectedProxy.Type, selectedProxy.Address, err) + return nil, err + } + httpTransport.Dial = dialer.Dial } - return proxy.Direct, "", "", nil + log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address) + + return httpClient, nil } // getAllProxies combines all proxy types in one slice func getAllProxies() []structures.ProxyServer { - //return append(getProxiesList("socks5"), getProxiesList("https")...) - return getProxiesList("socks5") + return append(getProxiesList("socks5"), getProxiesList("https")...) } // getProxiesList fetches list of proxies of specific type (valid types are : "socks5", "https") From 9a2425d245b78d772c65777001d8acc4b625f2cf Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 10:41:49 +0200 Subject: [PATCH 7/9] Update config with server configuration --- config/config.go | 61 +++++++++++++++++++++++++++++++--------- structures/structures.go | 12 ++++++++ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/config/config.go b/config/config.go index cb6ac98..b900027 100644 --- a/config/config.go +++ b/config/config.go @@ -8,17 +8,32 @@ import ( "strconv" ) +var WebServerConfig structures.WebServerConfig +var WorkerServerConfig structures.WorkerServerConfig var ClientConfig structures.ClientConfig -var defaultValues = make(map[string]string) -func InitConfig() { +var defaultClientConfigValues = make(map[string]string) +var defaultServerConfigValues = make(map[string]string) + +func InitServerConfig() { + loadEnvVariables() + + initServerConfigDefaultValues() + generateServerConfigObject() +} + +func InitClientConfig() { + loadEnvVariables() + + initClientConfigDefaultValues() + generateClientConfigObject() +} + +func loadEnvVariables() { err := godotenv.Load() if err != nil { log.Fatal("Unable to load ENV variables") } - - initDefaultValues() - generateClientConfigObject() } func generateClientConfigObject() { @@ -29,18 +44,38 @@ func generateClientConfigObject() { ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL") } -func initDefaultValues() { - defaultValues["CLIENT_CONNECTIONS_COUNT"] = "5" - defaultValues["CLIENT_CONNECTION_TIMEOUT"] = "2" - defaultValues["WORKER_SERVER_ADDRESS"] = "127.0.0.1:1338" - defaultValues["REQUEST_MESSAGE_PREFIX"] = "URL " - defaultValues["PROXY_LIST_BASE_URL"] = "https://www.proxy-list.download/api/v1/get?type=" +func generateServerConfigObject() { + WebServerConfig.Address = getString("WEB_SERVER_ADDRESS") + WebServerConfig.APIKey = getString("WEB_SERVER_API_KEY") + WebServerConfig.Timeout = getInt("WEB_SERVER_TIMEOUT") + + WorkerServerConfig.Address = getString("WORKER_SERVER_ADDRESS") + WorkerServerConfig.WorkersCount = getInt("WORKER_SERVER_WORKERS_COUNT") + WorkerServerConfig.RequestMessagePrefix = getString("WORKER_SERVER_REQUEST_MESSAGE_PREFIX") +} + +func initClientConfigDefaultValues() { + defaultClientConfigValues["CLIENT_CONNECTIONS_COUNT"] = "5" + defaultClientConfigValues["CLIENT_CONNECTION_TIMEOUT"] = "2" + defaultClientConfigValues["WORKER_SERVER_ADDRESS"] = "127.0.0.1:1338" + defaultClientConfigValues["REQUEST_MESSAGE_PREFIX"] = "URL " + defaultClientConfigValues["PROXY_LIST_BASE_URL"] = "https://www.proxy-list.download/api/v1/get?type=" +} + +func initServerConfigDefaultValues() { + defaultServerConfigValues["WEB_SERVER_ADDRESS"] = "127.0.0.1:1337" + defaultServerConfigValues["WEB_SERVER_API_KEY"] = "b8be5a3b639d465039e8fbe7582270a7" + defaultServerConfigValues["WEB_SERVER_TIMEOUT"] = "100" + + defaultServerConfigValues["WORKER_SERVER_ADDRESS"] = "127.0.0.1:1338" + defaultServerConfigValues["WORKER_SERVER_WORKERS_COUNT"] = "50" + defaultServerConfigValues["WORKER_SERVER_REQUEST_MESSAGE_PREFIX"] = "URL " } func getInt(key string) int { value := os.Getenv(key) if len(value) == 0 { - value = defaultValues[key] + value = defaultClientConfigValues[key] } numericalValue, err := strconv.Atoi(value) @@ -53,7 +88,7 @@ func getInt(key string) int { func getString(key string) string { value := os.Getenv(key) if len(value) == 0 { - return defaultValues[key] + return defaultClientConfigValues[key] } return value } diff --git a/structures/structures.go b/structures/structures.go index d254e1e..17605a0 100644 --- a/structures/structures.go +++ b/structures/structures.go @@ -21,6 +21,18 @@ type ProxyServer struct { Address string } +type WebServerConfig struct { + Address string + APIKey string + Timeout int // In seconds +} + +type WorkerServerConfig struct { + Address string + WorkersCount int + RequestMessagePrefix string +} + type ClientConfig struct { ConnectionsCount int ConnectionTimeout int // In seconds From aa982dafb84b01d41d661bb013be38f4ad654f30 Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 10:42:11 +0200 Subject: [PATCH 8/9] Use config values instead of hardcoded values --- webserver/webserver.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/webserver/webserver.go b/webserver/webserver.go index 09bf000..43df5e8 100644 --- a/webserver/webserver.go +++ b/webserver/webserver.go @@ -1,6 +1,7 @@ package webserver import ( + c "gitlab.com/saburly/kiviscraplib/config" "gitlab.com/saburly/kiviscraplib/structures" "gitlab.com/saburly/kiviscraplib/utils" "log" @@ -13,7 +14,7 @@ var requests chan structures.Request func ServeHTTP(queue chan structures.Request, end chan<- string) { requests = queue http.HandleFunc("/", httpHandler) - err := http.ListenAndServe("127.0.0.1:1337", nil) + err := http.ListenAndServe(c.WebServerConfig.Address, nil) if err != nil { end <- "http server" log.Fatal(err) @@ -21,7 +22,6 @@ func ServeHTTP(queue chan structures.Request, end chan<- string) { } func httpHandler(w http.ResponseWriter, r *http.Request) { - query := r.URL.Query() api_key, present := query["api_key"] if !present || len(api_key) == 0 { @@ -30,7 +30,7 @@ func httpHandler(w http.ResponseWriter, r *http.Request) { } // todo: do more for the authentication / authorization - if api_key[0] != "b8be5a3b639d465039e8fbe7582270a7" { + if api_key[0] != c.WebServerConfig.APIKey { respondWithError(w, 401, "api_key is wrong") return } @@ -56,10 +56,9 @@ func httpHandler(w http.ResponseWriter, r *http.Request) { log.Printf("request from %s: %s %q", r.RemoteAddr, r.Method, r.URL) log.Printf("waiting for response") - // todo: put timeout in ENV variable timeout := make(chan bool, 1) go func() { - time.Sleep(100 * time.Second) + time.Sleep(time.Duration(c.WebServerConfig.Timeout) * time.Second) timeout <- true }() From 451c3477bc30d8aa1e797f2a436ab09625d588be Mon Sep 17 00:00:00 2001 From: Bilal Date: Fri, 8 May 2020 10:43:55 +0200 Subject: [PATCH 9/9] Use config values instead of hardcoded values --- workerserver/workerserver.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workerserver/workerserver.go b/workerserver/workerserver.go index b7f0c6f..c1a7cb8 100644 --- a/workerserver/workerserver.go +++ b/workerserver/workerserver.go @@ -3,6 +3,7 @@ package workerserver import ( "bufio" b64 "encoding/base64" + c "gitlab.com/saburly/kiviscraplib/config" "gitlab.com/saburly/kiviscraplib/structures" "log" "math/rand" @@ -15,13 +16,12 @@ var requests chan structures.Request var workers chan structures.WorkerDescription func ServeWorkers(queue chan structures.Request, end chan<- string) { - rand.Seed(time.Now().Unix()) - workers = make(chan structures.WorkerDescription, 50) // TODO: move to env var + workers = make(chan structures.WorkerDescription, c.WorkerServerConfig.WorkersCount) requests = queue - listener, err := net.Listen("tcp", "127.0.0.1:1338") + listener, err := net.Listen("tcp", c.WorkerServerConfig.Address) if err != nil { log.Fatal("tcp server listener error:", err) end <- "tcp server" @@ -61,7 +61,7 @@ func handleConnection(conn net.Conn) { for { request := <-workerDescription.Req - requestBase64 := b64.StdEncoding.EncodeToString([]byte("URL " + request.Url)) + requestBase64 := b64.StdEncoding.EncodeToString([]byte(c.WorkerServerConfig.RequestMessagePrefix + request.Url)) _, err := conn.Write([]byte(requestBase64 + "\n")) if err != nil {