Merge branch 'handle-async-proxy-reload' into 'master'

Handle async proxy reload

See merge request saburly/kiviscraplib!5
This commit was merged in pull request #5.
This commit is contained in:
Senad Uka
2020-05-22 15:39:26 +00:00
3 changed files with 55 additions and 20 deletions

View File

@@ -44,6 +44,9 @@ func generateClientConfigObject() {
ClientConfig.WorkerServerAddress = getString("WORKER_SERVER_ADDRESS")
ClientConfig.RequestMessagePrefix = getString("REQUEST_MESSAGE_PREFIX")
ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL")
ClientConfig.ProxyListReloadInterval = getInt("PROXY_LIST_RELOAD_INTERVAL")
ClientConfig.ProxyListTimeout = getInt("PROXY_LIST_TIMEOUT")
ClientConfig.FetchTimeout = getInt("FETCH_TIMEOUT")
customSOCKS5ProxyListString := getString("CUSTOM_SOCKS5_PROXY_LIST")
customSOCKS5ProxyList := strings.Split(customSOCKS5ProxyListString, ",")
@@ -74,6 +77,9 @@ func initClientConfigDefaultValues() {
defaultClientConfigValues["REQUEST_MESSAGE_PREFIX"] = "URL "
defaultClientConfigValues["PROXY_LIST_BASE_URL"] = ""
defaultClientConfigValues["CUSTOM_SOCKS5_PROXY_LIST"] = ""
defaultClientConfigValues["PROXY_LIST_RELOAD_INTERVAL"] = "30"
defaultClientConfigValues["PROXY_LIST_TIMEOUT"] = "10"
defaultClientConfigValues["FETCH_TIMEOUT"] = "60"
}
func initServerConfigDefaultValues() {

View File

@@ -34,11 +34,14 @@ type WorkerServerConfig struct {
}
// ClientConfig holds the settings read by the worker client.
//
// NOTE(review): the fields from ConnectionsCount through CustomSOCKS5ProxyList
// are listed twice below. This looks like the pre-change and post-change lines
// of a diff rendered together; confirm which set is current before compiling.
type ClientConfig struct {
ConnectionsCount int
ConnectionTimeout int // In seconds
WaitingTimeout int // In seconds
WorkerServerAddress string
RequestMessagePrefix string
ProxyListBaseURL string
CustomSOCKS5ProxyList []ProxyServer
ConnectionsCount int // Number of connection goroutines started by the client
ConnectionTimeout int // In seconds
WaitingTimeout int // In seconds
WorkerServerAddress string
RequestMessagePrefix string
ProxyListBaseURL string // The proxy type is appended to this URL when a proxy list is fetched
ProxyListTimeout int // In seconds; timeout of the HTTP client that downloads a proxy list
ProxyListReloadInterval int // In minutes; pause between periodic proxy list reloads
CustomSOCKS5ProxyList []ProxyServer
FetchTimeout int // In seconds; timeout of the HTTP client built for fetching pages
}

View File

@@ -3,6 +3,7 @@ package workerclient
import (
"bufio"
b64 "encoding/base64"
"errors"
c "gitlab.com/saburly/kiviscraplib/config"
"gitlab.com/saburly/kiviscraplib/structures"
"golang.org/x/net/proxy"
@@ -16,17 +17,30 @@ import (
"time"
)
var proxyList []structures.ProxyServer
// StartClientConnections seeds the random generator, loads the proxy list
// once synchronously, starts the configured number of connection goroutines
// and finally launches the background goroutine that periodically reloads
// the proxy list.
func StartClientConnections() {
	rand.Seed(time.Now().Unix())

	// Blocking initial load so the proxy list is populated before any
	// connection goroutine is started.
	reloadProxyList(false)

	total := c.ClientConfig.ConnectionsCount
	for connectionID := 0; connectionID < total; connectionID++ {
		go startSingleConnection(connectionID)
	}

	// Keep the proxy list fresh in the background.
	// TODO: Add proxy health check to the periodic reload
	go reloadProxyList(true)
}
// reloadProxyList replaces the package-level proxyList with the result of
// getAllProxies and logs how many servers were loaded.
//
// When async is false a single reload is performed and the function returns.
// When async is true the function never returns: after every reload it sleeps
// for ClientConfig.ProxyListReloadInterval minutes and reloads again, so it is
// meant to be run in its own goroutine.
//
// NOTE(review): proxyList is assigned here without any synchronization;
// confirm that readers running in other goroutines tolerate this.
func reloadProxyList(async bool) {
	// Used when the configured interval is not positive; mirrors the default
	// value registered for PROXY_LIST_RELOAD_INTERVAL.
	const fallbackInterval = 30 * time.Minute

	for {
		proxyList = getAllProxies()
		log.Printf("[CLIENT] proxy list reloaded with %d servers : \n", len(proxyList))
		// TODO: Add proxy health check

		if !async {
			return
		}

		// The interval is re-read on every iteration, as before.
		interval := time.Duration(c.ClientConfig.ProxyListReloadInterval) * time.Minute
		if interval <= 0 {
			// time.Sleep returns immediately for zero or negative durations,
			// which would turn this loop into a busy loop that reloads the
			// proxy list continuously.
			log.Printf("[CLIENT] invalid proxy list reload interval (%d min), using %s instead\n",
				c.ClientConfig.ProxyListReloadInterval, fallbackInterval)
			interval = fallbackInterval
		}
		time.Sleep(interval)
	}
}
// startSingleConnection starts a single connection that waits for request messages from the load balancer server
@@ -154,11 +168,12 @@ func fetchPage(url string, connectionId int) (string, error) {
// getHttpClient creates an HTTP client with a proxy connection; the proxy is selected randomly from the list of proxies
func getHttpClient(connectionId int) (*http.Client, error) {
proxyList := getAllProxies()
// setup a http client
httpTransport := &http.Transport{}
httpClient := &http.Client{Transport: httpTransport}
httpClient := &http.Client{
Transport: httpTransport,
Timeout: time.Duration(c.ClientConfig.FetchTimeout) * time.Second,
}
if len(proxyList) == 0 {
log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)
@@ -166,9 +181,14 @@ func getHttpClient(connectionId int) (*http.Client, error) {
}
// get random proxy from the list
selectedProxy := proxyList[rand.Intn(len(proxyList))]
proxyListLength := len(proxyList)
var selectedProxy structures.ProxyServer
if proxyListLength > 0 {
selectedProxy = proxyList[rand.Intn(proxyListLength)]
}
if selectedProxy.Type == "https" {
switch selectedProxy.Type {
case "https":
proxyUrl, err := url.Parse("http://" + selectedProxy.Address)
if err != nil {
log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n",
@@ -176,10 +196,8 @@ func getHttpClient(connectionId int) (*http.Client, error) {
return nil, err
}
httpTransport.Proxy = http.ProxyURL(proxyUrl)
//myClient := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
}
if selectedProxy.Type == "socks5" {
break
case "socks5":
dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct)
if err != nil {
log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n",
@@ -187,6 +205,10 @@ func getHttpClient(connectionId int) (*http.Client, error) {
return nil, err
}
httpTransport.Dial = dialer.Dial
break
default:
log.Printf("(%d) [PROXY] Failed to select proxy\n", connectionId)
return nil, errors.New("failed to select proxy")
}
log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address)
@@ -223,7 +245,11 @@ func getProxiesList(proxyType string) []structures.ProxyServer {
proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType
resp, err := http.Get(proxyListUrl)
client := http.Client{
Timeout: time.Duration(c.ClientConfig.ProxyListTimeout) * time.Second,
}
resp, err := client.Get(proxyListUrl)
if err != nil {
log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err)
return []structures.ProxyServer{}