Merge branch 'handle-async-proxy-reload' into 'master'
Handle async proxy reload. See merge request saburly/kiviscraplib!5
This commit was merged in pull request #5.
This commit is contained in:
@@ -44,6 +44,9 @@ func generateClientConfigObject() {
|
|||||||
ClientConfig.WorkerServerAddress = getString("WORKER_SERVER_ADDRESS")
|
ClientConfig.WorkerServerAddress = getString("WORKER_SERVER_ADDRESS")
|
||||||
ClientConfig.RequestMessagePrefix = getString("REQUEST_MESSAGE_PREFIX")
|
ClientConfig.RequestMessagePrefix = getString("REQUEST_MESSAGE_PREFIX")
|
||||||
ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL")
|
ClientConfig.ProxyListBaseURL = getString("PROXY_LIST_BASE_URL")
|
||||||
|
ClientConfig.ProxyListReloadInterval = getInt("PROXY_LIST_RELOAD_INTERVAL")
|
||||||
|
ClientConfig.ProxyListTimeout = getInt("PROXY_LIST_TIMEOUT")
|
||||||
|
ClientConfig.FetchTimeout = getInt("FETCH_TIMEOUT")
|
||||||
|
|
||||||
customSOCKS5ProxyListString := getString("CUSTOM_SOCKS5_PROXY_LIST")
|
customSOCKS5ProxyListString := getString("CUSTOM_SOCKS5_PROXY_LIST")
|
||||||
customSOCKS5ProxyList := strings.Split(customSOCKS5ProxyListString, ",")
|
customSOCKS5ProxyList := strings.Split(customSOCKS5ProxyListString, ",")
|
||||||
@@ -74,6 +77,9 @@ func initClientConfigDefaultValues() {
|
|||||||
defaultClientConfigValues["REQUEST_MESSAGE_PREFIX"] = "URL "
|
defaultClientConfigValues["REQUEST_MESSAGE_PREFIX"] = "URL "
|
||||||
defaultClientConfigValues["PROXY_LIST_BASE_URL"] = ""
|
defaultClientConfigValues["PROXY_LIST_BASE_URL"] = ""
|
||||||
defaultClientConfigValues["CUSTOM_SOCKS5_PROXY_LIST"] = ""
|
defaultClientConfigValues["CUSTOM_SOCKS5_PROXY_LIST"] = ""
|
||||||
|
defaultClientConfigValues["PROXY_LIST_RELOAD_INTERVAL"] = "30"
|
||||||
|
defaultClientConfigValues["PROXY_LIST_TIMEOUT"] = "10"
|
||||||
|
defaultClientConfigValues["FETCH_TIMEOUT"] = "60"
|
||||||
}
|
}
|
||||||
|
|
||||||
func initServerConfigDefaultValues() {
|
func initServerConfigDefaultValues() {
|
||||||
|
|||||||
@@ -34,11 +34,14 @@ type WorkerServerConfig struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ClientConfig groups the settings read by the worker client.
// Each row of this diff shows the pre-merge line first and the post-merge line second;
// ProxyListTimeout, ProxyListReloadInterval and FetchTimeout exist only in the post-merge version.
// NOTE(review): timeouts are expressed in seconds but the reload interval is in minutes —
// keep the units in mind when setting the corresponding configuration values.
type ClientConfig struct {
|
type ClientConfig struct {
|
||||||
ConnectionsCount int
|
ConnectionsCount int
|
||||||
ConnectionTimeout int // In seconds
|
ConnectionTimeout int // In seconds
|
||||||
WaitingTimeout int // In seconds
|
WaitingTimeout int // In seconds
|
||||||
WorkerServerAddress string
|
WorkerServerAddress string
|
||||||
RequestMessagePrefix string
|
RequestMessagePrefix string
|
||||||
ProxyListBaseURL string
|
ProxyListBaseURL string
|
||||||
CustomSOCKS5ProxyList []ProxyServer
|
ProxyListTimeout int // In seconds; used as the http.Client Timeout when getProxiesList downloads a proxy list
|
||||||
|
ProxyListReloadInterval int // In minutes; pause between two passes of the reloadProxyList loop
|
||||||
|
CustomSOCKS5ProxyList []ProxyServer
|
||||||
|
FetchTimeout int // In seconds; used as the Timeout of the http.Client built by getHttpClient
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package workerclient
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
b64 "encoding/base64"
|
b64 "encoding/base64"
|
||||||
|
"errors"
|
||||||
c "gitlab.com/saburly/kiviscraplib/config"
|
c "gitlab.com/saburly/kiviscraplib/config"
|
||||||
"gitlab.com/saburly/kiviscraplib/structures"
|
"gitlab.com/saburly/kiviscraplib/structures"
|
||||||
"golang.org/x/net/proxy"
|
"golang.org/x/net/proxy"
|
||||||
@@ -16,17 +17,30 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var proxyList []structures.ProxyServer
|
||||||
|
|
||||||
// StartClientConnections starts c.ClientConfig.ConnectionsCount goroutines, each running
// startSingleConnection(i), and returns without waiting for any of them.
// In the post-merge version (second line of each row) it also loads the proxy list once,
// synchronously, before the connections start, and afterwards starts a background goroutine
// that keeps reloading it.
// NOTE(review): proxyList is a package-level slice that the background goroutine reassigns while
// getHttpClient reads it (presumably from the connection goroutines); no synchronization is
// visible in this diff — confirm whether a mutex or an atomic swap is needed.
func StartClientConnections() {
|
func StartClientConnections() {
|
||||||
// Seed rand from the wall clock (one-second resolution); getHttpClient uses rand.Intn to pick a proxy.
rand.Seed(time.Now().Unix())
|
rand.Seed(time.Now().Unix())
|
||||||
|
// Blocking initial load: with async=false reloadProxyList returns after a single pass.
reloadProxyList(false)
|
||||||
numberOfConnections := c.ClientConfig.ConnectionsCount
|
numberOfConnections := c.ClientConfig.ConnectionsCount
|
||||||
|
|
||||||
for i := 0; i < numberOfConnections; i++ {
|
for i := 0; i < numberOfConnections; i++ {
|
||||||
go startSingleConnection(i)
|
go startSingleConnection(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Make one initial global proxy list load
|
// Periodic reload: with async=true reloadProxyList never returns, so it runs in its own goroutine.
go reloadProxyList(true)
|
||||||
// TODO: Make goroutine for periodic proxy reload and health check
|
}
|
||||||
|
|
||||||
|
// reloadProxyList replaces the package-level proxyList with the result of getAllProxies and
// logs how many servers were loaded.
// With async == false it performs exactly one load and returns. With async == true it repeats
// the load forever, sleeping c.ClientConfig.ProxyListReloadInterval minutes between passes; the
// function does not start a goroutine itself, so the caller is expected to invoke it with `go`.
// NOTE(review): there is no way to stop the loop once it runs with async == true.
// NOTE(review): proxyList is assigned here without any locking visible in this diff while
// getHttpClient reads it — confirm this is not a data race.
func reloadProxyList(async bool) {
|
||||||
|
for {
|
||||||
|
proxyList = getAllProxies()
|
||||||
|
// NOTE(review): the message ends with " : " and nothing after it — looks like a leftover.
log.Printf("[CLIENT] proxy list reloaded with %d servers : \n", len(proxyList))
|
||||||
|
// TODO: Add proxy health check
|
||||||
|
// One-shot mode: stop after the first load.
if !async {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// NOTE(review): time.Sleep returns immediately for a zero or negative duration, so an interval
// configured as <= 0 would turn this loop into back-to-back getAllProxies calls. The default
// registered under "PROXY_LIST_RELOAD_INTERVAL" in initClientConfigDefaultValues is "30".
time.Sleep(time.Duration(c.ClientConfig.ProxyListReloadInterval) * time.Minute)
|
||||||
|
}
|
||||||
// Pre-merge column: this brace appears to be the end of the old StartClientConnections, which the
// diff aligned with the end of reloadProxyList in the post-merge column.
}
|
}
|
||||||
|
|
||||||
// startSingleConnection starts a single connection that waits for a request message from the load balancer server
|
// startSingleConnection starts a single connection that waits for a request message from the load balancer server
|
||||||
@@ -154,11 +168,12 @@ func fetchPage(url string, connectionId int) (string, error) {
|
|||||||
|
|
||||||
// getHttpClient creates an HTTP client that connects through a proxy; the proxy is selected randomly from the list of proxies
|
// getHttpClient creates an HTTP client that connects through a proxy; the proxy is selected randomly from the list of proxies
|
||||||
func getHttpClient(connectionId int) (*http.Client, error) {
|
func getHttpClient(connectionId int) (*http.Client, error) {
|
||||||
proxyList := getAllProxies()
|
|
||||||
|
|
||||||
// setup a http client
|
// setup a http client
|
||||||
httpTransport := &http.Transport{}
|
httpTransport := &http.Transport{}
|
||||||
httpClient := &http.Client{Transport: httpTransport}
|
httpClient := &http.Client{
|
||||||
|
Transport: httpTransport,
|
||||||
|
Timeout: time.Duration(c.ClientConfig.FetchTimeout) * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
if len(proxyList) == 0 {
|
if len(proxyList) == 0 {
|
||||||
log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)
|
log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)
|
||||||
@@ -166,9 +181,14 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get random proxy from the list
|
// get random proxy from the list
|
||||||
selectedProxy := proxyList[rand.Intn(len(proxyList))]
|
proxyListLength := len(proxyList)
|
||||||
|
var selectedProxy structures.ProxyServer
|
||||||
|
if proxyListLength > 0 {
|
||||||
|
selectedProxy = proxyList[rand.Intn(proxyListLength)]
|
||||||
|
}
|
||||||
|
|
||||||
if selectedProxy.Type == "https" {
|
switch selectedProxy.Type {
|
||||||
|
case "https":
|
||||||
proxyUrl, err := url.Parse("http://" + selectedProxy.Address)
|
proxyUrl, err := url.Parse("http://" + selectedProxy.Address)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n",
|
log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n",
|
||||||
@@ -176,10 +196,8 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
httpTransport.Proxy = http.ProxyURL(proxyUrl)
|
httpTransport.Proxy = http.ProxyURL(proxyUrl)
|
||||||
//myClient := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
break
|
||||||
}
|
case "socks5":
|
||||||
|
|
||||||
if selectedProxy.Type == "socks5" {
|
|
||||||
dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct)
|
dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n",
|
log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n",
|
||||||
@@ -187,6 +205,10 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
httpTransport.Dial = dialer.Dial
|
httpTransport.Dial = dialer.Dial
|
||||||
|
break
|
||||||
|
default:
|
||||||
|
log.Printf("(%d) [PROXY] Failed to select proxy\n", connectionId)
|
||||||
|
return nil, errors.New("failed to select proxy")
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address)
|
log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address)
|
||||||
@@ -223,7 +245,11 @@ func getProxiesList(proxyType string) []structures.ProxyServer {
|
|||||||
|
|
||||||
proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType
|
proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType
|
||||||
|
|
||||||
resp, err := http.Get(proxyListUrl)
|
client := http.Client{
|
||||||
|
Timeout: time.Duration(c.ClientConfig.ProxyListTimeout) * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Get(proxyListUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err)
|
log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err)
|
||||||
return []structures.ProxyServer{}
|
return []structures.ProxyServer{}
|
||||||
|
|||||||
Reference in New Issue
Block a user