|
|
|
@@ -3,6 +3,7 @@ package workerclient
|
|
|
|
import (
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bufio"
|
|
|
|
b64 "encoding/base64"
|
|
|
|
b64 "encoding/base64"
|
|
|
|
|
|
|
|
"errors"
|
|
|
|
c "gitlab.com/saburly/kiviscraplib/config"
|
|
|
|
c "gitlab.com/saburly/kiviscraplib/config"
|
|
|
|
"gitlab.com/saburly/kiviscraplib/structures"
|
|
|
|
"gitlab.com/saburly/kiviscraplib/structures"
|
|
|
|
"golang.org/x/net/proxy"
|
|
|
|
"golang.org/x/net/proxy"
|
|
|
|
@@ -16,17 +17,30 @@ import (
|
|
|
|
"time"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
var proxyList []structures.ProxyServer
|
|
|
|
|
|
|
|
|
|
|
|
func StartClientConnections() {
|
|
|
|
func StartClientConnections() {
|
|
|
|
rand.Seed(time.Now().Unix())
|
|
|
|
rand.Seed(time.Now().Unix())
|
|
|
|
|
|
|
|
reloadProxyList(false)
|
|
|
|
numberOfConnections := c.ClientConfig.ConnectionsCount
|
|
|
|
numberOfConnections := c.ClientConfig.ConnectionsCount
|
|
|
|
|
|
|
|
|
|
|
|
for i := 0; i < numberOfConnections; i++ {
|
|
|
|
for i := 0; i < numberOfConnections; i++ {
|
|
|
|
go startSingleConnection(i)
|
|
|
|
go startSingleConnection(i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Make one initial global proxy list load
|
|
|
|
go reloadProxyList(true)
|
|
|
|
// TODO: Make goroutine for periodic proxy reload and health check
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func reloadProxyList(async bool) {
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
|
|
|
proxyList = getAllProxies()
|
|
|
|
|
|
|
|
log.Printf("[CLIENT] proxy list reloaded with %d servers : \n", len(proxyList))
|
|
|
|
|
|
|
|
// TODO: Add proxy health check
|
|
|
|
|
|
|
|
if !async {
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
time.Sleep(time.Duration(c.ClientConfig.ProxyListReloadInterval) * time.Minute)
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// startSingleConnection starts one single connection waiting for request message from load balancer server
|
|
|
|
// startSingleConnection starts one single connection waiting for request message from load balancer server
|
|
|
|
@@ -154,11 +168,12 @@ func fetchPage(url string, connectionId int) (string, error) {
|
|
|
|
|
|
|
|
|
|
|
|
// getHttpClient creates client with proxy connection; Proxy is selected randomly from list of proxies
|
|
|
|
// getHttpClient creates client with proxy connection; Proxy is selected randomly from list of proxies
|
|
|
|
func getHttpClient(connectionId int) (*http.Client, error) {
|
|
|
|
func getHttpClient(connectionId int) (*http.Client, error) {
|
|
|
|
proxyList := getAllProxies()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// setup a http client
|
|
|
|
// setup a http client
|
|
|
|
httpTransport := &http.Transport{}
|
|
|
|
httpTransport := &http.Transport{}
|
|
|
|
httpClient := &http.Client{Transport: httpTransport}
|
|
|
|
httpClient := &http.Client{
|
|
|
|
|
|
|
|
Transport: httpTransport,
|
|
|
|
|
|
|
|
Timeout: time.Duration(c.ClientConfig.FetchTimeout) * time.Second,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if len(proxyList) == 0 {
|
|
|
|
if len(proxyList) == 0 {
|
|
|
|
log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)
|
|
|
|
log.Printf("(%d) [PROXY] No proxy found, will continue without proxy!\n", connectionId)
|
|
|
|
@@ -166,9 +181,14 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// get random proxy from the list
|
|
|
|
// get random proxy from the list
|
|
|
|
selectedProxy := proxyList[rand.Intn(len(proxyList))]
|
|
|
|
proxyListLength := len(proxyList)
|
|
|
|
|
|
|
|
var selectedProxy structures.ProxyServer
|
|
|
|
|
|
|
|
if proxyListLength > 0 {
|
|
|
|
|
|
|
|
selectedProxy = proxyList[rand.Intn(proxyListLength)]
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if selectedProxy.Type == "https" {
|
|
|
|
switch selectedProxy.Type {
|
|
|
|
|
|
|
|
case "https":
|
|
|
|
proxyUrl, err := url.Parse("http://" + selectedProxy.Address)
|
|
|
|
proxyUrl, err := url.Parse("http://" + selectedProxy.Address)
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n",
|
|
|
|
log.Printf("(%d) [PROXY] Cannot parse proxy address (%s) %s : %s\n",
|
|
|
|
@@ -176,10 +196,8 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|
|
|
return nil, err
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
httpTransport.Proxy = http.ProxyURL(proxyUrl)
|
|
|
|
httpTransport.Proxy = http.ProxyURL(proxyUrl)
|
|
|
|
//myClient := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
case "socks5":
|
|
|
|
|
|
|
|
|
|
|
|
if selectedProxy.Type == "socks5" {
|
|
|
|
|
|
|
|
dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct)
|
|
|
|
dialer, err := proxy.SOCKS5("tcp", selectedProxy.Address, nil, proxy.Direct)
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n",
|
|
|
|
log.Printf("(%d) [PROXY] Cannot create connection to the proxy (%s) %s : %s\n",
|
|
|
|
@@ -187,6 +205,10 @@ func getHttpClient(connectionId int) (*http.Client, error) {
|
|
|
|
return nil, err
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
httpTransport.Dial = dialer.Dial
|
|
|
|
httpTransport.Dial = dialer.Dial
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
log.Printf("(%d) [PROXY] Failed to select proxy\n", connectionId)
|
|
|
|
|
|
|
|
return nil, errors.New("failed to select proxy")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address)
|
|
|
|
log.Printf("(%d) [PROXY] Selected proxy (%s) %s\n", connectionId, selectedProxy.Type, selectedProxy.Address)
|
|
|
|
@@ -223,7 +245,11 @@ func getProxiesList(proxyType string) []structures.ProxyServer {
|
|
|
|
|
|
|
|
|
|
|
|
proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType
|
|
|
|
proxyListUrl := c.ClientConfig.ProxyListBaseURL + proxyType
|
|
|
|
|
|
|
|
|
|
|
|
resp, err := http.Get(proxyListUrl)
|
|
|
|
client := http.Client{
|
|
|
|
|
|
|
|
Timeout: time.Duration(c.ClientConfig.ProxyListTimeout) * time.Second,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resp, err := client.Get(proxyListUrl)
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err)
|
|
|
|
log.Printf("Cannot get list of proxies [%s] : %s", proxyType, err)
|
|
|
|
return []structures.ProxyServer{}
|
|
|
|
return []structures.ProxyServer{}
|
|
|
|
|