blob: db2fb7b17c96e1fa44f4dd452a5284d5f64dc8ca (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
<?php
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\WebDriverCapabilities;
/**
* An alternative abstract class for bridges depending on webdriver
*
* This class is meant as a solution for active websites that use
* XMLHttpRequest (XHR) to load content and/or use JavaScript to
* change content. This class depends on a working webdriver setup.
*/
abstract class WebDriverAbstract extends BridgeAbstract
{
    /**
     * Holds the remote webdriver object, including configuration and
     * connection.
     *
     * The property stays uninitialized until prepareWebDriver() has
     * successfully created a session.
     *
     * @var RemoteWebDriver
     */
    protected RemoteWebDriver $driver;

    /**
     * Holds the uri of the feed's icon, or null when none was set.
     *
     * @var string | null
     */
    private $feedIcon;

    /**
     * Returns the webdriver object.
     *
     * Must only be called after prepareWebDriver(); accessing the
     * uninitialized typed property raises an Error otherwise.
     *
     * @return RemoteWebDriver
     */
    protected function getDriver(): RemoteWebDriver
    {
        return $this->driver;
    }

    /**
     * Returns the uri of the feed's icon.
     *
     * Falls back to the parent implementation when no icon was set via
     * setIcon() (or when the value set is empty).
     *
     * @return string
     */
    public function getIcon()
    {
        return $this->feedIcon ?: parent::getIcon();
    }

    /**
     * Sets the uri of the feed's icon.
     *
     * @param string $iconurl
     */
    protected function setIcon($iconurl)
    {
        $this->feedIcon = $iconurl;
    }

    /**
     * Returns the ChromeOptions object.
     *
     * If the configuration parameter 'headless' is set to true, the
     * argument '--headless' is added. Override this to change or add
     * more options.
     *
     * @return ChromeOptions
     */
    protected function getBrowserOptions()
    {
        $chromeOptions = new ChromeOptions();
        if (Configuration::getConfig('webdriver', 'headless')) {
            $chromeOptions->addArguments(['--headless']); // --window-size=1024,1024
        }
        return $chromeOptions;
    }

    /**
     * Returns the DesiredCapabilities object for the Chrome browser.
     *
     * The Chrome options from getBrowserOptions() are added. Override
     * this to change or add more capabilities.
     *
     * @return WebDriverCapabilities
     */
    protected function getDesiredCapabilities(): WebDriverCapabilities
    {
        $desiredCapabilities = DesiredCapabilities::chrome();
        $desiredCapabilities->setCapability(ChromeOptions::CAPABILITY, $this->getBrowserOptions());
        return $desiredCapabilities;
    }

    /**
     * Constructs the remote webdriver with the url of the remote (Selenium)
     * webdriver server and the desired capabilities.
     *
     * The server url is read from the configuration parameter
     * 'selenium_server_url' in the 'webdriver' section.
     *
     * This should be called in collectData() first.
     */
    protected function prepareWebDriver()
    {
        $server = Configuration::getConfig('webdriver', 'selenium_server_url');
        $this->driver = RemoteWebDriver::create($server, $this->getDesiredCapabilities());
    }

    /**
     * Maximizes the remote browser window (often important for reactive sites
     * which change their appearance depending on the window size) and opens
     * the uri returned by getURI().
     */
    protected function prepareWindow()
    {
        $this->getDriver()->manage()->window()->maximize();
        $this->getDriver()->get($this->getURI());
    }

    /**
     * Closes the remote browser window and shuts down the remote webdriver
     * connection.
     *
     * This must be called at the end of scraping, for example within a
     * 'finally' block. It does nothing when no webdriver session was ever
     * created, so it is safe to call even if prepareWebDriver() failed.
     */
    protected function cleanUp()
    {
        // When prepareWebDriver() failed, the typed property was never
        // assigned. Reading it would raise an Error inside the caller's
        // 'finally' block and obscure the original failure.
        if (!isset($this->driver)) {
            return;
        }
        $this->getDriver()->quit();
    }

    /**
     * Do your web scraping here and fill the $items array.
     *
     * Override this but call parent::collectData() first.
     * Don't forget to call cleanUp() at the end.
     *
     * If opening the window fails, the already created webdriver session
     * is shut down here before the exception propagates, because the
     * overriding method has not reached its own cleanup code yet.
     */
    public function collectData()
    {
        $this->prepareWebDriver();

        $windowReady = false;
        try {
            $this->prepareWindow();
            $windowReady = true;
        } finally {
            // Only clean up on failure; on success the session is handed
            // over to the subclass, which is responsible for cleanUp().
            if (!$windowReady) {
                $this->cleanUp();
            }
        }
    }
}
|