diff options
Diffstat (limited to 'bridges/GoogleSearchBridge.php')
-rw-r--r-- | bridges/GoogleSearchBridge.php | 175 |
1 files changed, 89 insertions, 86 deletions
diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 5370804e..406cf2a9 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -1,102 +1,105 @@ <?php -class GoogleSearchBridge extends BridgeAbstract { +class GoogleSearchBridge extends BridgeAbstract +{ + const MAINTAINER = 'sebsauvage'; + const NAME = 'Google search'; + const URI = 'https://www.google.com/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns max 100 results from the past year.'; - const MAINTAINER = 'sebsauvage'; - const NAME = 'Google search'; - const URI = 'https://www.google.com/'; - const CACHE_TIMEOUT = 1800; // 30min - const DESCRIPTION = 'Returns max 100 results from the past year.'; + const PARAMETERS = [[ + 'q' => [ + 'name' => 'keyword', + 'required' => true, + 'exampleValue' => 'rss-bridge', + ], + 'verbatim' => [ + 'name' => 'Verbatim', + 'type' => 'checkbox', + 'title' => 'Use literal keyword(s) without making improvements', + ], + ]]; - const PARAMETERS = array(array( - 'q' => array( - 'name' => 'keyword', - 'required' => true, - 'exampleValue' => 'rss-bridge', - ), - 'verbatim' => array( - 'name' => 'Verbatim', - 'type' => 'checkbox', - 'title' => 'Use literal keyword(s) without making improvements', - ), - )); + public function collectData() + { + $dom = getSimpleHTMLDOM($this->getURI(), ['Accept-language: en-US']); + if (!$dom) { + returnServerError('No results for this query.'); + } + $result = $dom->find('div[id=res]', 0); - public function collectData(){ - $dom = getSimpleHTMLDOM($this->getURI(), ['Accept-language: en-US']); - if (!$dom) { - returnServerError('No results for this query.'); - } - $result = $dom->find('div[id=res]', 0); + if (!$result) { + return; + } - if(!$result) { - return; - } + foreach ($result->find('div[class~=g]') as $element) { + $item = []; - foreach ($result->find('div[class~=g]') as $element) { - $item = []; + $url = $element->find('a[href]', 0)->href; + $item['uri'] = htmlspecialchars_decode($url); + $item['title'] = $element->find('h3', 0)->plaintext; - $url = $element->find('a[href]', 0)->href; - $item['uri'] = htmlspecialchars_decode($url); - $item['title'] = $element->find('h3', 0)->plaintext; + $resultDom = $element->find('div[data-content-feature=1]', 0); + if ($resultDom) { + // Split by — or · + $resultParts = preg_split('/( — | · )/', $resultDom->plaintext); + $resultDate = trim($resultParts[0]); + $resultContent = trim($resultParts[1] ?? ''); + } else { + // Some search results don't have this particular dom identifier + $resultDate = null; + $resultContent = null; + } - $resultDom = $element->find('div[data-content-feature=1]', 0); - if ($resultDom) { - // Split by — or · - $resultParts = preg_split('/( — | · )/', $resultDom->plaintext); - $resultDate = trim($resultParts[0]); - $resultContent = trim($resultParts[1] ?? ''); - } else { - // Some search results don't have this particular dom identifier - $resultDate = null; - $resultContent = null; - } + if ($resultDate) { + try { + $createdAt = new \DateTime($resultDate); + // Set to midnight for consistent datetime + $createdAt->setTime(0, 0); + $item['timestamp'] = $createdAt->format('U'); + } catch (\Exception $e) { + $item['timestamp'] = 0; + } + } else { + $item['timestamp'] = 0; + } - if ($resultDate) { - try { - $createdAt = new \DateTime($resultDate); - // Set to midnight for consistent datetime - $createdAt->setTime(0, 0); - $item['timestamp'] = $createdAt->format('U'); - } catch (\Exception $e) { - $item['timestamp'] = 0; - } - } else { - $item['timestamp'] = 0; - } + if ($resultContent) { + $item['content'] = $resultContent; + } - if ($resultContent) { - $item['content'] = $resultContent; - } + $this->items[] = $item; + } + // Sort by descending date + usort($this->items, function ($a, $b) { + return $b['timestamp'] <=> $a['timestamp']; + }); + } - $this->items[] = $item; - } - // Sort by descending date - usort($this->items, function($a, $b) { - return $b['timestamp'] <=> $a['timestamp']; - }); - } + public function getURI() + { + if ($this->getInput('q')) { + $queryParameters = [ + 'q' => $this->getInput('q'), + 'hl' => 'en', + 'num' => '100', // get 100 results + 'complete' => '0', + // in past year, sort by date, optionally verbatim + 'tbs' => 'qdr:y,sbd:1' . ($this->getInput('verbatim') ? ',li:1' : ''), + ]; + return sprintf('https://www.google.com/search?%s', http_build_query($queryParameters)); + } - public function getURI() { - if ($this->getInput('q')) { - $queryParameters = [ - 'q' => $this->getInput('q'), - 'hl' => 'en', - 'num' => '100', // get 100 results - 'complete' => '0', - // in past year, sort by date, optionally verbatim - 'tbs' => 'qdr:y,sbd:1' . ($this->getInput('verbatim') ? ',li:1' : ''), - ]; - return sprintf('https://www.google.com/search?%s', http_build_query($queryParameters)); - } + return parent::getURI(); + } - return parent::getURI(); - } + public function getName() + { + if (!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Google search'; + } - public function getName(){ - if(!is_null($this->getInput('q'))) { - return $this->getInput('q') . ' - Google search'; - } - - return parent::getName(); - } + return parent::getName(); + } } |