diff options
Diffstat (limited to 'bridges/CraigslistBridge.php')
-rw-r--r-- | bridges/CraigslistBridge.php | 184 |
1 files changed, 94 insertions, 90 deletions
diff --git a/bridges/CraigslistBridge.php b/bridges/CraigslistBridge.php index 8e677cf4..d56c770e 100644 --- a/bridges/CraigslistBridge.php +++ b/bridges/CraigslistBridge.php @@ -1,106 +1,110 @@ <?php -class CraigslistBridge extends BridgeAbstract { - const NAME = 'Craigslist Bridge'; - const URI = 'https://craigslist.org/'; - const DESCRIPTION = 'Returns craigslist search results'; - const PARAMETERS = array( array( - 'region' => array( - 'name' => 'Region', - 'title' => 'The subdomain before craigslist.org in the URL', - 'exampleValue' => 'sfbay', - 'required' => true - ), - 'search' => array( - 'name' => 'Search Query', - 'title' => 'Everything in the URL after /search/', - 'exampleValue' => 'sya?query=laptop', - 'required' => true - ), - 'limit' => array( - 'name' => 'Number of Posts', - 'type' => 'number', - 'title' => 'The maximum number of posts is 120. Use 0 for unlimited posts.', - 'defaultValue' => '25' - ) - )); +class CraigslistBridge extends BridgeAbstract +{ + const NAME = 'Craigslist Bridge'; + const URI = 'https://craigslist.org/'; + const DESCRIPTION = 'Returns craigslist search results'; - const TEST_DETECT_PARAMETERS = array( - 'https://sfbay.craigslist.org/search/sya?query=laptop' => array( - 'region' => 'sfbay', 'search' => 'sya?query=laptop' - ), - 'https://newyork.craigslist.org/search/sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' => array( - 'region' => 'newyork', 'search' => 'sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' - ), - ); + const PARAMETERS = [ [ + 'region' => [ + 'name' => 'Region', + 'title' => 'The subdomain before craigslist.org in the URL', + 'exampleValue' => 'sfbay', + 'required' => true + ], + 'search' => [ + 'name' => 'Search Query', + 'title' => 'Everything in the URL after /search/', + 'exampleValue' => 'sya?query=laptop', + 'required' => true + ], + 'limit' => [ + 'name' => 'Number of Posts', + 'type' => 'number', + 'title' => 'The maximum number of posts is 120. Use 0 for unlimited posts.', + 'defaultValue' => '25' + ] + ]]; - const URL_REGEX = '/^https:\/\/(?<region>\w+).craigslist.org\/search\/(?<search>.+)/'; + const TEST_DETECT_PARAMETERS = [ + 'https://sfbay.craigslist.org/search/sya?query=laptop' => [ + 'region' => 'sfbay', 'search' => 'sya?query=laptop' + ], + 'https://newyork.craigslist.org/search/sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' => [ + 'region' => 'newyork', 'search' => 'sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' + ], + ]; - public function detectParameters($url) { - if(preg_match(self::URL_REGEX, $url, $matches)) { - $params = array(); - $params['region'] = $matches['region']; - $params['search'] = $matches['search']; - return $params; - } - } + const URL_REGEX = '/^https:\/\/(?<region>\w+).craigslist.org\/search\/(?<search>.+)/'; - public function getURI() { - if (!is_null($this->getInput('region'))) { - $domain = 'https://' . $this->getInput('region') . '.craigslist.org/search/'; - return urljoin($domain, $this->getInput('search')); - } - return parent::getURI(); - } + public function detectParameters($url) + { + if (preg_match(self::URL_REGEX, $url, $matches)) { + $params = []; + $params['region'] = $matches['region']; + $params['search'] = $matches['search']; + return $params; + } + } - public function collectData() { - $uri = $this->getURI(); - $html = getSimpleHTMLDOM($uri); + public function getURI() + { + if (!is_null($this->getInput('region'))) { + $domain = 'https://' . $this->getInput('region') . '.craigslist.org/search/'; + return urljoin($domain, $this->getInput('search')); + } + return parent::getURI(); + } - // Check if no results page is shown (nearby results) - if ($html->find('.displaycountShow', 0)->plaintext == '0') { - return; - } + public function collectData() + { + $uri = $this->getURI(); + $html = getSimpleHTMLDOM($uri); - // Search for "more from nearby areas" banner in order to skip those results - $results = $html->find('.result-row, h4.nearby'); + // Check if no results page is shown (nearby results) + if ($html->find('.displaycountShow', 0)->plaintext == '0') { + return; + } - // Limit the number of posts - if ($this->getInput('limit') > 0) { - $results = array_slice($results, 0, $this->getInput('limit')); - } + // Search for "more from nearby areas" banner in order to skip those results + $results = $html->find('.result-row, h4.nearby'); - foreach($results as $post) { + // Limit the number of posts + if ($this->getInput('limit') > 0) { + $results = array_slice($results, 0, $this->getInput('limit')); + } - // Skip "nearby results" banner and results - // This only appears when searchNearby is not specified - if ($post->tag == 'h4') { - break; - } + foreach ($results as $post) { + // Skip "nearby results" banner and results + // This only appears when searchNearby is not specified + if ($post->tag == 'h4') { + break; + } - $item = array(); + $item = []; - $heading = $post->find('.result-heading a', 0); - $item['uri'] = $heading->href; - $item['title'] = $heading->plaintext; - $item['timestamp'] = $post->find('.result-date', 0)->datetime; - $item['uid'] = $heading->id; - $item['content'] = $post->find('.result-price', 0)->plaintext . ' ' - // Find the location (local and nearby results if searchNearby=1) - . $post->find('.result-hood, span.nearby', 0)->plaintext; + $heading = $post->find('.result-heading a', 0); + $item['uri'] = $heading->href; + $item['title'] = $heading->plaintext; + $item['timestamp'] = $post->find('.result-date', 0)->datetime; + $item['uid'] = $heading->id; + $item['content'] = $post->find('.result-price', 0)->plaintext . ' ' + // Find the location (local and nearby results if searchNearby=1) + . $post->find('.result-hood, span.nearby', 0)->plaintext; - $images = $post->find('.result-image[data-ids]', 0); - if (!is_null($images)) { - $item['content'] .= '<br>'; - foreach(explode(',', $images->getAttribute('data-ids')) as $image) { - // Remove leading 3: from each image id - $id = substr($image, 2); - $image_uri = 'https://images.craigslist.org/' . $id . '_300x300.jpg'; - $item['content'] .= '<img src="' . $image_uri . '">'; - $item['enclosures'][] = $image_uri; - } - } - $this->items[] = $item; - } - } + $images = $post->find('.result-image[data-ids]', 0); + if (!is_null($images)) { + $item['content'] .= '<br>'; + foreach (explode(',', $images->getAttribute('data-ids')) as $image) { + // Remove leading 3: from each image id + $id = substr($image, 2); + $image_uri = 'https://images.craigslist.org/' . $id . '_300x300.jpg'; + $item['content'] .= '<img src="' . $image_uri . '">'; + $item['enclosures'][] = $image_uri; + } + } + $this->items[] = $item; + } + } } |