aboutsummaryrefslogtreecommitdiff
path: root/bridges/CraigslistBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/CraigslistBridge.php')
-rw-r--r--bridges/CraigslistBridge.php184
1 files changed, 94 insertions, 90 deletions
diff --git a/bridges/CraigslistBridge.php b/bridges/CraigslistBridge.php
index 8e677cf4..d56c770e 100644
--- a/bridges/CraigslistBridge.php
+++ b/bridges/CraigslistBridge.php
@@ -1,106 +1,110 @@
<?php
-class CraigslistBridge extends BridgeAbstract {
- const NAME = 'Craigslist Bridge';
- const URI = 'https://craigslist.org/';
- const DESCRIPTION = 'Returns craigslist search results';
- const PARAMETERS = array( array(
- 'region' => array(
- 'name' => 'Region',
- 'title' => 'The subdomain before craigslist.org in the URL',
- 'exampleValue' => 'sfbay',
- 'required' => true
- ),
- 'search' => array(
- 'name' => 'Search Query',
- 'title' => 'Everything in the URL after /search/',
- 'exampleValue' => 'sya?query=laptop',
- 'required' => true
- ),
- 'limit' => array(
- 'name' => 'Number of Posts',
- 'type' => 'number',
- 'title' => 'The maximum number of posts is 120. Use 0 for unlimited posts.',
- 'defaultValue' => '25'
- )
- ));
+class CraigslistBridge extends BridgeAbstract
+{
+ const NAME = 'Craigslist Bridge';
+ const URI = 'https://craigslist.org/';
+ const DESCRIPTION = 'Returns craigslist search results';
- const TEST_DETECT_PARAMETERS = array(
- 'https://sfbay.craigslist.org/search/sya?query=laptop' => array(
- 'region' => 'sfbay', 'search' => 'sya?query=laptop'
- ),
- 'https://newyork.craigslist.org/search/sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' => array(
- 'region' => 'newyork', 'search' => 'sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20'
- ),
- );
+ const PARAMETERS = [ [
+ 'region' => [
+ 'name' => 'Region',
+ 'title' => 'The subdomain before craigslist.org in the URL',
+ 'exampleValue' => 'sfbay',
+ 'required' => true
+ ],
+ 'search' => [
+ 'name' => 'Search Query',
+ 'title' => 'Everything in the URL after /search/',
+ 'exampleValue' => 'sya?query=laptop',
+ 'required' => true
+ ],
+ 'limit' => [
+ 'name' => 'Number of Posts',
+ 'type' => 'number',
+ 'title' => 'The maximum number of posts is 120. Use 0 for unlimited posts.',
+ 'defaultValue' => '25'
+ ]
+ ]];
- const URL_REGEX = '/^https:\/\/(?<region>\w+).craigslist.org\/search\/(?<search>.+)/';
+ const TEST_DETECT_PARAMETERS = [
+ 'https://sfbay.craigslist.org/search/sya?query=laptop' => [
+ 'region' => 'sfbay', 'search' => 'sya?query=laptop'
+ ],
+ 'https://newyork.craigslist.org/search/sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20' => [
+ 'region' => 'newyork', 'search' => 'sss?query=32gb+flash+drive&bundleDuplicates=1&max_price=20'
+ ],
+ ];
- public function detectParameters($url) {
- if(preg_match(self::URL_REGEX, $url, $matches)) {
- $params = array();
- $params['region'] = $matches['region'];
- $params['search'] = $matches['search'];
- return $params;
- }
- }
+ const URL_REGEX = '/^https:\/\/(?<region>\w+).craigslist.org\/search\/(?<search>.+)/';
- public function getURI() {
- if (!is_null($this->getInput('region'))) {
- $domain = 'https://' . $this->getInput('region') . '.craigslist.org/search/';
- return urljoin($domain, $this->getInput('search'));
- }
- return parent::getURI();
- }
+ public function detectParameters($url)
+ {
+ if (preg_match(self::URL_REGEX, $url, $matches)) {
+ $params = [];
+ $params['region'] = $matches['region'];
+ $params['search'] = $matches['search'];
+ return $params;
+ }
+ }
- public function collectData() {
- $uri = $this->getURI();
- $html = getSimpleHTMLDOM($uri);
+ public function getURI()
+ {
+ if (!is_null($this->getInput('region'))) {
+ $domain = 'https://' . $this->getInput('region') . '.craigslist.org/search/';
+ return urljoin($domain, $this->getInput('search'));
+ }
+ return parent::getURI();
+ }
- // Check if no results page is shown (nearby results)
- if ($html->find('.displaycountShow', 0)->plaintext == '0') {
- return;
- }
+ public function collectData()
+ {
+ $uri = $this->getURI();
+ $html = getSimpleHTMLDOM($uri);
- // Search for "more from nearby areas" banner in order to skip those results
- $results = $html->find('.result-row, h4.nearby');
+ // Check if no results page is shown (nearby results)
+ if ($html->find('.displaycountShow', 0)->plaintext == '0') {
+ return;
+ }
- // Limit the number of posts
- if ($this->getInput('limit') > 0) {
- $results = array_slice($results, 0, $this->getInput('limit'));
- }
+ // Search for "more from nearby areas" banner in order to skip those results
+ $results = $html->find('.result-row, h4.nearby');
- foreach($results as $post) {
+ // Limit the number of posts
+ if ($this->getInput('limit') > 0) {
+ $results = array_slice($results, 0, $this->getInput('limit'));
+ }
- // Skip "nearby results" banner and results
- // This only appears when searchNearby is not specified
- if ($post->tag == 'h4') {
- break;
- }
+ foreach ($results as $post) {
+ // Skip "nearby results" banner and results
+ // This only appears when searchNearby is not specified
+ if ($post->tag == 'h4') {
+ break;
+ }
- $item = array();
+ $item = [];
- $heading = $post->find('.result-heading a', 0);
- $item['uri'] = $heading->href;
- $item['title'] = $heading->plaintext;
- $item['timestamp'] = $post->find('.result-date', 0)->datetime;
- $item['uid'] = $heading->id;
- $item['content'] = $post->find('.result-price', 0)->plaintext . ' '
- // Find the location (local and nearby results if searchNearby=1)
- . $post->find('.result-hood, span.nearby', 0)->plaintext;
+ $heading = $post->find('.result-heading a', 0);
+ $item['uri'] = $heading->href;
+ $item['title'] = $heading->plaintext;
+ $item['timestamp'] = $post->find('.result-date', 0)->datetime;
+ $item['uid'] = $heading->id;
+ $item['content'] = $post->find('.result-price', 0)->plaintext . ' '
+ // Find the location (local and nearby results if searchNearby=1)
+ . $post->find('.result-hood, span.nearby', 0)->plaintext;
- $images = $post->find('.result-image[data-ids]', 0);
- if (!is_null($images)) {
- $item['content'] .= '<br>';
- foreach(explode(',', $images->getAttribute('data-ids')) as $image) {
- // Remove leading 3: from each image id
- $id = substr($image, 2);
- $image_uri = 'https://images.craigslist.org/' . $id . '_300x300.jpg';
- $item['content'] .= '<img src="' . $image_uri . '">';
- $item['enclosures'][] = $image_uri;
- }
- }
- $this->items[] = $item;
- }
- }
+ $images = $post->find('.result-image[data-ids]', 0);
+ if (!is_null($images)) {
+ $item['content'] .= '<br>';
+ foreach (explode(',', $images->getAttribute('data-ids')) as $image) {
+ // Remove leading 3: from each image id
+ $id = substr($image, 2);
+ $image_uri = 'https://images.craigslist.org/' . $id . '_300x300.jpg';
+ $item['content'] .= '<img src="' . $image_uri . '">';
+ $item['enclosures'][] = $image_uri;
+ }
+ }
+ $this->items[] = $item;
+ }
+ }
}