diff options
Diffstat (limited to 'bridges/IPBBridge.php')
-rw-r--r-- | bridges/IPBBridge.php | 612 |
1 files changed, 314 insertions, 298 deletions
diff --git a/bridges/IPBBridge.php b/bridges/IPBBridge.php index af2ed390..d5db0111 100644 --- a/bridges/IPBBridge.php +++ b/bridges/IPBBridge.php @@ -1,309 +1,325 @@ <?php -class IPBBridge extends FeedExpander { - - const NAME = 'IPB Bridge'; - const URI = 'https://www.invisionpower.com'; - const DESCRIPTION = 'Returns feeds for forums powered by IPB'; - const MAINTAINER = 'logmanoriginal'; - const PARAMETERS = array( - array( - 'uri' => array( - 'name' => 'URI', - 'type' => 'text', - 'required' => true, - 'title' => 'Insert forum, subforum or topic URI', - 'exampleValue' => 'https://invisioncommunity.com/forums/forum/499-feedback-and-ideas/' - ), - 'limit' => array( - 'name' => 'Limit', - 'type' => 'number', - 'required' => false, - 'title' => 'Specifies the number of items to return on each request (-1: all)', - 'defaultValue' => 10 - ) - ) - ); - const CACHE_TIMEOUT = 3600; - - // Constants for internal use - const FORUM_TYPE_LIST_FILTER = '.cForumTopicTable'; - const FORUM_TYPE_TABLE_FILTER = '#forum_table'; - - const TOPIC_TYPE_ARTICLE = 'article'; - const TOPIC_TYPE_DIV = 'div.post_block'; - - public function getURI(){ - return $this->getInput('uri') ?: parent::getURI(); - } - - public function collectData(){ - // The URI cannot be the mainpage (or anything related) - switch(parse_url($this->getInput('uri'), PHP_URL_PATH)) { - case null: - case '/index.php': - returnClientError('Provided URI is invalid!'); - break; - default: - break; - } - - // Sanitize the URI (because else it won't work) - $uri = rtrim($this->getInput('uri'), '/'); // No trailing slashes! - - // Forums might provide feeds, though that's optional *facepalm* - // Let's check if there is a valid feed available - $headers = get_headers($uri . '.xml'); - - if($headers[0] === 'HTTP/1.1 200 OK') { // Heureka! It's a valid feed! - return $this->collectExpandableDatas($uri . '.xml'); - } - - // No valid feed, so do it the hard way - $html = getSimpleHTMLDOM($uri); - - $limit = $this->getInput('limit'); - - // Determine if this is a topic or a forum - switch(true) { - case $this->isTopic($html): - $this->collectTopic($html, $limit); - break; - case $this->isForum($html): - $this->collectForum($html); - break; - default: - returnClientError('Unknown type!'); - break; - } - } - - private function isForum($html){ - return !is_null($html->find('div[data-controller*=forums.front.forum.forumPage]', 0)) - || !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)); - } - - private function isTopic($html){ - return !is_null($html->find('div[data-controller*=core.front.core.commentFeed]', 0)) - || !is_null($html->find(static::TOPIC_TYPE_DIV, 0)); - } - - private function collectForum($html){ - // There are multiple forum designs in use (depends on version?) - // 1 - Uses an ordered list (based on https://invisioncommunity.com/forums) - // 2 - Uses a table (based on https://onehallyu.com) - - switch(true) { - case !is_null($html->find(static::FORUM_TYPE_LIST_FILTER, 0)): - $this->collectForumList($html); - break; - case !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)): - $this->collectForumTable($html); - break; - default: - returnClientError('Unknown forum format!'); - break; - } - } - - private function collectForumList($html){ - foreach($html->find(static::FORUM_TYPE_LIST_FILTER, 0)->children() as $row) { - // Columns: Title, Statistics, Last modified - $item = array(); - - $item['uri'] = $row->find('a', 0)->href; - $item['title'] = $row->find('a', 0)->title; - $item['author'] = $row->find('a', 1)->innertext; - $item['timestamp'] = strtotime($row->find('time', 0)->getAttribute('datetime')); - - $this->items[] = $item; - } - } - - private function collectForumTable($html){ - foreach($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)->children() as $row) { - // Columns: Icon, Content, Preview, Statistics, Last modified - $item = array(); - - // Skip header row - if(!is_null($row->find('th', 0))) continue; - - $item['uri'] = $row->find('a', 0)->href; - $item['title'] = $row->find('.title', 0)->plaintext; - $item['timestamp'] = strtotime($row->find('[itemprop=dateCreated]', 0)->plaintext); - - $this->items[] = $item; - } - } - - private function collectTopic($html, $limit){ - // There are multiple topic designs in use (depends on version?) - // 1 - Uses articles (based on https://invisioncommunity.com/forums) - // 2 - Uses divs (based on https://onehallyu.com) - - switch(true) { - case !is_null($html->find(static::TOPIC_TYPE_ARTICLE, 0)): - $this->collectTopicHistory($html, $limit, 'collectTopicArticle'); - break; - case !is_null($html->find(static::TOPIC_TYPE_DIV, 0)): - $this->collectTopicHistory($html, $limit, 'collectTopicDiv'); - break; - default: - returnClientError('Unknown topic format!'); - break; - } - } - - private function collectTopicHistory($html, $limit, $callback){ - // Make sure the callback is valid! - if(!method_exists($this, $callback)) - returnServerError('Unknown function (\'' . $callback . '\')!'); - - $next = null; // Holds the URI of the next page - - while(true) { - $next = $this->$callback($html, is_null($next)); - - if(is_null($next) || ($limit > 0 && count($this->items) >= $limit)) { - break; - } - - $html = getSimpleHTMLDOMCached($next); - } - - // We might have more items than specified, remove excess - $this->items = array_slice($this->items, 0, $limit); - } - - private function collectTopicArticle($html, $firstrun = true){ - $title = $html->find('h1.ipsType_pageTitle', 0)->plaintext; - - // Are we on last page? - if($firstrun && !is_null($html->find('.ipsPagination', 0))) { - $last = $html->find('.ipsPagination_last a', 0)->{'data-page'}; - $active = $html->find('.ipsPagination_active a', 0)->{'data-page'}; - - if($active !== $last) { - // Load last page into memory (cached) - $html = getSimpleHTMLDOMCached($html->find('.ipsPagination_last a', 0)->href); - } - } - - foreach(array_reverse($html->find(static::TOPIC_TYPE_ARTICLE)) as $article) { - $item = array(); - - $item['uri'] = $article->find('time', 0)->parent()->href; - $item['author'] = $article->find('aside a', 0)->plaintext; - $item['title'] = $item['author'] . ' - ' . $title; - $item['timestamp'] = strtotime($article->find('time', 0)->getAttribute('datetime')); - - $content = $article->find('[data-role=commentContent]', 0); - $content = $this->scaleImages($content); - $item['content'] = $this->fixContent($content); - $item['enclosures'] = $this->findImages($article->find('[data-role=commentContent]', 0)) ?: null; - - $this->items[] = $item; - } - - // Return whatever page comes next (previous, as we add in inverse order) - // Do we have a previous page? (inactive means no) - if(!is_null($html->find('li[class=ipsPagination_prev ipsPagination_inactive]', 0))) { - return null; // No, or no more - } elseif(!is_null($html->find('li[class=ipsPagination_prev]', 0))) { - return $html->find('.ipsPagination_prev a', 0)->href; - } - - return null; - } - - private function collectTopicDiv($html, $firstrun = true){ - $title = $html->find('h1.ipsType_pagetitle', 0)->plaintext; - - // Are we on last page? - if($firstrun && !is_null($html->find('.pagination', 0))) { - - $active = $html->find('li[class=page active]', 0)->plaintext; - - // There are two ways the 'last' page is displayed: - // - With a distict 'last' button (only if there are enough pages) - // - With a button for each page (use last button) - if(!is_null($html->find('li.last', 0))) { - $last = $html->find('li.last a', 0); - } else { - $last = $html->find('li[class=page] a', -1); - } - - if($active !== $last->plaintext) { - // Load last page into memory (cached) - $html = getSimpleHTMLDOMCached($last->href); - } - } - - foreach(array_reverse($html->find(static::TOPIC_TYPE_DIV)) as $article) { - $item = array(); - - $item['uri'] = $article->find('a[rel=bookmark]', 0)->href; - $item['author'] = $article->find('.author', 0)->plaintext; - $item['title'] = $item['author'] . ' - ' . $title; - $item['timestamp'] = strtotime($article->find('.published', 0)->getAttribute('title')); - - $content = $article->find('[itemprop=commentText]', 0); - $content = $this->scaleImages($content); - $item['content'] = $this->fixContent($content); - - $item['enclosures'] = $this->findImages($article->find('.post_body', 0)) ?: null; - $this->items[] = $item; - } - - // Return whatever page comes next (previous, as we add in inverse order) - // Do we have a previous page? - if(!is_null($html->find('li.prev', 0))) { - return $html->find('li.prev a', 0)->href; - } - - return null; - } - - /** Returns all images from the provide HTML DOM */ - private function findImages($html){ - $images = array(); - - foreach($html->find('img') as $img) { - $images[] = $img->src; - } - - return $images; - } - - /** Sets the maximum width and height for all images */ - private function scaleImages($html, $width = 400, $height = 400){ - foreach($html->find('img') as $img) { - $img->style = "max-width: {$width}px; max-height: {$height}px;"; - } - - return $html; - } - - /** Removes all unnecessary tags and adds formatting */ - private function fixContent($html){ - - // Restore quote highlighting - foreach($html->find('blockquote') as $quote) { - $quote->style = <<<EOD +class IPBBridge extends FeedExpander +{ + const NAME = 'IPB Bridge'; + const URI = 'https://www.invisionpower.com'; + const DESCRIPTION = 'Returns feeds for forums powered by IPB'; + const MAINTAINER = 'logmanoriginal'; + const PARAMETERS = [ + [ + 'uri' => [ + 'name' => 'URI', + 'type' => 'text', + 'required' => true, + 'title' => 'Insert forum, subforum or topic URI', + 'exampleValue' => 'https://invisioncommunity.com/forums/forum/499-feedback-and-ideas/' + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Specifies the number of items to return on each request (-1: all)', + 'defaultValue' => 10 + ] + ] + ]; + const CACHE_TIMEOUT = 3600; + + // Constants for internal use + const FORUM_TYPE_LIST_FILTER = '.cForumTopicTable'; + const FORUM_TYPE_TABLE_FILTER = '#forum_table'; + + const TOPIC_TYPE_ARTICLE = 'article'; + const TOPIC_TYPE_DIV = 'div.post_block'; + + public function getURI() + { + return $this->getInput('uri') ?: parent::getURI(); + } + + public function collectData() + { + // The URI cannot be the mainpage (or anything related) + switch (parse_url($this->getInput('uri'), PHP_URL_PATH)) { + case null: + case '/index.php': + returnClientError('Provided URI is invalid!'); + break; + default: + break; + } + + // Sanitize the URI (because else it won't work) + $uri = rtrim($this->getInput('uri'), '/'); // No trailing slashes! + + // Forums might provide feeds, though that's optional *facepalm* + // Let's check if there is a valid feed available + $headers = get_headers($uri . '.xml'); + + if ($headers[0] === 'HTTP/1.1 200 OK') { // Heureka! It's a valid feed! + return $this->collectExpandableDatas($uri . '.xml'); + } + + // No valid feed, so do it the hard way + $html = getSimpleHTMLDOM($uri); + + $limit = $this->getInput('limit'); + + // Determine if this is a topic or a forum + switch (true) { + case $this->isTopic($html): + $this->collectTopic($html, $limit); + break; + case $this->isForum($html): + $this->collectForum($html); + break; + default: + returnClientError('Unknown type!'); + break; + } + } + + private function isForum($html) + { + return !is_null($html->find('div[data-controller*=forums.front.forum.forumPage]', 0)) + || !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)); + } + + private function isTopic($html) + { + return !is_null($html->find('div[data-controller*=core.front.core.commentFeed]', 0)) + || !is_null($html->find(static::TOPIC_TYPE_DIV, 0)); + } + + private function collectForum($html) + { + // There are multiple forum designs in use (depends on version?) + // 1 - Uses an ordered list (based on https://invisioncommunity.com/forums) + // 2 - Uses a table (based on https://onehallyu.com) + + switch (true) { + case !is_null($html->find(static::FORUM_TYPE_LIST_FILTER, 0)): + $this->collectForumList($html); + break; + case !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)): + $this->collectForumTable($html); + break; + default: + returnClientError('Unknown forum format!'); + break; + } + } + + private function collectForumList($html) + { + foreach ($html->find(static::FORUM_TYPE_LIST_FILTER, 0)->children() as $row) { + // Columns: Title, Statistics, Last modified + $item = []; + + $item['uri'] = $row->find('a', 0)->href; + $item['title'] = $row->find('a', 0)->title; + $item['author'] = $row->find('a', 1)->innertext; + $item['timestamp'] = strtotime($row->find('time', 0)->getAttribute('datetime')); + + $this->items[] = $item; + } + } + + private function collectForumTable($html) + { + foreach ($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)->children() as $row) { + // Columns: Icon, Content, Preview, Statistics, Last modified + $item = []; + + // Skip header row + if (!is_null($row->find('th', 0))) { + continue; + } + + $item['uri'] = $row->find('a', 0)->href; + $item['title'] = $row->find('.title', 0)->plaintext; + $item['timestamp'] = strtotime($row->find('[itemprop=dateCreated]', 0)->plaintext); + + $this->items[] = $item; + } + } + + private function collectTopic($html, $limit) + { + // There are multiple topic designs in use (depends on version?) + // 1 - Uses articles (based on https://invisioncommunity.com/forums) + // 2 - Uses divs (based on https://onehallyu.com) + + switch (true) { + case !is_null($html->find(static::TOPIC_TYPE_ARTICLE, 0)): + $this->collectTopicHistory($html, $limit, 'collectTopicArticle'); + break; + case !is_null($html->find(static::TOPIC_TYPE_DIV, 0)): + $this->collectTopicHistory($html, $limit, 'collectTopicDiv'); + break; + default: + returnClientError('Unknown topic format!'); + break; + } + } + + private function collectTopicHistory($html, $limit, $callback) + { + // Make sure the callback is valid! + if (!method_exists($this, $callback)) { + returnServerError('Unknown function (\'' . $callback . '\')!'); + } + + $next = null; // Holds the URI of the next page + + while (true) { + $next = $this->$callback($html, is_null($next)); + + if (is_null($next) || ($limit > 0 && count($this->items) >= $limit)) { + break; + } + + $html = getSimpleHTMLDOMCached($next); + } + + // We might have more items than specified, remove excess + $this->items = array_slice($this->items, 0, $limit); + } + + private function collectTopicArticle($html, $firstrun = true) + { + $title = $html->find('h1.ipsType_pageTitle', 0)->plaintext; + + // Are we on last page? + if ($firstrun && !is_null($html->find('.ipsPagination', 0))) { + $last = $html->find('.ipsPagination_last a', 0)->{'data-page'}; + $active = $html->find('.ipsPagination_active a', 0)->{'data-page'}; + + if ($active !== $last) { + // Load last page into memory (cached) + $html = getSimpleHTMLDOMCached($html->find('.ipsPagination_last a', 0)->href); + } + } + + foreach (array_reverse($html->find(static::TOPIC_TYPE_ARTICLE)) as $article) { + $item = []; + + $item['uri'] = $article->find('time', 0)->parent()->href; + $item['author'] = $article->find('aside a', 0)->plaintext; + $item['title'] = $item['author'] . ' - ' . $title; + $item['timestamp'] = strtotime($article->find('time', 0)->getAttribute('datetime')); + + $content = $article->find('[data-role=commentContent]', 0); + $content = $this->scaleImages($content); + $item['content'] = $this->fixContent($content); + $item['enclosures'] = $this->findImages($article->find('[data-role=commentContent]', 0)) ?: null; + + $this->items[] = $item; + } + + // Return whatever page comes next (previous, as we add in inverse order) + // Do we have a previous page? (inactive means no) + if (!is_null($html->find('li[class=ipsPagination_prev ipsPagination_inactive]', 0))) { + return null; // No, or no more + } elseif (!is_null($html->find('li[class=ipsPagination_prev]', 0))) { + return $html->find('.ipsPagination_prev a', 0)->href; + } + + return null; + } + + private function collectTopicDiv($html, $firstrun = true) + { + $title = $html->find('h1.ipsType_pagetitle', 0)->plaintext; + + // Are we on last page? + if ($firstrun && !is_null($html->find('.pagination', 0))) { + $active = $html->find('li[class=page active]', 0)->plaintext; + + // There are two ways the 'last' page is displayed: + // - With a distict 'last' button (only if there are enough pages) + // - With a button for each page (use last button) + if (!is_null($html->find('li.last', 0))) { + $last = $html->find('li.last a', 0); + } else { + $last = $html->find('li[class=page] a', -1); + } + + if ($active !== $last->plaintext) { + // Load last page into memory (cached) + $html = getSimpleHTMLDOMCached($last->href); + } + } + + foreach (array_reverse($html->find(static::TOPIC_TYPE_DIV)) as $article) { + $item = []; + + $item['uri'] = $article->find('a[rel=bookmark]', 0)->href; + $item['author'] = $article->find('.author', 0)->plaintext; + $item['title'] = $item['author'] . ' - ' . $title; + $item['timestamp'] = strtotime($article->find('.published', 0)->getAttribute('title')); + + $content = $article->find('[itemprop=commentText]', 0); + $content = $this->scaleImages($content); + $item['content'] = $this->fixContent($content); + + $item['enclosures'] = $this->findImages($article->find('.post_body', 0)) ?: null; + + $this->items[] = $item; + } + + // Return whatever page comes next (previous, as we add in inverse order) + // Do we have a previous page? + if (!is_null($html->find('li.prev', 0))) { + return $html->find('li.prev a', 0)->href; + } + + return null; + } + + /** Returns all images from the provide HTML DOM */ + private function findImages($html) + { + $images = []; + + foreach ($html->find('img') as $img) { + $images[] = $img->src; + } + + return $images; + } + + /** Sets the maximum width and height for all images */ + private function scaleImages($html, $width = 400, $height = 400) + { + foreach ($html->find('img') as $img) { + $img->style = "max-width: {$width}px; max-height: {$height}px;"; + } + + return $html; + } + + /** Removes all unnecessary tags and adds formatting */ + private function fixContent($html) + { + // Restore quote highlighting + foreach ($html->find('blockquote') as $quote) { + $quote->style = <<<EOD padding: 0px 15px; border-width: 1px 1px 1px 2px; border-style: solid; border-color: #ededed #e8e8e8 #dbdbdb #666666; background: #fbfbfb; EOD; - } + } - // Remove unnecessary tags - $content = strip_tags( - $html->innertext, - '<p><a><img><ol><ul><li><table><tr><th><td><strong><blockquote><br><hr><h>' - ); + // Remove unnecessary tags + $content = strip_tags( + $html->innertext, + '<p><a><img><ol><ul><li><table><tr><th><td><strong><blockquote><br><hr><h>' + ); - return $content; - } + return $content; + } } |