diff options
Diffstat (limited to 'bridges/FB2Bridge.php')
-rw-r--r-- | bridges/FB2Bridge.php | 620 |
1 files changed, 318 insertions, 302 deletions
diff --git a/bridges/FB2Bridge.php b/bridges/FB2Bridge.php index 46a92c56..efebd48b 100644 --- a/bridges/FB2Bridge.php +++ b/bridges/FB2Bridge.php @@ -1,311 +1,327 @@ <?php -class FB2Bridge extends BridgeAbstract { - const MAINTAINER = 'teromene'; - const NAME = 'Facebook Bridge | Touch Site'; - const URI = 'https://www.facebook.com/'; - const CACHE_TIMEOUT = 1000; - const DESCRIPTION = 'Input a page title or a profile log. For a profile log, +class FB2Bridge extends BridgeAbstract +{ + const MAINTAINER = 'teromene'; + const NAME = 'Facebook Bridge | Touch Site'; + const URI = 'https://www.facebook.com/'; + const CACHE_TIMEOUT = 1000; + const DESCRIPTION = 'Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117'; - const PARAMETERS = array( array( - 'u' => array( - 'name' => 'Username', - 'required' => true - ), - 'abbrev_name' => array( - 'name' => 'Abbreviate author name in title', - 'type' => 'checkbox', - 'defaultValue' => true, - ), - )); - - public function getIcon() { - return 'https://static.xx.fbcdn.net/rsrc.php/yo/r/iRmz9lCMBD2.ico'; - } - - public function collectData(){ - - //Utility function for cleaning a Facebook link - $unescape_fb_link = function($matches){ - if(is_array($matches) && count($matches) > 1) { - $link = $matches[1]; - if(strpos($link, '/') === 0) - $link = self::URI . substr($link, 1); - if(strpos($link, 'facebook.com/l.php?u=') !== false) - $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&')); - return ' href="' . $link . '"'; - } - }; - - //Utility function for converting facebook emoticons - $unescape_fb_emote = function($matches){ - static $facebook_emoticons = array( - 'smile' => ':)', - 'frown' => ':(', - 'tongue' => ':P', - 'grin' => ':D', - 'gasp' => ':O', - 'wink' => ';)', - 'pacman' => ':<', - 'grumpy' => '>_<', - 'unsure' => ':/', - 'cry' => ':\'(', - 'kiki' => '^_^', - 'glasses' => '8-)', - 'sunglasses' => 'B-)', - 'heart' => '<3', - 'devil' => ']:D', - 'angel' => '0:)', - 'squint' => '-_-', - 'confused' => 'o_O', - 'upset' => 'xD', - 'colonthree' => ':3', - 'like' => '👍'); - $len = count($matches); - if ($len > 1) - for ($i = 1; $i < $len; $i++) - foreach ($facebook_emoticons as $name => $emote) - if ($matches[$i] === $name) - return $emote; - return $matches[0]; - }; - - if($this->getInput('u') !== null) { - $page = 'https://touch.facebook.com/' . $this->getInput('u'); - $cookies = $this->getCookies($page); - $pageInfo = $this->getPageInfos($page, $cookies); - - if($pageInfo['userId'] === null) { - returnClientError(<<<EOD + const PARAMETERS = [ [ + 'u' => [ + 'name' => 'Username', + 'required' => true + ], + 'abbrev_name' => [ + 'name' => 'Abbreviate author name in title', + 'type' => 'checkbox', + 'defaultValue' => true, + ], + ]]; + + public function getIcon() + { + return 'https://static.xx.fbcdn.net/rsrc.php/yo/r/iRmz9lCMBD2.ico'; + } + + public function collectData() + { + //Utility function for cleaning a Facebook link + $unescape_fb_link = function ($matches) { + if (is_array($matches) && count($matches) > 1) { + $link = $matches[1]; + if (strpos($link, '/') === 0) { + $link = self::URI . substr($link, 1); + } + if (strpos($link, 'facebook.com/l.php?u=') !== false) { + $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&')); + } + return ' href="' . $link . '"'; + } + }; + + //Utility function for converting facebook emoticons + $unescape_fb_emote = function ($matches) { + static $facebook_emoticons = [ + 'smile' => ':)', + 'frown' => ':(', + 'tongue' => ':P', + 'grin' => ':D', + 'gasp' => ':O', + 'wink' => ';)', + 'pacman' => ':<', + 'grumpy' => '>_<', + 'unsure' => ':/', + 'cry' => ':\'(', + 'kiki' => '^_^', + 'glasses' => '8-)', + 'sunglasses' => 'B-)', + 'heart' => '<3', + 'devil' => ']:D', + 'angel' => '0:)', + 'squint' => '-_-', + 'confused' => 'o_O', + 'upset' => 'xD', + 'colonthree' => ':3', + 'like' => '👍']; + $len = count($matches); + if ($len > 1) { + for ($i = 1; $i < $len; $i++) { + foreach ($facebook_emoticons as $name => $emote) { + if ($matches[$i] === $name) { + return $emote; + } + } + } + } + return $matches[0]; + }; + + if ($this->getInput('u') !== null) { + $page = 'https://touch.facebook.com/' . $this->getInput('u'); + $cookies = $this->getCookies($page); + $pageInfo = $this->getPageInfos($page, $cookies); + + if ($pageInfo['userId'] === null) { + returnClientError(<<<EOD Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge EOD - ); - } elseif($pageInfo['userId'] == -1) { - returnClientError(<<<EOD + ); + } elseif ($pageInfo['userId'] == -1) { + returnClientError(<<<EOD This page is not accessible without being logged in. EOD - ); - } - } - - //Build the string for the first request - $requestString = 'https://touch.facebook.com/page_content_list_view/more/?page_id=' - . $pageInfo['userId'] - . '&start_cursor=1&num_to_fetch=105&surface_type=timeline'; - $fileContent = getContents($requestString); - $html = $this->buildContent($fileContent); - $author = $pageInfo['username']; - - foreach($html->find('article') as $content) { - - $item = array(); - - preg_match('/publish_time\\\":([0-9]+),/', $content->getAttribute('data-store', 0), $match); - if(isset($match[1])) - $timestamp = $match[1]; - else - $timestamp = 0; - - $item['uri'] = html_entity_decode('https://touch.facebook.com' - . $content->find("div[class='_52jc _5qc4 _78cz _24u0 _36xo']", 0)->find('a', 0)->getAttribute('href'), ENT_QUOTES); - - //Decode images - $imagecleaned = preg_replace_callback('/<i [^>]* style="[^"]*url\(\'(.*?)\'\).*?><\/i>/m', function ($matches) { - return "<img src='" . str_replace(array('\\3a ', '\\3d ', '\\26 '), array(':', '=', '&'), $matches[1]) . "' />"; - }, $content); - $content = str_get_html($imagecleaned); - - if($content->find('header', 0) !== null) { - $content->find('header', 0)->innertext = ''; - } - - if($content->find('footer', 0) !== null) { - $content->find('footer', 0)->innertext = ''; - } - - // Replace emoticon images by their textual representation (part of the span) - foreach($content->find('span[title*="emoticon"]') as $emoticon) { - $emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext; - } - - //Remove html nodes, keep only img, links, basic formatting - $content = strip_tags($content, '<a><img><i><u><br><p><h3><h4><section>'); - - //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection - $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); - - //Clean useless html tag properties and fix link closing tags - foreach (array( - 'onmouseover', - 'onclick', - 'target', - 'ajaxify', - 'tabindex', - 'class', - 'data-[^=]*', - 'aria-[^=]*', - 'role', - 'rel', - 'id') as $property_name) - $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); - $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content); - - //Convert textual representation of emoticons eg - // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)" - $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); - - //Remove the "...Plus" tag - $content = preg_replace( - '/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m', - '', $content, 1); - - //Remove tracking images - $content = preg_replace('/<img src=\'.*?safe_image\.php.*?\' \/>/m', '', $content); - - //Remove the double section tags - $content = str_replace( - array('<section><section>', '</section></section>'), - array('<section>', '</section>'), - $content - ); - - //Move the section tag link upper, if it is down - $content = str_get_html($content); - $sectionContent = $content->find('section', 0); - if($sectionContent != null) { - $sectionLink = $sectionContent->nextSibling(); - if($sectionLink != null) { - $fullLink = '<a href="' . $sectionLink->getAttribute('href') . '">' . $sectionContent->innertext . '</a>'; - $sectionContent->innertext = $fullLink; - } - } - - //Move the href tag upper if it is inside the section - foreach($content->find('section > a') as $sectionToFix) { - $sectionLink = $sectionToFix->getAttribute('href'); - $section = $sectionToFix->parent(); - $section->outertext = '<a href="' . $sectionLink . '">' . $section . '</a>'; - } - - $item['content'] = html_entity_decode($content, ENT_QUOTES); - - $title = $author; - if ($this->getInput('abbrev_name') === true) { - if (strlen($title) > 24) - $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...'; - } - $title = $title . ' | ' . strip_tags($content); - if (strlen($title) > 64) - $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...'; - - $item['title'] = html_entity_decode($title, ENT_QUOTES); - $item['author'] = html_entity_decode($author, ENT_QUOTES); - $item['timestamp'] = html_entity_decode($timestamp, ENT_QUOTES); - - if($item['timestamp'] != 0) - array_push($this->items, $item); - } - - } - - //Builds the HTML from the encoded JS that Facebook provides. - private function buildContent($pageContent){ - // The html ends with: - // /div>","replaceifexists - $regex = '/\\"html\\":(\".+\/div>"),"replace/'; - preg_match($regex, $pageContent, $result); - - $htmlContent = json_decode($result[1]); - $htmlContent = preg_replace('/(?<!style)="(.*?)"/', '=\'$1\'', $htmlContent); - $htmlContent = html_entity_decode($htmlContent, ENT_QUOTES, 'UTF-8'); - - return str_get_html($htmlContent); - } - - //Builds the cookie from the page, as Facebook sometimes refuses to give - //the page if no cookie is provided. - private function getCookies($pageURL){ - - $ctx = stream_context_create(array( - 'http' => array( - 'user_agent' => Configuration::getConfig('http', 'useragent'), - 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - ) - ) - ); - $a = file_get_contents($pageURL, 0, $ctx); - - //First request to get the cookie - $cookies = ''; - foreach($http_response_header as $hdr) { - if(strpos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . $cLine; - } - } - - return substr($cookies, 1); - } - - //Get the page ID and username from the Facebook page. - private function getPageInfos($page, $cookies){ - - $context = stream_context_create(array( - 'http' => array( - 'user_agent' => Configuration::getConfig('http', 'useragent'), - 'header' => 'Cookie: ' . $cookies - ) - ) - ); - - $pageContent = file_get_contents($page, 0, $context); - - if(strpos($pageContent, 'signup-button') != false) { - return -1; - } - - //Get the username - $usernameRegex = '/data-nt=\"FB:TEXT4\">(.*?)<\/div>/m'; - preg_match($usernameRegex, $pageContent, $usernameMatches); - if(count($usernameMatches) > 0) { - $username = strip_tags($usernameMatches[1]); - } else { - $username = $this->getInput('u'); - } - - //Get the page ID if we don't have a captcha - $regex = '/page_id=([0-9]*)&/'; - preg_match($regex, $pageContent, $matches); - - if(count($matches) > 0) { - return array('userId' => $matches[1], 'username' => $username); - } - - //Get the page ID if we do have a captcha - $regex = '/"pageID":"([0-9]*)"/'; - preg_match($regex, $pageContent, $matches); - - return array('userId' => $matches[1], 'username' => $username); - - } - - public function getName(){ - $username = $this->getInput('u'); - if (isset($username)) { - return $this->getInput('u') . ' | Facebook'; - } else { - return self::NAME; - } - } - - public function getURI(){ - $username = $this->getInput('u'); - if (isset($username)) { - return 'https://facebook.com/' . $this->getInput('u') . '/posts'; - } else { - return self::URI; - } - } + ); + } + } + + //Build the string for the first request + $requestString = 'https://touch.facebook.com/page_content_list_view/more/?page_id=' + . $pageInfo['userId'] + . '&start_cursor=1&num_to_fetch=105&surface_type=timeline'; + $fileContent = getContents($requestString); + $html = $this->buildContent($fileContent); + $author = $pageInfo['username']; + + foreach ($html->find('article') as $content) { + $item = []; + + preg_match('/publish_time\\\":([0-9]+),/', $content->getAttribute('data-store', 0), $match); + if (isset($match[1])) { + $timestamp = $match[1]; + } else { + $timestamp = 0; + } + + $item['uri'] = html_entity_decode('https://touch.facebook.com' + . $content->find("div[class='_52jc _5qc4 _78cz _24u0 _36xo']", 0)->find('a', 0)->getAttribute('href'), ENT_QUOTES); + + //Decode images + $imagecleaned = preg_replace_callback('/<i [^>]* style="[^"]*url\(\'(.*?)\'\).*?><\/i>/m', function ($matches) { + return "<img src='" . str_replace(['\\3a ', '\\3d ', '\\26 '], [':', '=', '&'], $matches[1]) . "' />"; + }, $content); + $content = str_get_html($imagecleaned); + + if ($content->find('header', 0) !== null) { + $content->find('header', 0)->innertext = ''; + } + + if ($content->find('footer', 0) !== null) { + $content->find('footer', 0)->innertext = ''; + } + + // Replace emoticon images by their textual representation (part of the span) + foreach ($content->find('span[title*="emoticon"]') as $emoticon) { + $emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext; + } + + //Remove html nodes, keep only img, links, basic formatting + $content = strip_tags($content, '<a><img><i><u><br><p><h3><h4><section>'); + + //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection + $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); + + //Clean useless html tag properties and fix link closing tags + foreach ( + [ + 'onmouseover', + 'onclick', + 'target', + 'ajaxify', + 'tabindex', + 'class', + 'data-[^=]*', + 'aria-[^=]*', + 'role', + 'rel', + 'id'] as $property_name + ) { + $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); + } + $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content); + + //Convert textual representation of emoticons eg + // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)" + $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); + + //Remove the "...Plus" tag + $content = preg_replace( + '/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m', + '', + $content, + 1 + ); + + //Remove tracking images + $content = preg_replace('/<img src=\'.*?safe_image\.php.*?\' \/>/m', '', $content); + + //Remove the double section tags + $content = str_replace( + ['<section><section>', '</section></section>'], + ['<section>', '</section>'], + $content + ); + + //Move the section tag link upper, if it is down + $content = str_get_html($content); + $sectionContent = $content->find('section', 0); + if ($sectionContent != null) { + $sectionLink = $sectionContent->nextSibling(); + if ($sectionLink != null) { + $fullLink = '<a href="' . $sectionLink->getAttribute('href') . '">' . $sectionContent->innertext . '</a>'; + $sectionContent->innertext = $fullLink; + } + } + + //Move the href tag upper if it is inside the section + foreach ($content->find('section > a') as $sectionToFix) { + $sectionLink = $sectionToFix->getAttribute('href'); + $section = $sectionToFix->parent(); + $section->outertext = '<a href="' . $sectionLink . '">' . $section . '</a>'; + } + + $item['content'] = html_entity_decode($content, ENT_QUOTES); + + $title = $author; + if ($this->getInput('abbrev_name') === true) { + if (strlen($title) > 24) { + $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...'; + } + } + $title = $title . ' | ' . strip_tags($content); + if (strlen($title) > 64) { + $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...'; + } + + $item['title'] = html_entity_decode($title, ENT_QUOTES); + $item['author'] = html_entity_decode($author, ENT_QUOTES); + $item['timestamp'] = html_entity_decode($timestamp, ENT_QUOTES); + + if ($item['timestamp'] != 0) { + array_push($this->items, $item); + } + } + } + + //Builds the HTML from the encoded JS that Facebook provides. + private function buildContent($pageContent) + { + // The html ends with: + // /div>","replaceifexists + $regex = '/\\"html\\":(\".+\/div>"),"replace/'; + preg_match($regex, $pageContent, $result); + + $htmlContent = json_decode($result[1]); + $htmlContent = preg_replace('/(?<!style)="(.*?)"/', '=\'$1\'', $htmlContent); + $htmlContent = html_entity_decode($htmlContent, ENT_QUOTES, 'UTF-8'); + + return str_get_html($htmlContent); + } + + //Builds the cookie from the page, as Facebook sometimes refuses to give + //the page if no cookie is provided. + private function getCookies($pageURL) + { + $ctx = stream_context_create([ + 'http' => [ + 'user_agent' => Configuration::getConfig('http', 'useragent'), + 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' + ] + ]); + $a = file_get_contents($pageURL, 0, $ctx); + + //First request to get the cookie + $cookies = ''; + foreach ($http_response_header as $hdr) { + if (strpos($hdr, 'Set-Cookie') !== false) { + $cLine = explode(':', $hdr)[1]; + $cLine = explode(';', $cLine)[0]; + $cookies .= ';' . $cLine; + } + } + + return substr($cookies, 1); + } + + //Get the page ID and username from the Facebook page. + private function getPageInfos($page, $cookies) + { + $context = stream_context_create([ + 'http' => [ + 'user_agent' => Configuration::getConfig('http', 'useragent'), + 'header' => 'Cookie: ' . $cookies + ] + ]); + + $pageContent = file_get_contents($page, 0, $context); + + if (strpos($pageContent, 'signup-button') != false) { + return -1; + } + + //Get the username + $usernameRegex = '/data-nt=\"FB:TEXT4\">(.*?)<\/div>/m'; + preg_match($usernameRegex, $pageContent, $usernameMatches); + if (count($usernameMatches) > 0) { + $username = strip_tags($usernameMatches[1]); + } else { + $username = $this->getInput('u'); + } + + //Get the page ID if we don't have a captcha + $regex = '/page_id=([0-9]*)&/'; + preg_match($regex, $pageContent, $matches); + + if (count($matches) > 0) { + return ['userId' => $matches[1], 'username' => $username]; + } + + //Get the page ID if we do have a captcha + $regex = '/"pageID":"([0-9]*)"/'; + preg_match($regex, $pageContent, $matches); + + return ['userId' => $matches[1], 'username' => $username]; + } + + public function getName() + { + $username = $this->getInput('u'); + if (isset($username)) { + return $this->getInput('u') . ' | Facebook'; + } else { + return self::NAME; + } + } + + public function getURI() + { + $username = $this->getInput('u'); + if (isset($username)) { + return 'https://facebook.com/' . $this->getInput('u') . '/posts'; + } else { + return self::URI; + } + } } |