aboutsummaryrefslogtreecommitdiff
path: root/bridges/VkBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/VkBridge.php')
-rw-r--r--bridges/VkBridge.php929
1 files changed, 471 insertions, 458 deletions
diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php
index 1d0f65b0..bb554bc1 100644
--- a/bridges/VkBridge.php
+++ b/bridges/VkBridge.php
@@ -2,463 +2,476 @@
class VkBridge extends BridgeAbstract
{
-
- const MAINTAINER = 'em92';
- // const MAINTAINER = 'pmaziere';
- // const MAINTAINER = 'ahiles3005';
- const NAME = 'VK.com';
- const URI = 'https://vk.com/';
- const CACHE_TIMEOUT = 300; // 5min
- const DESCRIPTION = 'Working with open pages';
- const PARAMETERS = array(
- array(
- 'u' => array(
- 'name' => 'Group or user name',
- 'exampleValue' => 'elonmusk_tech',
- 'required' => true
- ),
- 'hide_reposts' => array(
- 'name' => 'Hide reposts',
- 'type' => 'checkbox',
- )
- )
- );
-
- protected $videos = array();
- protected $pageName;
-
- protected function getAccessToken()
- {
- return 'e69b2db9f6cd4a97c0716893232587165c18be85bc1af1834560125c1d3c8ec281eb407a78cca0ae16776';
- }
-
- public function getURI()
- {
- if (!is_null($this->getInput('u'))) {
- return static::URI . urlencode($this->getInput('u'));
- }
-
- return parent::getURI();
- }
-
- public function getName()
- {
- if ($this->pageName) {
- return $this->pageName;
- }
-
- return parent::getName();
- }
-
- public function collectData()
- {
- $text_html = $this->getContents();
-
- $text_html = iconv('windows-1251', 'utf-8//ignore', $text_html);
- // makes album link generating work correctly
- $text_html = str_replace('"class="page_album_link">', '" class="page_album_link">', $text_html);
- $html = str_get_html($text_html);
- $pageName = $html->find('.page_name', 0);
- if (is_object($pageName)) {
- $pageName = $pageName->plaintext;
- $this->pageName = htmlspecialchars_decode($pageName);
- }
- foreach ($html->find('div.replies') as $comment_block) {
- $comment_block->outertext = '';
- }
- $html->load($html->save());
-
- $pinned_post_item = null;
- $last_post_id = 0;
-
- foreach ($html->find('.post') as $post) {
-
- if ($post->find('.wall_post_text_deleted')) {
- // repost of deleted post
- continue;
- }
-
- defaultLinkTo($post, self::URI);
-
- $post_videos = array();
-
- $is_pinned_post = false;
- if (strpos($post->getAttribute('class'), 'post_fixed') !== false) {
- $is_pinned_post = true;
- }
-
- if (is_object($post->find('a.wall_post_more', 0))) {
- //delete link "show full" in content
- $post->find('a.wall_post_more', 0)->outertext = '';
- }
-
- $content_suffix = '';
-
- // looking for external links
- $external_link_selectors = array(
- 'a.page_media_link_title',
- 'div.page_media_link_title > a',
- 'div.media_desc > a.lnk',
- );
-
- foreach($external_link_selectors as $sel) {
- if (is_object($post->find($sel, 0))) {
- $a = $post->find($sel, 0);
- $innertext = $a->innertext;
- $parsed_url = parse_url($a->getAttribute('href'));
- if (strpos($parsed_url['path'], '/away.php') !== 0) continue;
- parse_str($parsed_url['query'], $parsed_query);
- $content_suffix .= "<br>External link: <a href='" . $parsed_query['to'] . "'>$innertext</a>";
- }
- }
-
- // remove external link from content
- $external_link_selectors_to_remove = array(
- 'div.page_media_thumbed_link',
- 'div.page_media_link_desc_wrap',
- 'div.media_desc > a.lnk',
- );
-
- foreach($external_link_selectors_to_remove as $sel) {
- if (is_object($post->find($sel, 0))) {
- $post->find($sel, 0)->outertext = '';
- }
- }
-
- // looking for article
- $article = $post->find('a.article_snippet', 0);
- if (is_object($article)) {
- if (strpos($article->getAttribute('class'), 'article_snippet_mini') !== false) {
- $article_title_selector = 'div.article_snippet_mini_title';
- $article_author_selector = 'div.article_snippet_mini_info > .mem_link,
+ const MAINTAINER = 'em92';
+ // const MAINTAINER = 'pmaziere';
+ // const MAINTAINER = 'ahiles3005';
+ const NAME = 'VK.com';
+ const URI = 'https://vk.com/';
+ const CACHE_TIMEOUT = 300; // 5min
+ const DESCRIPTION = 'Working with open pages';
+ const PARAMETERS = [
+ [
+ 'u' => [
+ 'name' => 'Group or user name',
+ 'exampleValue' => 'elonmusk_tech',
+ 'required' => true
+ ],
+ 'hide_reposts' => [
+ 'name' => 'Hide reposts',
+ 'type' => 'checkbox',
+ ]
+ ]
+ ];
+
+ protected $videos = [];
+ protected $pageName;
+
+ protected function getAccessToken()
+ {
+ return 'e69b2db9f6cd4a97c0716893232587165c18be85bc1af1834560125c1d3c8ec281eb407a78cca0ae16776';
+ }
+
+ public function getURI()
+ {
+ if (!is_null($this->getInput('u'))) {
+ return static::URI . urlencode($this->getInput('u'));
+ }
+
+ return parent::getURI();
+ }
+
+ public function getName()
+ {
+ if ($this->pageName) {
+ return $this->pageName;
+ }
+
+ return parent::getName();
+ }
+
+ public function collectData()
+ {
+ $text_html = $this->getContents();
+
+ $text_html = iconv('windows-1251', 'utf-8//ignore', $text_html);
+ // makes album link generating work correctly
+ $text_html = str_replace('"class="page_album_link">', '" class="page_album_link">', $text_html);
+ $html = str_get_html($text_html);
+ $pageName = $html->find('.page_name', 0);
+ if (is_object($pageName)) {
+ $pageName = $pageName->plaintext;
+ $this->pageName = htmlspecialchars_decode($pageName);
+ }
+ foreach ($html->find('div.replies') as $comment_block) {
+ $comment_block->outertext = '';
+ }
+ $html->load($html->save());
+
+ $pinned_post_item = null;
+ $last_post_id = 0;
+
+ foreach ($html->find('.post') as $post) {
+ if ($post->find('.wall_post_text_deleted')) {
+ // repost of deleted post
+ continue;
+ }
+
+ defaultLinkTo($post, self::URI);
+
+ $post_videos = [];
+
+ $is_pinned_post = false;
+ if (strpos($post->getAttribute('class'), 'post_fixed') !== false) {
+ $is_pinned_post = true;
+ }
+
+ if (is_object($post->find('a.wall_post_more', 0))) {
+ //delete link "show full" in content
+ $post->find('a.wall_post_more', 0)->outertext = '';
+ }
+
+ $content_suffix = '';
+
+ // looking for external links
+ $external_link_selectors = [
+ 'a.page_media_link_title',
+ 'div.page_media_link_title > a',
+ 'div.media_desc > a.lnk',
+ ];
+
+ foreach ($external_link_selectors as $sel) {
+ if (is_object($post->find($sel, 0))) {
+ $a = $post->find($sel, 0);
+ $innertext = $a->innertext;
+ $parsed_url = parse_url($a->getAttribute('href'));
+ if (strpos($parsed_url['path'], '/away.php') !== 0) {
+ continue;
+ }
+ parse_str($parsed_url['query'], $parsed_query);
+ $content_suffix .= "<br>External link: <a href='" . $parsed_query['to'] . "'>$innertext</a>";
+ }
+ }
+
+ // remove external link from content
+ $external_link_selectors_to_remove = [
+ 'div.page_media_thumbed_link',
+ 'div.page_media_link_desc_wrap',
+ 'div.media_desc > a.lnk',
+ ];
+
+ foreach ($external_link_selectors_to_remove as $sel) {
+ if (is_object($post->find($sel, 0))) {
+ $post->find($sel, 0)->outertext = '';
+ }
+ }
+
+ // looking for article
+ $article = $post->find('a.article_snippet', 0);
+ if (is_object($article)) {
+ if (strpos($article->getAttribute('class'), 'article_snippet_mini') !== false) {
+ $article_title_selector = 'div.article_snippet_mini_title';
+ $article_author_selector = 'div.article_snippet_mini_info > .mem_link,
div.article_snippet_mini_info > .group_link';
- $article_thumb_selector = 'div.article_snippet_mini_thumb';
- } else {
- $article_title_selector = 'div.article_snippet__title';
- $article_author_selector = 'div.article_snippet__author';
- $article_thumb_selector = 'div.article_snippet__image';
- }
- $article_title = $article->find($article_title_selector, 0)->innertext;
- $article_author = $article->find($article_author_selector, 0)->innertext;
- $article_link = $article->getAttribute('href');
- $article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
- preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);
- if (count($matches) > 0) {
- $content_suffix .= "<br><img src='" . $matches[1] . "'>";
- }
- $content_suffix .= "<br>Article: <a href='$article_link'>$article_title ($article_author)</a>";
- $article->outertext = '';
- }
-
- // get video on post
- $video = $post->find('div.post_video_desc', 0);
- $main_video_link = '';
- if (is_object($video)) {
- $video_title = $video->find('div.post_video_title', 0)->plaintext;
- $video_link = $video->find('a.lnk', 0)->getAttribute('href');
- $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
- $video->outertext = '';
- $main_video_link = $video_link;
- }
-
- // get all other videos
- foreach($post->find('a.page_post_thumb_video') as $a) {
- $video_title = htmlspecialchars_decode($a->getAttribute('aria-label'));
- $video_link = $a->getAttribute('href');
- if ($video_link != $main_video_link) $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
- $a->outertext = '';
- }
-
- // get all photos
- foreach($post->find('div.wall_text a.page_post_thumb_wrap') as $a) {
- $result = $this->getPhoto($a);
- if ($result == null) continue;
- $a->outertext = '';
- $content_suffix .= "<br>$result";
- }
-
- // get albums
- foreach($post->find('.page_album_wrap') as $el) {
- $a = $el->find('.page_album_link', 0);
- $album_title = $a->find('.page_album_title_text', 0)->getAttribute('title');
- $album_link = $a->getAttribute('href');
- $el->outertext = '';
- $content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
- }
-
- // get photo documents
- foreach($post->find('a.page_doc_photo_href') as $a) {
- $doc_link = $a->getAttribute('href');
- $doc_gif_label_element = $a->find('.page_gif_label', 0);
- $doc_title_element = $a->find('.doc_label', 0);
-
- if (is_object($doc_gif_label_element)) {
- $gif_preview_img = backgroundToImg($a->find('.page_doc_photo', 0));
- $content_suffix .= "<br>Gif: <a href='$doc_link'>$gif_preview_img</a>";
-
- } else if (is_object($doc_title_element)) {
- $doc_title = $doc_title_element->innertext;
- $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
-
- } else {
- continue;
-
- }
-
- $a->outertext = '';
- }
-
- // get other documents
- foreach($post->find('div.page_doc_row') as $div) {
- $doc_title_element = $div->find('a.page_doc_title', 0);
-
- if (is_object($doc_title_element)) {
- $doc_title = $doc_title_element->innertext;
- $doc_link = $doc_title_element->getAttribute('href');
- $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
-
- } else {
- continue;
-
- }
-
- $div->outertext = '';
- }
-
- // get polls
- foreach($post->find('div.page_media_poll_wrap') as $div) {
- $poll_title = $div->find('.page_media_poll_title', 0)->innertext;
- $content_suffix .= "<br>Poll: $poll_title";
- foreach($div->find('div.page_poll_text') as $poll_stat_title) {
- $content_suffix .= '<br>- ' . $poll_stat_title->innertext;
- }
- $div->outertext = '';
- }
-
- // get sign / post author
- $post_author = $pageName;
- $author_selectors = array('a.wall_signed_by', 'a.author');
- foreach($author_selectors as $author_selector) {
- $a = $post->find($author_selector, 0);
- if (is_object($a)) {
- $post_author = $a->innertext;
- $a->outertext = '';
- break;
- }
- }
-
- // fix links and get post hashtags
- $hashtags = array();
- foreach($post->find('a') as $a) {
- $href = $a->getAttribute('href');
- $innertext = $a->innertext;
-
- $hashtag_prefix = '/feed?section=search&q=%23';
- $hashtag = null;
-
- if ($href && substr($href, 0, strlen($hashtag_prefix)) === $hashtag_prefix) {
- $hashtag = urldecode(substr($href, strlen($hashtag_prefix)));
- } else if (substr($innertext, 0, 1) == '#') {
- $hashtag = $innertext;
- }
-
- if ($hashtag) {
- $a->outertext = $innertext;
- $hashtags[] = $hashtag;
- continue;
- }
-
- $parsed_url = parse_url($href);
-
- if (array_key_exists('path', $parsed_url) === false) continue;
-
- if (strpos($parsed_url['path'], '/away.php') === 0) {
- parse_str($parsed_url['query'], $parsed_query);
- $a->setAttribute('href', iconv(
- 'windows-1251',
- 'utf-8//ignore',
- $parsed_query['to']
- ));
- }
- }
-
- $copy_quote = $post->find('div.copy_quote', 0);
- if (is_object($copy_quote)) {
- if ($this->getInput('hide_reposts') === true) {
- continue;
- }
- if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
- $copy_post_header->outertext = '';
- }
-
- $second_copy_quote = $copy_quote->find('div.published_sec_quote', 0);
- if (is_object($second_copy_quote)) {
- $second_copy_quote_author = $second_copy_quote->find('a.copy_author', 0)->outertext;
- $second_copy_quote_content = $second_copy_quote->find('div.copy_post_date', 0)->outertext;
- $second_copy_quote->outertext = "<br>Reposted ($second_copy_quote_author): $second_copy_quote_content";
- }
- $copy_quote_author = $copy_quote->find('a.copy_author', 0)->outertext;
- $copy_quote_content = $copy_quote->innertext;
- $copy_quote->outertext = "<br>Reposted ($copy_quote_author): <br>$copy_quote_content";
- }
-
- $item = array();
- $item['content'] = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '<a><br><img>');
- $item['content'] .= $content_suffix;
- $item['categories'] = $hashtags;
-
- // get post link
- $post_link = $post->find('a.post_link', 0)->getAttribute('href');
- preg_match('/wall-?\d+_(\d+)/', $post_link, $preg_match_result);
- $item['post_id'] = intval($preg_match_result[1]);
- $item['uri'] = $post_link;
- $item['timestamp'] = $this->getTime($post);
- $item['title'] = $this->getTitle($item['content']);
- $item['author'] = $post_author;
- $item['videos'] = $post_videos;
- if ($is_pinned_post) {
- // do not append it now
- $pinned_post_item = $item;
- } else {
- $last_post_id = $item['post_id'];
- $this->items[] = $item;
- }
-
- }
-
- if (!is_null($pinned_post_item)) {
- if (count($this->items) == 0) {
- $this->items[] = $pinned_post_item;
- } else if ($last_post_id < $pinned_post_item['post_id']) {
- $this->items[] = $pinned_post_item;
- usort($this->items, function ($item1, $item2) {
- return $item2['post_id'] - $item1['post_id'];
- });
- }
- }
-
- $this->getCleanVideoLinks();
- }
-
- private function getPhoto($a) {
- $onclick = $a->getAttribute('onclick');
- preg_match('/return showPhoto\(.+?({.*})/', $onclick, $preg_match_result);
- if (count($preg_match_result) == 0) return;
-
- $arg = htmlspecialchars_decode( str_replace('queue:1', '"queue":1', $preg_match_result[1]) );
- $data = json_decode($arg, true);
- if ($data == null) return;
-
- $thumb = $data['temp']['base'] . $data['temp']['x_'][0];
- $original = '';
- foreach(array('y_', 'z_', 'w_') as $key) {
- if (!isset($data['temp'][$key])) continue;
- if (!isset($data['temp'][$key][0])) continue;
- if (substr($data['temp'][$key][0], 0, 4) == 'http') {
- $base = '';
- } else {
- $base = $data['temp']['base'];
- }
- $original = $base . $data['temp'][$key][0];
- }
-
- if ($original) {
- return "<a href='$original'><img src='$thumb'></a>";
- } else {
- return "<img src='$thumb'>";
- }
- }
-
- private function getTitle($content)
- {
- preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result);
- if (count($result) == 0) return 'untitled';
- return $result[0];
- }
-
- private function getTime($post)
- {
- if ($time = $post->find('span.rel_date', 0)->getAttribute('time')) {
- return $time;
- } else {
- $strdate = $post->find('span.rel_date', 0)->plaintext;
- $strdate = preg_replace('/[\x00-\x1F\x7F-\xFF]/', ' ', $strdate);
-
- $date = date_parse($strdate);
- if (!$date['year']) {
- if (strstr($strdate, 'today') !== false) {
- $strdate = date('d-m-Y') . ' ' . $strdate;
- } elseif (strstr($strdate, 'yesterday ') !== false) {
- $time = time() - 60 * 60 * 24;
- $strdate = date('d-m-Y', $time) . ' ' . $strdate;
- } elseif ($date['month'] && intval(date('m')) < $date['month']) {
- $strdate = $strdate . ' ' . (date('Y') - 1);
- } else {
- $strdate = $strdate . ' ' . date('Y');
- }
-
- $date = date_parse($strdate);
- } elseif ($date['hour'] === false) {
- $date['hour'] = $date['minute'] = '00';
- }
- return strtotime($date['day'] . '-' . $date['month'] . '-' . $date['year'] . ' ' .
- $date['hour'] . ':' . $date['minute']);
- }
-
- }
-
- private function getContents()
- {
- $header = array('Accept-language: en', 'Cookie: remixlang=3');
-
- return getContents($this->getURI(), $header);
- }
-
- protected function appendVideo($video_title, $video_link, &$content_suffix, array &$post_videos)
- {
- if (!$video_title) $video_title = '(empty)';
-
- preg_match('/video([0-9-]+_[0-9]+)/', $video_link, $preg_match_result);
-
- if (count($preg_match_result) > 1) {
- $video_id = $preg_match_result[1];
- $this->videos[ $video_id ] = array(
- 'url' => $video_link,
- 'title' => $video_title,
- );
- $post_videos[] = $video_id;
- } else {
- $content_suffix .= '<br>Video: <a href="' . htmlspecialchars($video_link) . '">' . $video_title . '</a>';
- }
- }
-
- protected function getCleanVideoLinks() {
- $result = $this->api('video.get', array(
- 'videos' => implode(',', array_keys($this->videos)),
- 'count' => 200
- ));
-
- if (!isset($result['error'])) {
- foreach($result['response']['items'] as $item) {
- $video_id = strval($item['owner_id']) . '_' . strval($item['id']);
- $this->videos[$video_id]['url'] = $item['player'];
- }
- }
-
- foreach($this->items as &$item) {
- foreach($item['videos'] as $video_id) {
- $video_link = $this->videos[$video_id]['url'];
- $video_title = $this->videos[$video_id]['title'];
- $item['content'] .= '<br>Video: <a href="' . htmlspecialchars($video_link) . '">' . $video_title . '</a>';
- }
- unset($item['videos']);
- }
- }
-
- protected function api($method, array $params)
- {
- $params['v'] = '5.80';
- $params['access_token'] = $this->getAccessToken();
- return json_decode( getContents('https://api.vk.com/method/' . $method . '?' . http_build_query($params)), true );
- }
+ $article_thumb_selector = 'div.article_snippet_mini_thumb';
+ } else {
+ $article_title_selector = 'div.article_snippet__title';
+ $article_author_selector = 'div.article_snippet__author';
+ $article_thumb_selector = 'div.article_snippet__image';
+ }
+ $article_title = $article->find($article_title_selector, 0)->innertext;
+ $article_author = $article->find($article_author_selector, 0)->innertext;
+ $article_link = $article->getAttribute('href');
+ $article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
+ preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);
+ if (count($matches) > 0) {
+ $content_suffix .= "<br><img src='" . $matches[1] . "'>";
+ }
+ $content_suffix .= "<br>Article: <a href='$article_link'>$article_title ($article_author)</a>";
+ $article->outertext = '';
+ }
+
+ // get video on post
+ $video = $post->find('div.post_video_desc', 0);
+ $main_video_link = '';
+ if (is_object($video)) {
+ $video_title = $video->find('div.post_video_title', 0)->plaintext;
+ $video_link = $video->find('a.lnk', 0)->getAttribute('href');
+ $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
+ $video->outertext = '';
+ $main_video_link = $video_link;
+ }
+
+ // get all other videos
+ foreach ($post->find('a.page_post_thumb_video') as $a) {
+ $video_title = htmlspecialchars_decode($a->getAttribute('aria-label'));
+ $video_link = $a->getAttribute('href');
+ if ($video_link != $main_video_link) {
+ $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
+ }
+ $a->outertext = '';
+ }
+
+ // get all photos
+ foreach ($post->find('div.wall_text a.page_post_thumb_wrap') as $a) {
+ $result = $this->getPhoto($a);
+ if ($result == null) {
+ continue;
+ }
+ $a->outertext = '';
+ $content_suffix .= "<br>$result";
+ }
+
+ // get albums
+ foreach ($post->find('.page_album_wrap') as $el) {
+ $a = $el->find('.page_album_link', 0);
+ $album_title = $a->find('.page_album_title_text', 0)->getAttribute('title');
+ $album_link = $a->getAttribute('href');
+ $el->outertext = '';
+ $content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
+ }
+
+ // get photo documents
+ foreach ($post->find('a.page_doc_photo_href') as $a) {
+ $doc_link = $a->getAttribute('href');
+ $doc_gif_label_element = $a->find('.page_gif_label', 0);
+ $doc_title_element = $a->find('.doc_label', 0);
+
+ if (is_object($doc_gif_label_element)) {
+ $gif_preview_img = backgroundToImg($a->find('.page_doc_photo', 0));
+ $content_suffix .= "<br>Gif: <a href='$doc_link'>$gif_preview_img</a>";
+ } elseif (is_object($doc_title_element)) {
+ $doc_title = $doc_title_element->innertext;
+ $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
+ } else {
+ continue;
+ }
+
+ $a->outertext = '';
+ }
+
+ // get other documents
+ foreach ($post->find('div.page_doc_row') as $div) {
+ $doc_title_element = $div->find('a.page_doc_title', 0);
+
+ if (is_object($doc_title_element)) {
+ $doc_title = $doc_title_element->innertext;
+ $doc_link = $doc_title_element->getAttribute('href');
+ $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
+ } else {
+ continue;
+ }
+
+ $div->outertext = '';
+ }
+
+ // get polls
+ foreach ($post->find('div.page_media_poll_wrap') as $div) {
+ $poll_title = $div->find('.page_media_poll_title', 0)->innertext;
+ $content_suffix .= "<br>Poll: $poll_title";
+ foreach ($div->find('div.page_poll_text') as $poll_stat_title) {
+ $content_suffix .= '<br>- ' . $poll_stat_title->innertext;
+ }
+ $div->outertext = '';
+ }
+
+ // get sign / post author
+ $post_author = $pageName;
+ $author_selectors = ['a.wall_signed_by', 'a.author'];
+ foreach ($author_selectors as $author_selector) {
+ $a = $post->find($author_selector, 0);
+ if (is_object($a)) {
+ $post_author = $a->innertext;
+ $a->outertext = '';
+ break;
+ }
+ }
+
+ // fix links and get post hashtags
+ $hashtags = [];
+ foreach ($post->find('a') as $a) {
+ $href = $a->getAttribute('href');
+ $innertext = $a->innertext;
+
+ $hashtag_prefix = '/feed?section=search&q=%23';
+ $hashtag = null;
+
+ if ($href && substr($href, 0, strlen($hashtag_prefix)) === $hashtag_prefix) {
+ $hashtag = urldecode(substr($href, strlen($hashtag_prefix)));
+ } elseif (substr($innertext, 0, 1) == '#') {
+ $hashtag = $innertext;
+ }
+
+ if ($hashtag) {
+ $a->outertext = $innertext;
+ $hashtags[] = $hashtag;
+ continue;
+ }
+
+ $parsed_url = parse_url($href);
+
+ if (array_key_exists('path', $parsed_url) === false) {
+ continue;
+ }
+
+ if (strpos($parsed_url['path'], '/away.php') === 0) {
+ parse_str($parsed_url['query'], $parsed_query);
+ $a->setAttribute('href', iconv(
+ 'windows-1251',
+ 'utf-8//ignore',
+ $parsed_query['to']
+ ));
+ }
+ }
+
+ $copy_quote = $post->find('div.copy_quote', 0);
+ if (is_object($copy_quote)) {
+ if ($this->getInput('hide_reposts') === true) {
+ continue;
+ }
+ if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
+ $copy_post_header->outertext = '';
+ }
+
+ $second_copy_quote = $copy_quote->find('div.published_sec_quote', 0);
+ if (is_object($second_copy_quote)) {
+ $second_copy_quote_author = $second_copy_quote->find('a.copy_author', 0)->outertext;
+ $second_copy_quote_content = $second_copy_quote->find('div.copy_post_date', 0)->outertext;
+ $second_copy_quote->outertext = "<br>Reposted ($second_copy_quote_author): $second_copy_quote_content";
+ }
+ $copy_quote_author = $copy_quote->find('a.copy_author', 0)->outertext;
+ $copy_quote_content = $copy_quote->innertext;
+ $copy_quote->outertext = "<br>Reposted ($copy_quote_author): <br>$copy_quote_content";
+ }
+
+ $item = [];
+ $item['content'] = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '<a><br><img>');
+ $item['content'] .= $content_suffix;
+ $item['categories'] = $hashtags;
+
+ // get post link
+ $post_link = $post->find('a.post_link', 0)->getAttribute('href');
+ preg_match('/wall-?\d+_(\d+)/', $post_link, $preg_match_result);
+ $item['post_id'] = intval($preg_match_result[1]);
+ $item['uri'] = $post_link;
+ $item['timestamp'] = $this->getTime($post);
+ $item['title'] = $this->getTitle($item['content']);
+ $item['author'] = $post_author;
+ $item['videos'] = $post_videos;
+ if ($is_pinned_post) {
+ // do not append it now
+ $pinned_post_item = $item;
+ } else {
+ $last_post_id = $item['post_id'];
+ $this->items[] = $item;
+ }
+ }
+
+ if (!is_null($pinned_post_item)) {
+ if (count($this->items) == 0) {
+ $this->items[] = $pinned_post_item;
+ } elseif ($last_post_id < $pinned_post_item['post_id']) {
+ $this->items[] = $pinned_post_item;
+ usort($this->items, function ($item1, $item2) {
+ return $item2['post_id'] - $item1['post_id'];
+ });
+ }
+ }
+
+ $this->getCleanVideoLinks();
+ }
+
+ private function getPhoto($a)
+ {
+ $onclick = $a->getAttribute('onclick');
+ preg_match('/return showPhoto\(.+?({.*})/', $onclick, $preg_match_result);
+ if (count($preg_match_result) == 0) {
+ return;
+ }
+
+ $arg = htmlspecialchars_decode(str_replace('queue:1', '"queue":1', $preg_match_result[1]));
+ $data = json_decode($arg, true);
+ if ($data == null) {
+ return;
+ }
+
+ $thumb = $data['temp']['base'] . $data['temp']['x_'][0];
+ $original = '';
+ foreach (['y_', 'z_', 'w_'] as $key) {
+ if (!isset($data['temp'][$key])) {
+ continue;
+ }
+ if (!isset($data['temp'][$key][0])) {
+ continue;
+ }
+ if (substr($data['temp'][$key][0], 0, 4) == 'http') {
+ $base = '';
+ } else {
+ $base = $data['temp']['base'];
+ }
+ $original = $base . $data['temp'][$key][0];
+ }
+
+ if ($original) {
+ return "<a href='$original'><img src='$thumb'></a>";
+ } else {
+ return "<img src='$thumb'>";
+ }
+ }
+
+ private function getTitle($content)
+ {
+ preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result);
+ if (count($result) == 0) {
+ return 'untitled';
+ }
+ return $result[0];
+ }
+
+ private function getTime($post)
+ {
+ if ($time = $post->find('span.rel_date', 0)->getAttribute('time')) {
+ return $time;
+ } else {
+ $strdate = $post->find('span.rel_date', 0)->plaintext;
+ $strdate = preg_replace('/[\x00-\x1F\x7F-\xFF]/', ' ', $strdate);
+
+ $date = date_parse($strdate);
+ if (!$date['year']) {
+ if (strstr($strdate, 'today') !== false) {
+ $strdate = date('d-m-Y') . ' ' . $strdate;
+ } elseif (strstr($strdate, 'yesterday ') !== false) {
+ $time = time() - 60 * 60 * 24;
+ $strdate = date('d-m-Y', $time) . ' ' . $strdate;
+ } elseif ($date['month'] && intval(date('m')) < $date['month']) {
+ $strdate = $strdate . ' ' . (date('Y') - 1);
+ } else {
+ $strdate = $strdate . ' ' . date('Y');
+ }
+
+ $date = date_parse($strdate);
+ } elseif ($date['hour'] === false) {
+ $date['hour'] = $date['minute'] = '00';
+ }
+ return strtotime($date['day'] . '-' . $date['month'] . '-' . $date['year'] . ' ' .
+ $date['hour'] . ':' . $date['minute']);
+ }
+ }
+
+ private function getContents()
+ {
+ $header = ['Accept-language: en', 'Cookie: remixlang=3'];
+
+ return getContents($this->getURI(), $header);
+ }
+
+ protected function appendVideo($video_title, $video_link, &$content_suffix, array &$post_videos)
+ {
+ if (!$video_title) {
+ $video_title = '(empty)';
+ }
+
+ preg_match('/video([0-9-]+_[0-9]+)/', $video_link, $preg_match_result);
+
+ if (count($preg_match_result) > 1) {
+ $video_id = $preg_match_result[1];
+ $this->videos[ $video_id ] = [
+ 'url' => $video_link,
+ 'title' => $video_title,
+ ];
+ $post_videos[] = $video_id;
+ } else {
+ $content_suffix .= '<br>Video: <a href="' . htmlspecialchars($video_link) . '">' . $video_title . '</a>';
+ }
+ }
+
+ protected function getCleanVideoLinks()
+ {
+ $result = $this->api('video.get', [
+ 'videos' => implode(',', array_keys($this->videos)),
+ 'count' => 200
+ ]);
+
+ if (!isset($result['error'])) {
+ foreach ($result['response']['items'] as $item) {
+ $video_id = strval($item['owner_id']) . '_' . strval($item['id']);
+ $this->videos[$video_id]['url'] = $item['player'];
+ }
+ }
+
+ foreach ($this->items as &$item) {
+ foreach ($item['videos'] as $video_id) {
+ $video_link = $this->videos[$video_id]['url'];
+ $video_title = $this->videos[$video_id]['title'];
+ $item['content'] .= '<br>Video: <a href="' . htmlspecialchars($video_link) . '">' . $video_title . '</a>';
+ }
+ unset($item['videos']);
+ }
+ }
+
+ protected function api($method, array $params)
+ {
+ $params['v'] = '5.80';
+ $params['access_token'] = $this->getAccessToken();
+ return json_decode(getContents('https://api.vk.com/method/' . $method . '?' . http_build_query($params)), true);
+ }
}