diff options
author | 2018-08-02 09:31:56 +0500 | |
---|---|---|
committer | 2018-08-02 06:31:56 +0200 | |
commit | df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28 (patch) | |
tree | 9e7c04e0704a92f9dada71e6222304ca8be3ed0c | |
parent | 9d0452d11b48333fa79db9aa3c9508668e7843c5 (diff) | |
download | rss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.tar.gz rss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.tar.zst rss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.zip |
[core] Add urljoin (#756)
Adds php-urljoin from https://github.com/fluffy-critter/php-urljoin to replace the custom implementation of 'defaultLinkTo'
-rw-r--r-- | lib/RssBridge.php | 11 | ||||
-rw-r--r-- | lib/html.php | 11 | ||||
-rw-r--r-- | vendor/php-urljoin/src/urljoin.php | 131 |
3 files changed, 144 insertions, 9 deletions
```diff
diff --git a/lib/RssBridge.php b/lib/RssBridge.php
index b570076d..1d66e737 100644
--- a/lib/RssBridge.php
+++ b/lib/RssBridge.php
@@ -34,6 +34,17 @@ if(!file_exists($vendorLibSimpleHtmlDom)) {
 }
 require_once $vendorLibSimpleHtmlDom;
 
+$vendorLibPhpUrlJoin = __DIR__ . PATH_VENDOR . '/php-urljoin/src/urljoin.php';
+if(!file_exists($vendorLibPhpUrlJoin)) {
+	throw new \HttpException('"php-urljoin" library is missing.
+		Get it from https://github.com/fluffy-critter/php-urljoin and place the script "urljoin.php" in '
+		. substr(PATH_VENDOR, 4)
+		. '/php-urljoin/src/',
+	500);
+}
+require_once $vendorLibPhpUrlJoin;
+
+
 /* Example use
 
 require_once __DIR__ . '/lib/RssBridge.php';
diff --git a/lib/html.php b/lib/html.php
index 3214eef8..5098f6cb 100644
--- a/lib/html.php
+++ b/lib/html.php
@@ -42,18 +42,11 @@ function backgroundToImg($htmlContent) {
 
 function defaultLinkTo($content, $server){
 	foreach($content->find('img') as $image) {
-		if(strpos($image->src, 'http') === false
-			&& strpos($image->src, '//') === false
-			&& strpos($image->src, 'data:') === false)
-			$image->src = $server . $image->src;
+		$image->src = urljoin($server, $image->src);
 	}
 
 	foreach($content->find('a') as $anchor) {
-		if(strpos($anchor->href, 'http') === false
-			&& strpos($anchor->href, '//') === false
-			&& strpos($anchor->href, '#') !== 0
-			&& strpos($anchor->href, '?') !== 0)
-			$anchor->href = $server . $anchor->href;
+		$anchor->href = urljoin($server, $anchor->href);
 	}
 
 	return $content;
diff --git a/vendor/php-urljoin/src/urljoin.php b/vendor/php-urljoin/src/urljoin.php
new file mode 100644
index 00000000..4f62f906
--- /dev/null
+++ b/vendor/php-urljoin/src/urljoin.php
@@ -0,0 +1,131 @@
+<?php
+
+/*
+
+A spiritual port of Python's urlparse.urljoin() function to PHP. Why this isn't in the standard library is anyone's guess.
+
+Author: fluffy, http://beesbuzz.biz/
+Latest version at: https://github.com/plaidfluff/php-urljoin
+
+ */
+
+function urljoin($base, $rel) {
+    if (!$base) {
+        return $rel;
+    }
+
+    if (!$rel) {
+        return $base;
+    }
+
+    $uses_relative = array('', 'ftp', 'http', 'gopher', 'nntp', 'imap',
+        'wais', 'file', 'https', 'shttp', 'mms',
+        'prospero', 'rtsp', 'rtspu', 'sftp',
+        'svn', 'svn+ssh', 'ws', 'wss');
+
+    $pbase = parse_url($base);
+    $prel = parse_url($rel);
+
+    if (array_key_exists('path', $pbase) && $pbase['path'] === '/') {
+        unset($pbase['path']);
+    }
+
+    if (isset($prel['scheme'])) {
+        if ($prel['scheme'] != $pbase['scheme'] || in_array($prel['scheme'], $uses_relative) == false) {
+            return $rel;
+        }
+    }
+
+    $merged = array_merge($pbase, $prel);
+
+    // Handle relative paths:
+    // 'path/to/file.ext'
+    // './path/to/file.ext'
+    if (array_key_exists('path', $prel) && substr($prel['path'], 0, 1) != '/') {
+
+        // Normalize: './path/to/file.ext' => 'path/to/file.ext'
+        if (substr($prel['path'], 0, 2) === './') {
+            $prel['path'] = substr($prel['path'], 2);
+        }
+
+        if (array_key_exists('path', $pbase)) {
+            $dir = preg_replace('@/[^/]*$@', '', $pbase['path']);
+            $merged['path'] = $dir . '/' . $prel['path'];
+        } else {
+            $merged['path'] = '/' . $prel['path'];
+        }
+
+    }
+
+    if(array_key_exists('path', $merged)) {
+        // Get the path components, and remove the initial empty one
+        $pathParts = explode('/', $merged['path']);
+        array_shift($pathParts);
+
+        $path = [];
+        $prevPart = '';
+        foreach ($pathParts as $part) {
+            if ($part == '..' && count($path) > 0) {
+                // Cancel out the parent directory (if there's a parent to cancel)
+                $parent = array_pop($path);
+                // But if it was also a parent directory, leave it in
+                if ($parent == '..') {
+                    array_push($path, $parent);
+                    array_push($path, $part);
+                }
+            } else if ($prevPart != '' || ($part != '.' && $part != '')) {
+                // Don't include empty or current-directory components
+                if ($part == '.') {
+                    $part = '';
+                }
+                array_push($path, $part);
+            }
+            $prevPart = $part;
+        }
+        $merged['path'] = '/' . implode('/', $path);
+    }
+
+    $ret = '';
+    if (isset($merged['scheme'])) {
+        $ret .= $merged['scheme'] . ':';
+    }
+
+    if (isset($merged['scheme']) || isset($merged['host'])) {
+        $ret .= '//';
+    }
+
+    if (isset($prel['host'])) {
+        $hostSource = $prel;
+    } else {
+        $hostSource = $pbase;
+    }
+
+    // username, password, and port are associated with the hostname, not merged
+    if (isset($hostSource['host'])) {
+        if (isset($hostSource['user'])) {
+            $ret .= $hostSource['user'];
+            if (isset($hostSource['pass'])) {
+                $ret .= ':' . $hostSource['pass'];
+            }
+            $ret .= '@';
+        }
+        $ret .= $hostSource['host'];
+        if (isset($hostSource['port'])) {
+            $ret .= ':' . $hostSource['port'];
+        }
+    }
+
+    if (isset($merged['path'])) {
+        $ret .= $merged['path'];
+    }
+
+    if (isset($prel['query'])) {
+        $ret .= '?' . $prel['query'];
+    }
+
+    if (isset($prel['fragment'])) {
+        $ret .= '#' . $prel['fragment'];
+    }
+
+    return $ret;
+}
```