aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Eugene Molotov <eugene.molotov@yandex.ru> 2018-08-02 09:31:56 +0500
committerGravatar LogMANOriginal <LogMANOriginal@users.noreply.github.com> 2018-08-02 06:31:56 +0200
commitdf58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28 (patch)
tree9e7c04e0704a92f9dada71e6222304ca8be3ed0c
parent9d0452d11b48333fa79db9aa3c9508668e7843c5 (diff)
downloadrss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.tar.gz
rss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.tar.zst
rss-bridge-df58f5bbdbdc18ff4e0620a9a46b6efc8a63fa28.zip
[core] Add urljoin (#756)
Adds php-urljoin from https://github.com/fluffy-critter/php-urljoin to replace the custom implementation of 'defaultLinkTo'
-rw-r--r--lib/RssBridge.php11
-rw-r--r--lib/html.php11
-rw-r--r--vendor/php-urljoin/src/urljoin.php131
3 files changed, 144 insertions, 9 deletions
diff --git a/lib/RssBridge.php b/lib/RssBridge.php
index b570076d..1d66e737 100644
--- a/lib/RssBridge.php
+++ b/lib/RssBridge.php
@@ -34,6 +34,17 @@ if(!file_exists($vendorLibSimpleHtmlDom)) {
}
require_once $vendorLibSimpleHtmlDom;
+$vendorLibPhpUrlJoin = __DIR__ . PATH_VENDOR . '/php-urljoin/src/urljoin.php'; // Expected location of the vendored php-urljoin script.
+if(!file_exists($vendorLibPhpUrlJoin)) { // Abort with an explanatory HttpException (code 500) when the script is not on disk.
+ throw new \HttpException('"php-urljoin" library is missing.
+ Get it from https://github.com/fluffy-critter/php-urljoin and place the script "urljoin.php" in '
+ . substr(PATH_VENDOR, 4) // NOTE(review): presumably drops a 4-character prefix such as '/../' from PATH_VENDOR — confirm against its definition.
+ . '/php-urljoin/src/',
+ 500);
+}
+require_once $vendorLibPhpUrlJoin; // Defines urljoin(), which defaultLinkTo() in lib/html.php now calls.
+
+
/* Example use
require_once __DIR__ . '/lib/RssBridge.php';
diff --git a/lib/html.php b/lib/html.php
index 3214eef8..5098f6cb 100644
--- a/lib/html.php
+++ b/lib/html.php
@@ -42,18 +42,11 @@ function backgroundToImg($htmlContent) {
function defaultLinkTo($content, $server){
foreach($content->find('img') as $image) {
- if(strpos($image->src, 'http') === false
- && strpos($image->src, '//') === false
- && strpos($image->src, 'data:') === false)
- $image->src = $server . $image->src;
+ $image->src = urljoin($server, $image->src);
}
foreach($content->find('a') as $anchor) {
- if(strpos($anchor->href, 'http') === false
- && strpos($anchor->href, '//') === false
- && strpos($anchor->href, '#') !== 0
- && strpos($anchor->href, '?') !== 0)
- $anchor->href = $server . $anchor->href;
+ $anchor->href = urljoin($server, $anchor->href);
}
return $content;
diff --git a/vendor/php-urljoin/src/urljoin.php b/vendor/php-urljoin/src/urljoin.php
new file mode 100644
index 00000000..4f62f906
--- /dev/null
+++ b/vendor/php-urljoin/src/urljoin.php
@@ -0,0 +1,131 @@
+<?php
+
+/*
+
+A spiritual port of Python's urlparse.urljoin() function to PHP. Why this isn't in the standard library is anyone's guess.
+
+Author: fluffy, http://beesbuzz.biz/
+Latest version at: https://github.com/fluffy-critter/php-urljoin
+
+ */
+
+/**
+ * Resolve a URL reference against a base URL, like Python's urljoin().
+ *
+ * @param string $base Base URL; when empty, $rel is returned as-is.
+ * @param string $rel Reference to resolve; when empty, $base is returned.
+ * @return string Joined URL, or $rel untouched when it cannot be resolved.
+ */
+function urljoin($base, $rel) {
+    if (!$base) {
+        return $rel;
+    }
+    if (!$rel) {
+        return $base;
+    }
+
+    $uses_relative = array('', 'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais',
+        'file', 'https', 'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', 'sftp',
+        'svn', 'svn+ssh', 'ws', 'wss');
+
+    $pbase = parse_url($base);
+    $prel = parse_url($rel);
+    // parse_url() yields false for seriously malformed URLs; nothing to join then.
+    if ($pbase === false || $prel === false) {
+        return $rel;
+    }
+
+    if (array_key_exists('path', $pbase) && $pbase['path'] === '/') {
+        unset($pbase['path']);
+    }
+
+    if (isset($prel['scheme'])) {
+        // The base may be scheme-less ('//host/path'); that counts as a mismatch.
+        $baseScheme = isset($pbase['scheme']) ? $pbase['scheme'] : null;
+        if ($prel['scheme'] !== $baseScheme || !in_array($prel['scheme'], $uses_relative, true)) {
+            return $rel;
+        }
+    }
+
+    $merged = array_merge($pbase, $prel);
+    // Handle relative paths: 'path/to/file.ext' and './path/to/file.ext'
+    if (array_key_exists('path', $prel) && substr($prel['path'], 0, 1) != '/') {
+        // Normalize: './path/to/file.ext' => 'path/to/file.ext'
+        if (substr($prel['path'], 0, 2) === './') {
+            $prel['path'] = substr($prel['path'], 2);
+        }
+        if (array_key_exists('path', $pbase)) {
+            $dir = preg_replace('@/[^/]*$@', '', $pbase['path']);
+            $merged['path'] = $dir . '/' . $prel['path'];
+        } else {
+            $merged['path'] = '/' . $prel['path'];
+        }
+    }
+
+    if (array_key_exists('path', $merged)) {
+        // Get the path components, and remove the initial empty one
+        $pathParts = explode('/', $merged['path']);
+        array_shift($pathParts);
+        $path = [];
+        $prevPart = '';
+        foreach ($pathParts as $part) {
+            if ($part == '..' && count($path) > 0) {
+                // Cancel out the parent directory (if there's a parent to cancel)
+                $parent = array_pop($path);
+                // But if it was also a parent directory, leave it in
+                if ($parent == '..') {
+                    array_push($path, $parent, $part);
+                }
+            } else if ($prevPart != '' || ($part != '.' && $part != '')) {
+                // Don't include empty or current-directory components
+                if ($part == '.') {
+                    $part = '';
+                }
+                array_push($path, $part);
+            }
+            $prevPart = $part;
+        }
+        $merged['path'] = '/' . implode('/', $path);
+    }
+
+    $ret = '';
+    if (isset($merged['scheme'])) {
+        $ret .= $merged['scheme'] . ':';
+    }
+    if (isset($merged['scheme']) || isset($merged['host'])) {
+        $ret .= '//';
+    }
+
+    // username, password, and port are associated with the hostname, not merged
+    $hostSource = isset($prel['host']) ? $prel : $pbase;
+    if (isset($hostSource['host'])) {
+        if (isset($hostSource['user'])) {
+            $ret .= $hostSource['user'];
+            if (isset($hostSource['pass'])) {
+                $ret .= ':' . $hostSource['pass'];
+            }
+            $ret .= '@';
+        }
+        $ret .= $hostSource['host'];
+        if (isset($hostSource['port'])) {
+            $ret .= ':' . $hostSource['port'];
+        }
+    }
+
+    if (isset($merged['path'])) {
+        $ret .= $merged['path'];
+    }
+
+    // RFC 3986 5.2.2: a reference without authority, path or query of its own
+    // (e.g. '#top') keeps the base query instead of silently dropping it.
+    $ownsQuery = isset($prel['host']) || isset($prel['path']) || isset($prel['query']);
+    $querySource = $ownsQuery ? $prel : $pbase;
+    if (isset($querySource['query'])) {
+        $ret .= '?' . $querySource['query'];
+    }
+    if (isset($prel['fragment'])) {
+        $ret .= '#' . $prel['fragment'];
+    }
+
+    return $ret;
+}