aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bridges/RedditBridge.php23
-rw-r--r--lib/bootstrap.php1
-rw-r--r--lib/url.php145
-rw-r--r--tests/UrlTest.php47
4 files changed, 207 insertions, 9 deletions
diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php
index 8d46f7bd..f761afaa 100644
--- a/bridges/RedditBridge.php
+++ b/bridges/RedditBridge.php
@@ -305,25 +305,30 @@ class RedditBridge extends BridgeAbstract
public function detectParameters($url)
{
- $parsed_url = parse_url($url);
+ try {
+ $urlObject = Url::fromString($url);
+ } catch (UrlException $e) {
+ return null;
+ }
- $host = $parsed_url['host'] ?? null;
+ $host = $urlObject->getHost();
+ $path = $urlObject->getPath();
- if ($host != 'www.reddit.com' && $host != 'old.reddit.com') {
+ $pathSegments = explode('/', $path);
+
+ if ($host !== 'www.reddit.com' && $host !== 'old.reddit.com') {
return null;
}
- $path = explode('/', $parsed_url['path']);
-
- if ($path[1] == 'r') {
+ if ($pathSegments[1] == 'r') {
return [
'context' => 'single',
- 'r' => $path[2]
+ 'r' => $pathSegments[2],
];
- } elseif ($path[1] == 'user') {
+ } elseif ($pathSegments[1] == 'user') {
return [
'context' => 'user',
- 'u' => $path[2]
+ 'u' => $pathSegments[2],
];
} else {
return null;
diff --git a/lib/bootstrap.php b/lib/bootstrap.php
index c8cf4e99..dc1c0f04 100644
--- a/lib/bootstrap.php
+++ b/lib/bootstrap.php
@@ -44,6 +44,7 @@ $files = [
__DIR__ . '/../lib/utils.php',
__DIR__ . '/../lib/http.php',
__DIR__ . '/../lib/logger.php',
+ __DIR__ . '/../lib/url.php',
// Vendor
__DIR__ . '/../vendor/parsedown/Parsedown.php',
__DIR__ . '/../vendor/php-urljoin/src/urljoin.php',
diff --git a/lib/url.php b/lib/url.php
new file mode 100644
index 00000000..2dcbbba5
--- /dev/null
+++ b/lib/url.php
@@ -0,0 +1,145 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ * Thrown by Url when a URL string or one of its components is rejected.
+ */
+final class UrlException extends \Exception
+{
+}
+
+/**
+ * Intentionally restrictive URL parser and immutable URL value object.
+ *
+ * Only absolute http/https URLs whose authority is a host name with an
+ * optional numeric port are accepted. The fragment is not retained. Every
+ * with*() method returns a modified clone and leaves the receiver untouched.
+ */
+final class Url
+{
+    /** Default port of each supported scheme. */
+    private const DEFAULT_PORTS = [
+        'http' => 80,
+        'https' => 443,
+    ];
+
+    private string $scheme;
+    private string $host;
+    /** Explicitly set port, or null when the default port of the scheme applies. */
+    private ?int $port = null;
+    private string $path;
+    private ?string $queryString;
+
+    /**
+     * Instances are created through fromString() only.
+     */
+    private function __construct()
+    {
+    }
+
+    /**
+     * Parses an absolute http/https URL.
+     *
+     * A missing path becomes "/". A missing port falls back to the default
+     * port of the scheme (80 for http, 443 for https).
+     *
+     * @throws UrlException if the URL fails validate(), cannot be parsed by
+     *                      parse_url(), or has an unsupported scheme
+     */
+    public static function fromString(string $url): self
+    {
+        if (!self::validate($url)) {
+            throw new UrlException(sprintf('Illegal url: "%s"', $url));
+        }
+
+        $parts = parse_url($url);
+        if ($parts === false) {
+            throw new UrlException(sprintf('Invalid url %s', $url));
+        }
+
+        $instance = (new self())
+            ->withScheme($parts['scheme'] ?? '')
+            ->withHost($parts['host'])
+            ->withPath($parts['path'] ?? '/')
+            ->withQueryString($parts['query'] ?? null);
+
+        // Only record a port that was actually written in the URL, so that the
+        // scheme decides the default instead of a hard-coded 80.
+        if (isset($parts['port'])) {
+            $instance = $instance->withPort($parts['port']);
+        }
+
+        return $instance;
+    }
+
+    /**
+     * Tells whether $url is at most 1500 bytes long and starts with an
+     * http/https scheme followed by a host name and an optional port.
+     *
+     * Only the beginning of the URL is checked; path and query are not
+     * inspected.
+     */
+    public static function validate(string $url): bool
+    {
+        if (strlen($url) > 1500) {
+            return false;
+        }
+        // The host is written as "label(.label)*" with an optional trailing dot
+        // instead of "(label.?)+": both accept the same hosts, but the nested
+        // quantifier backtracks exponentially on invalid input.
+        $pattern = '#^https?://' // scheme
+            . '[a-z0-9-]+(?:\.[a-z0-9-]+)*\.?' // one or more domain names
+            . '(\.[a-z]{1,24})?' // optional global tld
+            . '(:\d+)?' // optional port
+            . '($|/|\?)#i'; // end of string or slash or question mark
+
+        return preg_match($pattern, $url) === 1;
+    }
+
+    public function getScheme(): string
+    {
+        return $this->scheme;
+    }
+
+    public function getHost(): string
+    {
+        return $this->host;
+    }
+
+    /**
+     * Returns the explicit port, or the default port of the scheme when no
+     * port was set.
+     */
+    public function getPort(): int
+    {
+        return $this->port ?? self::DEFAULT_PORTS[$this->scheme];
+    }
+
+    public function getPath(): string
+    {
+        return $this->path;
+    }
+
+    /**
+     * Returns the stored query string (without the leading "?"), or null when
+     * none is set.
+     */
+    public function getQueryString(): ?string
+    {
+        return $this->queryString;
+    }
+
+    /**
+     * Returns a copy using the given scheme.
+     *
+     * @throws UrlException if the scheme is neither "http" nor "https"
+     */
+    public function withScheme(string $scheme): self
+    {
+        if (!array_key_exists($scheme, self::DEFAULT_PORTS)) {
+            throw new UrlException(sprintf('Invalid scheme %s', $scheme));
+        }
+        $clone = clone $this;
+        $clone->scheme = $scheme;
+        return $clone;
+    }
+
+    /**
+     * Returns a copy using the given host. The host is not validated here.
+     */
+    public function withHost(string $host): self
+    {
+        $clone = clone $this;
+        $clone->host = $host;
+        return $clone;
+    }
+
+    /**
+     * Returns a copy using the given explicit port. The port is not validated here.
+     */
+    public function withPort(int $port): self
+    {
+        $clone = clone $this;
+        $clone->port = $port;
+        return $clone;
+    }
+
+    /**
+     * Returns a copy using the given path.
+     *
+     * @throws UrlException if the path does not start with "/"
+     */
+    public function withPath(string $path): self
+    {
+        if (!str_starts_with($path, '/')) {
+            throw new UrlException(sprintf('Path must start with forward slash: %s', $path));
+        }
+        $clone = clone $this;
+        $clone->path = $path;
+        return $clone;
+    }
+
+    /**
+     * Returns a copy using the given query string (without the leading "?"),
+     * or without a query string when null is given.
+     */
+    public function withQueryString(?string $queryString): self
+    {
+        $clone = clone $this;
+        $clone->queryString = $queryString;
+        return $clone;
+    }
+
+    /**
+     * Renders the URL as scheme://host[:port]path[?query].
+     *
+     * The port is left out when it is the default port of the scheme, and the
+     * query part is left out when the query string is null or empty.
+     */
+    public function __toString(): string
+    {
+        $port = '';
+        if ($this->port !== null && $this->port !== self::DEFAULT_PORTS[$this->scheme]) {
+            $port = ':' . $this->port;
+        }
+
+        // Compare explicitly instead of by truthiness so that a query of "0" is kept.
+        $queryString = '';
+        if ($this->queryString !== null && $this->queryString !== '') {
+            $queryString = '?' . $this->queryString;
+        }
+
+        return sprintf(
+            '%s://%s%s%s%s',
+            $this->scheme,
+            $this->host,
+            $port,
+            $this->path,
+            $queryString
+        );
+    }
+}
diff --git a/tests/UrlTest.php b/tests/UrlTest.php
new file mode 100644
index 00000000..d45f319b
--- /dev/null
+++ b/tests/UrlTest.php
@@ -0,0 +1,47 @@
+<?php
+
+declare(strict_types=1);
+
+namespace RssBridge\Tests;
+
+use PHPUnit\Framework\TestCase;
+use Url;
+
+/**
+ * Round-trip, normalization and mutation checks for Url.
+ */
+class UrlTest extends TestCase
+{
+    /**
+     * URLs that are already in normal form must be rendered back unchanged.
+     */
+    public function testBasicUsages()
+    {
+        $canonicalUrls = [
+            'http://example.com/',
+            'http://example.com:9000/',
+            'https://example.com/',
+            'https://example.com/?foo',
+            'https://example.com/?foo=bar',
+        ];
+        foreach ($canonicalUrls as $canonicalUrl) {
+            $parsed = Url::fromString($canonicalUrl);
+            $this->assertSame($canonicalUrl, (string) $parsed);
+        }
+    }
+
+    /**
+     * Each input (first element) must be rendered as its normal form (second element).
+     */
+    public function testNormalization()
+    {
+        $cases = [
+            ['http://example.com', 'http://example.com/'],
+            ['https://example.com/?', 'https://example.com/'],
+            ['https://example.com/foo?', 'https://example.com/foo'],
+            ['http://example.com:80/', 'http://example.com/'],
+        ];
+        foreach ($cases as [$input, $expected]) {
+            $parsed = Url::fromString($input);
+            $this->assertSame($expected, (string) $parsed);
+        }
+    }
+
+    /**
+     * The with*() methods must be reflected in the rendered URL.
+     */
+    public function testMutation()
+    {
+        $withNewPath = Url::fromString('http://example.com/')->withPath('/foo');
+        $this->assertSame('http://example.com/foo', (string) $withNewPath);
+
+        $withNewPathKeepingQuery = Url::fromString('http://example.com/?a=b')->withPath('/foo');
+        $this->assertSame('http://example.com/foo?a=b', (string) $withNewPathKeepingQuery);
+
+        $withSamePath = Url::fromString('http://example.com/')->withPath('/');
+        $this->assertSame('http://example.com/', (string) $withSamePath);
+
+        $withNewQuery = Url::fromString('http://example.com/qqq')->withQueryString('foo=bar');
+        $this->assertSame('http://example.com/qqq?foo=bar', (string) $withNewQuery);
+
+        $withNewHost = Url::fromString('http://example.com/qqq?foo=bar')->withHost('example.net');
+        $this->assertSame('http://example.net/qqq?foo=bar', (string) $withNewHost);
+    }
+}