diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/BridgeAbstract.php | 213 | ||||
-rw-r--r-- | lib/BridgeCard.php | 6 | ||||
-rw-r--r-- | lib/BridgeFactory.php | 2 | ||||
-rw-r--r-- | lib/BridgeInterface.php | 145 | ||||
-rw-r--r-- | lib/FeedExpander.php | 2 | ||||
-rw-r--r-- | lib/RssBridge.php | 21 | ||||
-rw-r--r-- | lib/contents.php | 31 | ||||
-rw-r--r-- | lib/http.php | 17 |
8 files changed, 89 insertions, 348 deletions
diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index a69552fc..f51fe893 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -1,76 +1,15 @@ <?php -/** - * This file is part of RSS-Bridge, a PHP project capable of generating RSS and - * Atom feeds for websites that don't have one. - * - * For the full license information, please view the UNLICENSE file distributed - * with this source code. - * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge - */ - -abstract class BridgeAbstract implements BridgeInterface +abstract class BridgeAbstract { - /** - * Name of the bridge - * - * Use {@see BridgeAbstract::getName()} to read this parameter - */ const NAME = 'Unnamed bridge'; - - /** - * URI to the site the bridge is intended to be used for. - * - * Use {@see BridgeAbstract::getURI()} to read this parameter - */ const URI = ''; - - /** - * Donation URI to the site the bridge is intended to be used for. - * - * Use {@see BridgeAbstract::getDonationURI()} to read this parameter - */ const DONATION_URI = ''; - - /** - * A brief description of what the bridge can do - * - * Use {@see BridgeAbstract::getDescription()} to read this parameter - */ const DESCRIPTION = 'No description provided'; - - /** - * The name of the maintainer. Multiple maintainers can be separated by comma - * - * Use {@see BridgeAbstract::getMaintainer()} to read this parameter - */ const MAINTAINER = 'No maintainer'; - - /** - * The default cache timeout for the bridge - * - * Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter - */ const CACHE_TIMEOUT = 3600; - - /** - * Configuration for the bridge - */ const CONFIGURATION = []; - - /** - * Parameters for the bridge - * - * Use {@see BridgeAbstract::getParameters()} to read this parameter - */ const PARAMETERS = []; - - /** - * Test cases for detectParameters for the bridge - */ const TEST_DETECT_PARAMETERS = []; /** @@ -83,49 +22,67 @@ abstract class BridgeAbstract implements BridgeInterface 'title' => 'Maximum number of items to return', ]; - /** - * Holds the list of items collected by the bridge - * - * Items must be collected by {@see BridgeInterface::collectData()} - * - * Use {@see BridgeAbstract::getItems()} to access items. - * - * @var array - */ protected array $items = []; - - /** - * Holds the list of input parameters used by the bridge - * - * Do not access this parameter directly! - * Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead! - * - * @var array - */ protected array $inputs = []; - - /** - * Holds the name of the queried context - * - * @var string - */ - protected $queriedContext = ''; - - /** - * Holds the list of bridge-specific configurations from config.ini.php, used by the bridge. - */ + protected string $queriedContext = ''; private array $configuration = []; public function __construct() { } - /** {@inheritdoc} */ + abstract public function collectData(); + public function getItems() { return $this->items; } + public function getOption(string $name) + { + return $this->configuration[$name] ?? null; + } + + public function getDescription() + { + return static::DESCRIPTION; + } + + public function getMaintainer(): string + { + return static::MAINTAINER; + } + + public function getName() + { + return static::NAME; + } + + public function getIcon() + { + return static::URI . '/favicon.ico'; + } + + public function getParameters(): array + { + return static::PARAMETERS; + } + + public function getURI() + { + return static::URI; + } + + public function getDonationURI(): string + { + return static::DONATION_URI; + } + + public function getCacheTimeout() + { + return static::CACHE_TIMEOUT; + } + /** * Sets the input values for a given context. * @@ -299,10 +256,7 @@ abstract class BridgeAbstract implements BridgeInterface */ protected function getInput($input) { - if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { - return null; - } - return $this->inputs[$this->queriedContext][$input]['value']; + return $this->inputs[$this->queriedContext][$input]['value'] ?? null; } /** @@ -340,63 +294,6 @@ abstract class BridgeAbstract implements BridgeInterface } } - /** - * Get bridge configuration value - */ - public function getOption($name) - { - return $this->configuration[$name] ?? null; - } - - /** {@inheritdoc} */ - public function getDescription() - { - return static::DESCRIPTION; - } - - /** {@inheritdoc} */ - public function getMaintainer() - { - return static::MAINTAINER; - } - - /** {@inheritdoc} */ - public function getName() - { - return static::NAME; - } - - /** {@inheritdoc} */ - public function getIcon() - { - return static::URI . '/favicon.ico'; - } - - /** {@inheritdoc} */ - public function getParameters() - { - return static::PARAMETERS; - } - - /** {@inheritdoc} */ - public function getURI() - { - return static::URI; - } - - /** {@inheritdoc} */ - public function getDonationURI() - { - return static::DONATION_URI; - } - - /** {@inheritdoc} */ - public function getCacheTimeout() - { - return static::CACHE_TIMEOUT; - } - - /** {@inheritdoc} */ public function detectParameters($url) { $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; @@ -411,11 +308,6 @@ abstract class BridgeAbstract implements BridgeInterface return null; } - /** - * Loads a cached value for the specified key - * - * @return mixed Cached value or null if the key doesn't exist or has expired - */ protected function loadCacheValue(string $key) { $cache = RssBridge::getCache(); @@ -423,11 +315,6 @@ abstract class BridgeAbstract implements BridgeInterface return $cache->get($cacheKey); } - /** - * Stores a value to cache with the specified key - * - * @param mixed $value Value to cache - */ protected function saveCacheValue(string $key, $value, $ttl = 86400) { $cache = RssBridge::getCache(); diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 6eef3879..99c44fff 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -25,7 +25,7 @@ final class BridgeCard /** * Gets a single bridge card * - * @param class-string<BridgeInterface> $bridgeClassName The bridge name + * @param class-string<BridgeAbstract> $bridgeClassName The bridge name * @param array $formats A list of formats * @param bool $isActive Indicates if the bridge is active or not * @return string The bridge card @@ -116,7 +116,7 @@ CARD; /** * Get the form header for a bridge card * - * @param class-string<BridgeInterface> $bridgeClassName The bridge name + * @param class-string<BridgeAbstract> $bridgeClassName The bridge name * @param bool $isHttps If disabled, adds a warning to the form * @return string The form header */ @@ -143,7 +143,7 @@ This bridge is not fetching its content through a secure connection</div>'; /** * Get the form body for a bridge * - * @param class-string<BridgeInterface> $bridgeClassName The bridge name + * @param class-string<BridgeAbstract> $bridgeClassName The bridge name * @param array $formats A list of supported formats * @param bool $isActive Indicates if a bridge is enabled or not * @param bool $isHttps Indicates if a bridge uses HTTPS or not diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php index f302a27a..12565d92 100644 --- a/lib/BridgeFactory.php +++ b/lib/BridgeFactory.php @@ -34,7 +34,7 @@ final class BridgeFactory } } - public function create(string $name): BridgeInterface + public function create(string $name): BridgeAbstract { return new $name(); } diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php deleted file mode 100644 index 63bc7b70..00000000 --- a/lib/BridgeInterface.php +++ /dev/null @@ -1,145 +0,0 @@ -<?php - -/** - * This file is part of RSS-Bridge, a PHP project capable of generating RSS and - * Atom feeds for websites that don't have one. - * - * For the full license information, please view the UNLICENSE file distributed - * with this source code. - * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge - */ - -/** - * The bridge interface - * - * A bridge is a class that is responsible for collecting and transforming data - * from one hosting provider into an internal representation of feed data, that - * can later be transformed into different feed formats (see {@see FormatInterface}). - * - * For this purpose, all bridges need to perform three common operations: - * - * 1. Collect data from a remote site. - * 2. Extract the required contents. - * 3. Add the contents to the internal data structure. - * - * Bridges can optionally specify parameters to customize bridge behavior based - * on user input. For example, a user could specify how many items to return in - * the feed and where to get them. - * - * In order to present a bridge on the home page, and for the purpose of bridge - * specific behaviour, additional information must be provided by the bridge: - * - * * **Name** - * The name of the bridge that can be displayed to users. - * - * * **Description** - * A brief description for the bridge that can be displayed to users. - * - * * **URI** - * A link to the hosting provider. - * - * * **Maintainer** - * The GitHub username of the bridge maintainer - * - * * **Parameters** - * A list of parameters for customization - * - * * **Icon** - * A link to the favicon of the hosting provider - * - * * **Cache timeout** - * The default cache timeout for the bridge. - */ -interface BridgeInterface -{ - /** - * Collects data from the site - * - * @return void - */ - public function collectData(); - - /** - * Returns the value for the selected configuration - * - * @param string $input The option name - * @return mixed|null The option value or null if the input is not defined - */ - public function getOption($name); - - /** - * Returns the description - * - * @return string Description - */ - public function getDescription(); - - /** - * Returns an array of collected items - * - * @return array Associative array of items - */ - public function getItems(); - - /** - * Returns the bridge maintainer - * - * @return string Bridge maintainer - */ - public function getMaintainer(); - - /** - * Returns the bridge name - * - * @return string Bridge name - */ - public function getName(); - - /** - * Returns the bridge icon - * - * @return string Bridge icon - */ - public function getIcon(); - - /** - * Returns the bridge parameters - * - * @return array Bridge parameters - */ - public function getParameters(); - - /** - * Returns the bridge URI - * - * @return string Bridge URI - */ - public function getURI(); - - /** - * Returns the bridge Donation URI - * - * @return string Bridge Donation URI - */ - public function getDonationURI(); - - /** - * Returns the cache timeout - * - * @return int Cache timeout - */ - public function getCacheTimeout(); - - /** - * Returns parameters from given URL or null if URL is not applicable - * - * @param string $url URL to extract parameters from - * @return array|null List of bridge parameters or null if detection failed. - */ - public function detectParameters($url); - - public function getShortName(): string; -} diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index be467336..af06cc16 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -74,7 +74,7 @@ abstract class FeedExpander extends BridgeAbstract /** * Collects data from an existing feed. * - * Children should call this function in {@see BridgeInterface::collectData()} + * Children should call this function in {@see BridgeAbstract::collectData()} * to extract a feed. * * @param string $url URL to the feed. diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 1c6ce464..32dad269 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -15,6 +15,9 @@ final class RssBridge } Configuration::loadConfiguration($customConfig, getenv()); + // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); + date_default_timezone_set(Configuration::getConfig('system', 'timezone')); + set_exception_handler(function (\Throwable $e) { Logger::error('Uncaught Exception', ['e' => $e]); http_response_code(500); @@ -57,9 +60,6 @@ final class RssBridge } }); - // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); - date_default_timezone_set(Configuration::getConfig('system', 'timezone')); - self::$httpClient = new CurlHttpClient(); $cacheFactory = new CacheFactory(); @@ -68,11 +68,6 @@ final class RssBridge } else { self::$cache = $cacheFactory->create(); } - - if (Configuration::getConfig('authentication', 'enable')) { - $authenticationMiddleware = new AuthenticationMiddleware(); - $authenticationMiddleware(); - } } public function main(array $argv = []): void @@ -81,6 +76,10 @@ final class RssBridge parse_str(implode('&', array_slice($argv, 1)), $cliArgs); $request = $cliArgs; } else { + if (Configuration::getConfig('authentication', 'enable')) { + $authenticationMiddleware = new AuthenticationMiddleware(); + $authenticationMiddleware(); + } $request = array_merge($_GET, $_POST); } @@ -124,10 +123,4 @@ final class RssBridge { return self::$cache ?? new NullCache(); } - - public function clearCache() - { - $cache = self::getCache(); - $cache->clear(); - } } diff --git a/lib/contents.php b/lib/contents.php index c1847758..e173b542 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -16,6 +16,13 @@ function getContents( ) { $httpClient = RssBridge::getHttpClient(); + $httpHeadersNormalized = []; + foreach ($httpHeaders as $httpHeader) { + $parts = explode(':', $httpHeader); + $headerName = trim($parts[0]); + $headerValue = trim(implode(':', array_slice($parts, 1))); + $httpHeadersNormalized[$headerName] = $headerValue; + } // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -27,13 +34,6 @@ function getContents( 'Sec-Fetch-User' => '?1', 'TE' => 'trailers', ]; - $httpHeadersNormalized = []; - foreach ($httpHeaders as $httpHeader) { - $parts = explode(':', $httpHeader); - $headerName = trim($parts[0]); - $headerValue = trim(implode(':', array_slice($parts, 1))); - $httpHeadersNormalized[$headerName] = $headerValue; - } $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), @@ -43,7 +43,7 @@ function getContents( $maxFileSize = Configuration::getConfig('http', 'max_filesize'); if ($maxFileSize) { - // Multiply with 2^20 (1M) to the value in bytes + // Convert from MB to B by multiplying with 2^20 (1M) $config['max_filesize'] = $maxFileSize * 2 ** 20; } @@ -57,7 +57,6 @@ function getContents( /** @var Response $cachedResponse */ $cachedResponse = $cache->get($cacheKey); if ($cachedResponse) { - // considering popping $cachedLastModified = $cachedResponse->getHeader('last-modified'); if ($cachedLastModified) { $cachedLastModified = new \DateTimeImmutable($cachedLastModified); @@ -101,21 +100,13 @@ function getContents( Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', ); - // The following code must be extracted if it grows too much - $cloudflareTitles = [ - '<title>Just a moment...', - '<title>Please Wait...', - '<title>Attention Required!', - '<title>Security | Glassdoor', - ]; - foreach ($cloudflareTitles as $cloudflareTitle) { - if (str_contains($response->getBody(), $cloudflareTitle)) { - throw new CloudFlareException($exceptionMessage, $response->getCode()); - } + if (CloudFlareException::isCloudFlareResponse($response)) { + throw new CloudFlareException($exceptionMessage, $response->getCode()); } throw new HttpException(trim($exceptionMessage), $response->getCode()); } if ($returnFull === true) { + // todo: return the actual response object return [ 'code' => $response->getCode(), 'headers' => $response->getHeaders(), diff --git a/lib/http.php b/lib/http.php index c5e65e77..cc1d0e22 100644 --- a/lib/http.php +++ b/lib/http.php @@ -6,6 +6,21 @@ class HttpException extends \Exception final class CloudFlareException extends HttpException { + public static function isCloudFlareResponse(Response $response): bool + { + $cloudflareTitles = [ + '<title>Just a moment...', + '<title>Please Wait...', + '<title>Attention Required!', + '<title>Security | Glassdoor', + ]; + foreach ($cloudflareTitles as $cloudflareTitle) { + if (str_contains($response->getBody(), $cloudflareTitle)) { + return true; + } + } + return false; + } } interface HttpClient @@ -119,7 +134,7 @@ final class CurlHttpClient implements HttpClient } } - $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE); curl_close($ch); return new Response($data, $statusCode, $responseHeaders); } |