diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/ActionFactory.php | 43 | ||||
-rw-r--r-- | lib/ActionInterface.php | 26 | ||||
-rw-r--r-- | lib/Authentication.php | 108 | ||||
-rw-r--r-- | lib/BridgeAbstract.php | 802 | ||||
-rw-r--r-- | lib/BridgeCard.php | 682 | ||||
-rw-r--r-- | lib/BridgeFactory.php | 144 | ||||
-rw-r--r-- | lib/BridgeInterface.php | 162 | ||||
-rw-r--r-- | lib/BridgeList.php | 275 | ||||
-rw-r--r-- | lib/CacheFactory.php | 95 | ||||
-rw-r--r-- | lib/CacheInterface.php | 96 | ||||
-rw-r--r-- | lib/Configuration.php | 610 | ||||
-rw-r--r-- | lib/Debug.php | 184 | ||||
-rw-r--r-- | lib/Exceptions.php | 162 | ||||
-rw-r--r-- | lib/FactoryAbstract.php | 99 | ||||
-rw-r--r-- | lib/FeedExpander.php | 857 | ||||
-rw-r--r-- | lib/FeedItem.php | 1046 | ||||
-rw-r--r-- | lib/FormatAbstract.php | 260 | ||||
-rw-r--r-- | lib/FormatFactory.php | 102 | ||||
-rw-r--r-- | lib/FormatInterface.php | 118 | ||||
-rw-r--r-- | lib/ParameterValidator.php | 476 | ||||
-rw-r--r-- | lib/XPathAbstract.php | 1162 | ||||
-rw-r--r-- | lib/contents.php | 570 | ||||
-rw-r--r-- | lib/error.php | 61 | ||||
-rw-r--r-- | lib/html.php | 202 | ||||
-rw-r--r-- | lib/php8backports.php | 28 | ||||
-rw-r--r-- | lib/rssbridge.php | 33 |
26 files changed, 4341 insertions, 4062 deletions
diff --git a/lib/ActionFactory.php b/lib/ActionFactory.php index bd1297b4..5a413767 100644 --- a/lib/ActionFactory.php +++ b/lib/ActionFactory.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,31 +7,31 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ class ActionFactory { - private $folder; + private $folder; - public function __construct(string $folder = PATH_LIB_ACTIONS) - { - $this->folder = $folder; - } + public function __construct(string $folder = PATH_LIB_ACTIONS) + { + $this->folder = $folder; + } - /** - * @param string $name The name of the action e.g. "Display", "List", or "Connectivity" - */ - public function create(string $name): ActionInterface - { - $name = ucfirst(strtolower($name)) . 'Action'; - $filePath = $this->folder . $name . '.php'; - if(!file_exists($filePath)) { - throw new \Exception('Invalid action'); - } - $className = '\\' . $name; - return new $className(); - } + /** + * @param string $name The name of the action e.g. "Display", "List", or "Connectivity" + */ + public function create(string $name): ActionInterface + { + $name = ucfirst(strtolower($name)) . 'Action'; + $filePath = $this->folder . $name . '.php'; + if (!file_exists($filePath)) { + throw new \Exception('Invalid action'); + } + $className = '\\' . $name; + return new $className(); + } } diff --git a/lib/ActionInterface.php b/lib/ActionInterface.php index c8684d52..78284ab4 100644 --- a/lib/ActionInterface.php +++ b/lib/ActionInterface.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,21 +7,22 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** * Interface for action objects. */ -interface ActionInterface { - /** - * Execute the action. - * - * Note: This function directly outputs data to the user. - * - * @return void - */ - public function execute(); +interface ActionInterface +{ + /** + * Execute the action. + * + * Note: This function directly outputs data to the user. + * + * @return void + */ + public function execute(); } diff --git a/lib/Authentication.php b/lib/Authentication.php index ac8ea96a..1ae26edf 100644 --- a/lib/Authentication.php +++ b/lib/Authentication.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -30,56 +31,57 @@ * @todo Add functions to detect if a user is authenticated or not. This can be * utilized for limiting access to authorized users only. */ -class Authentication { - /** - * Throw an exception when trying to create a new instance of this class. - * Use {@see Authentication::showPromptIfNeeded()} instead! - * - * @throws \LogicException if called. - */ - public function __construct(){ - throw new \LogicException('Use ' . __CLASS__ . '::showPromptIfNeeded()!'); - } - - /** - * Requests the user for login credentials if necessary. - * - * Responds to an authentication request or returns the `WWW-Authenticate` - * header if authentication is enabled in the configuration of RSS-Bridge - * (`[authentication] enable = true`). - * - * @return void - */ - public static function showPromptIfNeeded() { - - if(Configuration::getConfig('authentication', 'enable') === true) { - if(!Authentication::verifyPrompt()) { - header('WWW-Authenticate: Basic realm="RSS-Bridge"', true, 401); - die('Please authenticate in order to access this instance !'); - } - - } - - } - - /** - * Verifies if an authentication request was received and compares the - * provided username and password to the configuration of RSS-Bridge - * (`[authentication] username` and `[authentication] password`). - * - * @return bool True if authentication succeeded. - */ - public static function verifyPrompt() { +class Authentication +{ + /** + * Throw an exception when trying to create a new instance of this class. + * Use {@see Authentication::showPromptIfNeeded()} instead! + * + * @throws \LogicException if called. + */ + public function __construct() + { + throw new \LogicException('Use ' . __CLASS__ . '::showPromptIfNeeded()!'); + } - if(isset($_SERVER['PHP_AUTH_USER']) && isset($_SERVER['PHP_AUTH_PW'])) { - if(Configuration::getConfig('authentication', 'username') === $_SERVER['PHP_AUTH_USER'] - && Configuration::getConfig('authentication', 'password') === $_SERVER['PHP_AUTH_PW']) { - return true; - } else { - error_log('[RSS-Bridge] Failed authentication attempt from ' . $_SERVER['REMOTE_ADDR']); - } - } - return false; + /** + * Requests the user for login credentials if necessary. + * + * Responds to an authentication request or returns the `WWW-Authenticate` + * header if authentication is enabled in the configuration of RSS-Bridge + * (`[authentication] enable = true`). + * + * @return void + */ + public static function showPromptIfNeeded() + { + if (Configuration::getConfig('authentication', 'enable') === true) { + if (!Authentication::verifyPrompt()) { + header('WWW-Authenticate: Basic realm="RSS-Bridge"', true, 401); + die('Please authenticate in order to access this instance !'); + } + } + } - } + /** + * Verifies if an authentication request was received and compares the + * provided username and password to the configuration of RSS-Bridge + * (`[authentication] username` and `[authentication] password`). + * + * @return bool True if authentication succeeded. + */ + public static function verifyPrompt() + { + if (isset($_SERVER['PHP_AUTH_USER']) && isset($_SERVER['PHP_AUTH_PW'])) { + if ( + Configuration::getConfig('authentication', 'username') === $_SERVER['PHP_AUTH_USER'] + && Configuration::getConfig('authentication', 'password') === $_SERVER['PHP_AUTH_PW'] + ) { + return true; + } else { + error_log('[RSS-Bridge] Failed authentication attempt from ' . $_SERVER['REMOTE_ADDR']); + } + } + return false; + } } diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 38e3da03..c479f53e 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -24,393 +25,410 @@ * @todo Add specification for PARAMETERS () * @todo Add specification for $items */ -abstract class BridgeAbstract implements BridgeInterface { - - /** - * Name of the bridge - * - * Use {@see BridgeAbstract::getName()} to read this parameter - */ - const NAME = 'Unnamed bridge'; - - /** - * URI to the site the bridge is intended to be used for. - * - * Use {@see BridgeAbstract::getURI()} to read this parameter - */ - const URI = ''; - - /** - * Donation URI to the site the bridge is intended to be used for. - * - * Use {@see BridgeAbstract::getDonationURI()} to read this parameter - */ - const DONATION_URI = ''; - - /** - * A brief description of what the bridge can do - * - * Use {@see BridgeAbstract::getDescription()} to read this parameter - */ - const DESCRIPTION = 'No description provided'; - - /** - * The name of the maintainer. Multiple maintainers can be separated by comma - * - * Use {@see BridgeAbstract::getMaintainer()} to read this parameter - */ - const MAINTAINER = 'No maintainer'; - - /** - * The default cache timeout for the bridge - * - * Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter - */ - const CACHE_TIMEOUT = 3600; - - /** - * Configuration for the bridge - * - * Use {@see BridgeAbstract::getConfiguration()} to read this parameter - */ - const CONFIGURATION = array(); - - /** - * Parameters for the bridge - * - * Use {@see BridgeAbstract::getParameters()} to read this parameter - */ - const PARAMETERS = array(); - - /** - * Test cases for detectParameters for the bridge - */ - const TEST_DETECT_PARAMETERS = array(); - - /** - * This is a convenient const for the limit option in bridge contexts. - * Can be inlined and modified if necessary. - */ - protected const LIMIT = [ - 'name' => 'Limit', - 'type' => 'number', - 'title' => 'Maximum number of items to return', - ]; - - /** - * Holds the list of items collected by the bridge - * - * Items must be collected by {@see BridgeInterface::collectData()} - * - * Use {@see BridgeAbstract::getItems()} to access items. - * - * @var array - */ - protected $items = array(); - - /** - * Holds the list of input parameters used by the bridge - * - * Do not access this parameter directly! - * Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead! - * - * @var array - */ - protected $inputs = array(); - - /** - * Holds the name of the queried context - * - * @var string - */ - protected $queriedContext = ''; - - /** {@inheritdoc} */ - public function getItems(){ - return $this->items; - } - - /** - * Sets the input values for a given context. - * - * @param array $inputs Associative array of inputs - * @param string $queriedContext The context name - * @return void - */ - protected function setInputs(array $inputs, $queriedContext){ - // Import and assign all inputs to their context - foreach($inputs as $name => $value) { - foreach(static::PARAMETERS as $context => $set) { - if(array_key_exists($name, static::PARAMETERS[$context])) { - $this->inputs[$context][$name]['value'] = $value; - } - } - } - - // Apply default values to missing data - $contexts = array($queriedContext); - if(array_key_exists('global', static::PARAMETERS)) { - $contexts[] = 'global'; - } - - foreach($contexts as $context) { - foreach(static::PARAMETERS[$context] as $name => $properties) { - if(isset($this->inputs[$context][$name]['value'])) { - continue; - } - - $type = isset($properties['type']) ? $properties['type'] : 'text'; - - switch($type) { - case 'checkbox': - if(!isset($properties['defaultValue'])) { - $this->inputs[$context][$name]['value'] = false; - } else { - $this->inputs[$context][$name]['value'] = $properties['defaultValue']; - } - break; - case 'list': - if(!isset($properties['defaultValue'])) { - $firstItem = reset($properties['values']); - if(is_array($firstItem)) { - $firstItem = reset($firstItem); - } - $this->inputs[$context][$name]['value'] = $firstItem; - } else { - $this->inputs[$context][$name]['value'] = $properties['defaultValue']; - } - break; - default: - if(isset($properties['defaultValue'])) { - $this->inputs[$context][$name]['value'] = $properties['defaultValue']; - } - break; - } - } - } - - // Copy global parameter values to the guessed context - if(array_key_exists('global', static::PARAMETERS)) { - foreach(static::PARAMETERS['global'] as $name => $properties) { - if(isset($inputs[$name])) { - $value = $inputs[$name]; - } elseif(isset($properties['defaultValue'])) { - $value = $properties['defaultValue']; - } else { - continue; - } - $this->inputs[$queriedContext][$name]['value'] = $value; - } - } - - // Only keep guessed context parameters values - if(isset($this->inputs[$queriedContext])) { - $this->inputs = array($queriedContext => $this->inputs[$queriedContext]); - } else { - $this->inputs = array(); - } - } - - /** - * Set inputs for the bridge - * - * Returns errors and aborts execution if the provided input parameters are - * invalid. - * - * @param array List of input parameters. Each element in this list must - * relate to an item in {@see BridgeAbstract::PARAMETERS} - * @return void - */ - public function setDatas(array $inputs){ - - if(isset($inputs['context'])) { // Context hinting (optional) - $this->queriedContext = $inputs['context']; - unset($inputs['context']); - } - - if(empty(static::PARAMETERS)) { - - if(!empty($inputs)) { - returnClientError('Invalid parameters value(s)'); - } - - return; - - } - - $validator = new ParameterValidator(); - - if(!$validator->validateData($inputs, static::PARAMETERS)) { - $parameters = array_map( - function($i){ return $i['name']; }, // Just display parameter names - $validator->getInvalidParameters() - ); - - returnClientError( - 'Invalid parameters value(s): ' - . implode(', ', $parameters) - ); - } - - // Guess the context from input data - if(empty($this->queriedContext)) { - $this->queriedContext = $validator->getQueriedContext($inputs, static::PARAMETERS); - } - - if(is_null($this->queriedContext)) { - returnClientError('Required parameter(s) missing'); - } elseif($this->queriedContext === false) { - returnClientError('Mixed context parameters'); - } - - $this->setInputs($inputs, $this->queriedContext); - - } - - /** - * Loads configuration for the bridge - * - * Returns errors and aborts execution if the provided configuration is - * invalid. - * - * @return void - */ - public function loadConfiguration() { - foreach(static::CONFIGURATION as $optionName => $optionValue) { - - $configurationOption = Configuration::getConfig(get_class($this), $optionName); - - if($configurationOption !== null) { - $this->configuration[$optionName] = $configurationOption; - continue; - } - - if(isset($optionValue['required']) && $optionValue['required'] === true) { - returnServerError( - 'Missing configuration option: ' - . $optionName - ); - } elseif(isset($optionValue['defaultValue'])) { - $this->configuration[$optionName] = $optionValue['defaultValue']; - } - - } - } - - /** - * Returns the value for the provided input - * - * @param string $input The input name - * @return mixed|null The input value or null if the input is not defined - */ - protected function getInput($input){ - if(!isset($this->inputs[$this->queriedContext][$input]['value'])) { - return null; - } - return $this->inputs[$this->queriedContext][$input]['value']; - } - - /** - * Returns the value for the selected configuration - * - * @param string $input The option name - * @return mixed|null The option value or null if the input is not defined - */ - public function getOption($name){ - if(!isset($this->configuration[$name])) { - return null; - } - return $this->configuration[$name]; - } - - /** {@inheritdoc} */ - public function getDescription(){ - return static::DESCRIPTION; - } - - /** {@inheritdoc} */ - public function getMaintainer(){ - return static::MAINTAINER; - } - - /** {@inheritdoc} */ - public function getName(){ - return static::NAME; - } - - /** {@inheritdoc} */ - public function getIcon(){ - return static::URI . '/favicon.ico'; - } - - /** {@inheritdoc} */ - public function getConfiguration(){ - return static::CONFIGURATION; - } - - /** {@inheritdoc} */ - public function getParameters(){ - return static::PARAMETERS; - } - - /** {@inheritdoc} */ - public function getURI(){ - return static::URI; - } - - /** {@inheritdoc} */ - public function getDonationURI(){ - return static::DONATION_URI; - } - - /** {@inheritdoc} */ - public function getCacheTimeout(){ - return static::CACHE_TIMEOUT; - } - - /** {@inheritdoc} */ - public function detectParameters($url){ - $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; - if(empty(static::PARAMETERS) - && preg_match($regex, $url, $urlMatches) > 0 - && preg_match($regex, static::URI, $bridgeUriMatches) > 0 - && $urlMatches[3] === $bridgeUriMatches[3]) { - return array(); - } else { - return null; - } - } - - /** - * Loads a cached value for the specified key - * - * @param string $key Key name - * @param int $duration Cache duration (optional, default: 24 hours) - * @return mixed Cached value or null if the key doesn't exist or has expired - */ - protected function loadCacheValue($key, $duration = 86400){ - $cacheFac = new CacheFactory(); - - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); - $cache->setScope(get_called_class()); - $cache->setKey($key); - if($cache->getTime() < time() - $duration) - return null; - return $cache->loadData(); - } - - /** - * Stores a value to cache with the specified key - * - * @param string $key Key name - * @param mixed $value Value to cache - */ - protected function saveCacheValue($key, $value){ - $cacheFac = new CacheFactory(); - - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); - $cache->setScope(get_called_class()); - $cache->setKey($key); - $cache->saveData($value); - } +abstract class BridgeAbstract implements BridgeInterface +{ + /** + * Name of the bridge + * + * Use {@see BridgeAbstract::getName()} to read this parameter + */ + const NAME = 'Unnamed bridge'; + + /** + * URI to the site the bridge is intended to be used for. + * + * Use {@see BridgeAbstract::getURI()} to read this parameter + */ + const URI = ''; + + /** + * Donation URI to the site the bridge is intended to be used for. + * + * Use {@see BridgeAbstract::getDonationURI()} to read this parameter + */ + const DONATION_URI = ''; + + /** + * A brief description of what the bridge can do + * + * Use {@see BridgeAbstract::getDescription()} to read this parameter + */ + const DESCRIPTION = 'No description provided'; + + /** + * The name of the maintainer. Multiple maintainers can be separated by comma + * + * Use {@see BridgeAbstract::getMaintainer()} to read this parameter + */ + const MAINTAINER = 'No maintainer'; + + /** + * The default cache timeout for the bridge + * + * Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter + */ + const CACHE_TIMEOUT = 3600; + + /** + * Configuration for the bridge + * + * Use {@see BridgeAbstract::getConfiguration()} to read this parameter + */ + const CONFIGURATION = []; + + /** + * Parameters for the bridge + * + * Use {@see BridgeAbstract::getParameters()} to read this parameter + */ + const PARAMETERS = []; + + /** + * Test cases for detectParameters for the bridge + */ + const TEST_DETECT_PARAMETERS = []; + + /** + * This is a convenient const for the limit option in bridge contexts. + * Can be inlined and modified if necessary. + */ + protected const LIMIT = [ + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + ]; + + /** + * Holds the list of items collected by the bridge + * + * Items must be collected by {@see BridgeInterface::collectData()} + * + * Use {@see BridgeAbstract::getItems()} to access items. + * + * @var array + */ + protected $items = []; + + /** + * Holds the list of input parameters used by the bridge + * + * Do not access this parameter directly! + * Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead! + * + * @var array + */ + protected $inputs = []; + + /** + * Holds the name of the queried context + * + * @var string + */ + protected $queriedContext = ''; + + /** {@inheritdoc} */ + public function getItems() + { + return $this->items; + } + + /** + * Sets the input values for a given context. + * + * @param array $inputs Associative array of inputs + * @param string $queriedContext The context name + * @return void + */ + protected function setInputs(array $inputs, $queriedContext) + { + // Import and assign all inputs to their context + foreach ($inputs as $name => $value) { + foreach (static::PARAMETERS as $context => $set) { + if (array_key_exists($name, static::PARAMETERS[$context])) { + $this->inputs[$context][$name]['value'] = $value; + } + } + } + + // Apply default values to missing data + $contexts = [$queriedContext]; + if (array_key_exists('global', static::PARAMETERS)) { + $contexts[] = 'global'; + } + + foreach ($contexts as $context) { + foreach (static::PARAMETERS[$context] as $name => $properties) { + if (isset($this->inputs[$context][$name]['value'])) { + continue; + } + + $type = isset($properties['type']) ? $properties['type'] : 'text'; + + switch ($type) { + case 'checkbox': + if (!isset($properties['defaultValue'])) { + $this->inputs[$context][$name]['value'] = false; + } else { + $this->inputs[$context][$name]['value'] = $properties['defaultValue']; + } + break; + case 'list': + if (!isset($properties['defaultValue'])) { + $firstItem = reset($properties['values']); + if (is_array($firstItem)) { + $firstItem = reset($firstItem); + } + $this->inputs[$context][$name]['value'] = $firstItem; + } else { + $this->inputs[$context][$name]['value'] = $properties['defaultValue']; + } + break; + default: + if (isset($properties['defaultValue'])) { + $this->inputs[$context][$name]['value'] = $properties['defaultValue']; + } + break; + } + } + } + + // Copy global parameter values to the guessed context + if (array_key_exists('global', static::PARAMETERS)) { + foreach (static::PARAMETERS['global'] as $name => $properties) { + if (isset($inputs[$name])) { + $value = $inputs[$name]; + } elseif (isset($properties['defaultValue'])) { + $value = $properties['defaultValue']; + } else { + continue; + } + $this->inputs[$queriedContext][$name]['value'] = $value; + } + } + + // Only keep guessed context parameters values + if (isset($this->inputs[$queriedContext])) { + $this->inputs = [$queriedContext => $this->inputs[$queriedContext]]; + } else { + $this->inputs = []; + } + } + + /** + * Set inputs for the bridge + * + * Returns errors and aborts execution if the provided input parameters are + * invalid. + * + * @param array List of input parameters. Each element in this list must + * relate to an item in {@see BridgeAbstract::PARAMETERS} + * @return void + */ + public function setDatas(array $inputs) + { + if (isset($inputs['context'])) { // Context hinting (optional) + $this->queriedContext = $inputs['context']; + unset($inputs['context']); + } + + if (empty(static::PARAMETERS)) { + if (!empty($inputs)) { + returnClientError('Invalid parameters value(s)'); + } + + return; + } + + $validator = new ParameterValidator(); + + if (!$validator->validateData($inputs, static::PARAMETERS)) { + $parameters = array_map( + function ($i) { + return $i['name']; + }, // Just display parameter names + $validator->getInvalidParameters() + ); + + returnClientError( + 'Invalid parameters value(s): ' + . implode(', ', $parameters) + ); + } + + // Guess the context from input data + if (empty($this->queriedContext)) { + $this->queriedContext = $validator->getQueriedContext($inputs, static::PARAMETERS); + } + + if (is_null($this->queriedContext)) { + returnClientError('Required parameter(s) missing'); + } elseif ($this->queriedContext === false) { + returnClientError('Mixed context parameters'); + } + + $this->setInputs($inputs, $this->queriedContext); + } + + /** + * Loads configuration for the bridge + * + * Returns errors and aborts execution if the provided configuration is + * invalid. + * + * @return void + */ + public function loadConfiguration() + { + foreach (static::CONFIGURATION as $optionName => $optionValue) { + $configurationOption = Configuration::getConfig(get_class($this), $optionName); + + if ($configurationOption !== null) { + $this->configuration[$optionName] = $configurationOption; + continue; + } + + if (isset($optionValue['required']) && $optionValue['required'] === true) { + returnServerError( + 'Missing configuration option: ' + . $optionName + ); + } elseif (isset($optionValue['defaultValue'])) { + $this->configuration[$optionName] = $optionValue['defaultValue']; + } + } + } + + /** + * Returns the value for the provided input + * + * @param string $input The input name + * @return mixed|null The input value or null if the input is not defined + */ + protected function getInput($input) + { + if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { + return null; + } + return $this->inputs[$this->queriedContext][$input]['value']; + } + + /** + * Returns the value for the selected configuration + * + * @param string $input The option name + * @return mixed|null The option value or null if the input is not defined + */ + public function getOption($name) + { + if (!isset($this->configuration[$name])) { + return null; + } + return $this->configuration[$name]; + } + + /** {@inheritdoc} */ + public function getDescription() + { + return static::DESCRIPTION; + } + + /** {@inheritdoc} */ + public function getMaintainer() + { + return static::MAINTAINER; + } + + /** {@inheritdoc} */ + public function getName() + { + return static::NAME; + } + + /** {@inheritdoc} */ + public function getIcon() + { + return static::URI . '/favicon.ico'; + } + + /** {@inheritdoc} */ + public function getConfiguration() + { + return static::CONFIGURATION; + } + + /** {@inheritdoc} */ + public function getParameters() + { + return static::PARAMETERS; + } + + /** {@inheritdoc} */ + public function getURI() + { + return static::URI; + } + + /** {@inheritdoc} */ + public function getDonationURI() + { + return static::DONATION_URI; + } + + /** {@inheritdoc} */ + public function getCacheTimeout() + { + return static::CACHE_TIMEOUT; + } + + /** {@inheritdoc} */ + public function detectParameters($url) + { + $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; + if ( + empty(static::PARAMETERS) + && preg_match($regex, $url, $urlMatches) > 0 + && preg_match($regex, static::URI, $bridgeUriMatches) > 0 + && $urlMatches[3] === $bridgeUriMatches[3] + ) { + return []; + } else { + return null; + } + } + + /** + * Loads a cached value for the specified key + * + * @param string $key Key name + * @param int $duration Cache duration (optional, default: 24 hours) + * @return mixed Cached value or null if the key doesn't exist or has expired + */ + protected function loadCacheValue($key, $duration = 86400) + { + $cacheFac = new CacheFactory(); + + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope(get_called_class()); + $cache->setKey($key); + if ($cache->getTime() < time() - $duration) { + return null; + } + return $cache->loadData(); + } + + /** + * Stores a value to cache with the specified key + * + * @param string $key Key name + * @param mixed $value Value to cache + */ + protected function saveCacheValue($key, $value) + { + $cacheFac = new CacheFactory(); + + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope(get_called_class()); + $cache->setKey($key); + $cache->saveData($value); + } } diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 22520170..78132776 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -19,310 +20,326 @@ * * @todo Return error if a caller creates an object of this class. */ -final class BridgeCard { - /** - * Get the form header for a bridge card - * - * @param string $bridgeName The bridge name - * @param bool $isHttps If disabled, adds a warning to the form - * @return string The form header - */ - private static function getFormHeader($bridgeName, $isHttps = false, $parameterName = '') { - $form = <<<EOD +final class BridgeCard +{ + /** + * Get the form header for a bridge card + * + * @param string $bridgeName The bridge name + * @param bool $isHttps If disabled, adds a warning to the form + * @return string The form header + */ + private static function getFormHeader($bridgeName, $isHttps = false, $parameterName = '') + { + $form = <<<EOD <form method="GET" action="?"> <input type="hidden" name="action" value="display" /> <input type="hidden" name="bridge" value="{$bridgeName}" /> EOD; - if(!empty($parameterName)) { - $form .= <<<EOD + if (!empty($parameterName)) { + $form .= <<<EOD <input type="hidden" name="context" value="{$parameterName}" /> EOD; - } + } - if(!$isHttps) { - $form .= '<div class="secure-warning">Warning : + if (!$isHttps) { + $form .= '<div class="secure-warning">Warning : This bridge is not fetching its content through a secure connection</div>'; - } - - return $form; - } - - /** - * Get the form body for a bridge - * - * @param string $bridgeName The bridge name - * @param array $formats A list of supported formats - * @param bool $isActive Indicates if a bridge is enabled or not - * @param bool $isHttps Indicates if a bridge uses HTTPS or not - * @param string $parameterName Sets the bridge context for the current form - * @param array $parameters The bridge parameters - * @return string The form body - */ - private static function getForm($bridgeName, - $formats, - $isActive = false, - $isHttps = false, - $parameterName = '', - $parameters = array()) { - $form = self::getFormHeader($bridgeName, $isHttps, $parameterName); - - if(count($parameters) > 0) { - - $form .= '<div class="parameters">'; - - foreach($parameters as $id => $inputEntry) { - if(!isset($inputEntry['exampleValue'])) - $inputEntry['exampleValue'] = ''; - - if(!isset($inputEntry['defaultValue'])) - $inputEntry['defaultValue'] = ''; - - $idArg = 'arg-' - . urlencode($bridgeName) - . '-' - . urlencode($parameterName) - . '-' - . urlencode($id); - - $form .= '<label for="' - . $idArg - . '">' - . filter_var($inputEntry['name'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) - . '</label>' - . PHP_EOL; - - if(!isset($inputEntry['type']) || $inputEntry['type'] === 'text') { - $form .= self::getTextInput($inputEntry, $idArg, $id); - } elseif($inputEntry['type'] === 'number') { - $form .= self::getNumberInput($inputEntry, $idArg, $id); - } else if($inputEntry['type'] === 'list') { - $form .= self::getListInput($inputEntry, $idArg, $id); - } elseif($inputEntry['type'] === 'checkbox') { - $form .= self::getCheckboxInput($inputEntry, $idArg, $id); - } - - if(isset($inputEntry['title'])) { - $title_filtered = filter_var($inputEntry['title'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); - $form .= '<i class="info" title="' . $title_filtered . '">i</i>'; - } else { - $form .= '<i class="no-info"></i>'; - } - } - - $form .= '</div>'; - - } - - if($isActive) { - $form .= '<button type="submit" name="format" formtarget="_blank" value="Html">Generate feed</button>'; - } else { - $form .= '<span style="font-weight: bold;">Inactive</span>'; - } - - return $form . '</form>' . PHP_EOL; - } - - /** - * Get input field attributes - * - * @param array $entry The current entry - * @return string The input field attributes - */ - private static function getInputAttributes($entry) { - $retVal = ''; - - if(isset($entry['required']) && $entry['required'] === true) - $retVal .= ' required'; - - if(isset($entry['pattern'])) - $retVal .= ' pattern="' . $entry['pattern'] . '"'; - - return $retVal; - } - - /** - * Get text input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The text input field - */ - private static function getTextInput($entry, $id, $name) { - return '<input ' - . self::getInputAttributes($entry) - . ' id="' - . $id - . '" type="text" value="' - . filter_var($entry['defaultValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) - . '" placeholder="' - . filter_var($entry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) - . '" name="' - . $name - . '" />' - . PHP_EOL; - } - - /** - * Get number input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The number input field - */ - private static function getNumberInput($entry, $id, $name) { - return '<input ' - . self::getInputAttributes($entry) - . ' id="' - . $id - . '" type="number" value="' - . filter_var($entry['defaultValue'], FILTER_SANITIZE_NUMBER_INT) - . '" placeholder="' - . filter_var($entry['exampleValue'], FILTER_SANITIZE_NUMBER_INT) - . '" name="' - . $name - . '" />' - . PHP_EOL; - } - - /** - * Get list input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The list input field - */ - private static function getListInput($entry, $id, $name) { - if(isset($entry['required']) && $entry['required'] === true) { - Debug::log('The "required" attribute is not supported for lists.'); - unset($entry['required']); - } - - $list = '<select ' - . self::getInputAttributes($entry) - . ' id="' - . $id - . '" name="' - . $name - . '" >'; - - foreach($entry['values'] as $name => $value) { - if(is_array($value)) { - $list .= '<optgroup label="' . htmlentities($name) . '">'; - foreach($value as $subname => $subvalue) { - if($entry['defaultValue'] === $subname - || $entry['defaultValue'] === $subvalue) { - $list .= '<option value="' - . $subvalue - . '" selected>' - . $subname - . '</option>'; - } else { - $list .= '<option value="' - . $subvalue - . '">' - . $subname - . '</option>'; - } - } - $list .= '</optgroup>'; - } else { - if($entry['defaultValue'] === $name - || $entry['defaultValue'] === $value) { - $list .= '<option value="' - . $value - . '" selected>' - . $name - . '</option>'; - } else { - $list .= '<option value="' - . $value - . '">' - . $name - . '</option>'; - } - } - } - - $list .= '</select>'; - - return $list; - } - - /** - * Get checkbox input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The checkbox input field - */ - private static function getCheckboxInput($entry, $id, $name) { - if(isset($entry['required']) && $entry['required'] === true) { - Debug::log('The "required" attribute is not supported for checkboxes.'); - unset($entry['required']); - } - - return '<input ' - . self::getInputAttributes($entry) - . ' id="' - . $id - . '" type="checkbox" name="' - . $name - . '" ' - . ($entry['defaultValue'] === 'checked' ? 'checked' : '') - . ' />' - . PHP_EOL; - } - - /** - * Gets a single bridge card - * - * @param string $bridgeName The bridge name - * @param array $formats A list of formats - * @param bool $isActive Indicates if the bridge is active or not - * @return string The bridge card - */ - public static function displayBridgeCard($bridgeName, $formats, $isActive = true){ - - $bridgeFac = new \BridgeFactory(); - - $bridge = $bridgeFac->create($bridgeName); - - if($bridge == false) - return ''; - - $isHttps = strpos($bridge->getURI(), 'https') === 0; - - $uri = $bridge->getURI(); - $name = $bridge->getName(); - $icon = $bridge->getIcon(); - $description = $bridge->getDescription(); - $parameters = $bridge->getParameters(); - $donationUri = $bridge->getDonationURI(); - $maintainer = $bridge->getMaintainer(); - - $donationsAllowed = Configuration::getConfig('admin', 'donations'); - - if(defined('PROXY_URL') && PROXY_BYBRIDGE) { - $parameters['global']['_noproxy'] = array( - 'name' => 'Disable proxy (' . ((defined('PROXY_NAME') && PROXY_NAME) ? PROXY_NAME : PROXY_URL) . ')', - 'type' => 'checkbox' - ); - } - - if(CUSTOM_CACHE_TIMEOUT) { - $parameters['global']['_cache_timeout'] = array( - 'name' => 'Cache timeout in seconds', - 'type' => 'number', - 'defaultValue' => $bridge->getCacheTimeout() - ); - } - - $card = <<<CARD + } + + return $form; + } + + /** + * Get the form body for a bridge + * + * @param string $bridgeName The bridge name + * @param array $formats A list of supported formats + * @param bool $isActive Indicates if a bridge is enabled or not + * @param bool $isHttps Indicates if a bridge uses HTTPS or not + * @param string $parameterName Sets the bridge context for the current form + * @param array $parameters The bridge parameters + * @return string The form body + */ + private static function getForm( + $bridgeName, + $formats, + $isActive = false, + $isHttps = false, + $parameterName = '', + $parameters = [] + ) { + $form = self::getFormHeader($bridgeName, $isHttps, $parameterName); + + if (count($parameters) > 0) { + $form .= '<div class="parameters">'; + + foreach ($parameters as $id => $inputEntry) { + if (!isset($inputEntry['exampleValue'])) { + $inputEntry['exampleValue'] = ''; + } + + if (!isset($inputEntry['defaultValue'])) { + $inputEntry['defaultValue'] = ''; + } + + $idArg = 'arg-' + . urlencode($bridgeName) + . '-' + . urlencode($parameterName) + . '-' + . urlencode($id); + + $form .= '<label for="' + . $idArg + . '">' + . filter_var($inputEntry['name'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) + . '</label>' + . PHP_EOL; + + if (!isset($inputEntry['type']) || $inputEntry['type'] === 'text') { + $form .= self::getTextInput($inputEntry, $idArg, $id); + } elseif ($inputEntry['type'] === 'number') { + $form .= self::getNumberInput($inputEntry, $idArg, $id); + } elseif ($inputEntry['type'] === 'list') { + $form .= self::getListInput($inputEntry, $idArg, $id); + } elseif ($inputEntry['type'] === 'checkbox') { + $form .= self::getCheckboxInput($inputEntry, $idArg, $id); + } + + if (isset($inputEntry['title'])) { + $title_filtered = filter_var($inputEntry['title'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $form .= '<i class="info" title="' . $title_filtered . '">i</i>'; + } else { + $form .= '<i class="no-info"></i>'; + } + } + + $form .= '</div>'; + } + + if ($isActive) { + $form .= '<button type="submit" name="format" formtarget="_blank" value="Html">Generate feed</button>'; + } else { + $form .= '<span style="font-weight: bold;">Inactive</span>'; + } + + return $form . '</form>' . PHP_EOL; + } + + /** + * Get input field attributes + * + * @param array $entry The current entry + * @return string The input field attributes + */ + private static function getInputAttributes($entry) + { + $retVal = ''; + + if (isset($entry['required']) && $entry['required'] === true) { + $retVal .= ' required'; + } + + if (isset($entry['pattern'])) { + $retVal .= ' pattern="' . $entry['pattern'] . '"'; + } + + return $retVal; + } + + /** + * Get text input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The text input field + */ + private static function getTextInput($entry, $id, $name) + { + return '<input ' + . self::getInputAttributes($entry) + . ' id="' + . $id + . '" type="text" value="' + . filter_var($entry['defaultValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) + . '" placeholder="' + . filter_var($entry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS) + . '" name="' + . $name + . '" />' + . PHP_EOL; + } + + /** + * Get number input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The number input field + */ + private static function getNumberInput($entry, $id, $name) + { + return '<input ' + . self::getInputAttributes($entry) + . ' id="' + . $id + . '" type="number" value="' + . filter_var($entry['defaultValue'], FILTER_SANITIZE_NUMBER_INT) + . '" placeholder="' + . filter_var($entry['exampleValue'], FILTER_SANITIZE_NUMBER_INT) + . '" name="' + . $name + . '" />' + . PHP_EOL; + } + + /** + * Get list input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The list input field + */ + private static function getListInput($entry, $id, $name) + { + if (isset($entry['required']) && $entry['required'] === true) { + Debug::log('The "required" attribute is not supported for lists.'); + unset($entry['required']); + } + + $list = '<select ' + . self::getInputAttributes($entry) + . ' id="' + . $id + . '" name="' + . $name + . '" >'; + + foreach ($entry['values'] as $name => $value) { + if (is_array($value)) { + $list .= '<optgroup label="' . htmlentities($name) . '">'; + foreach ($value as $subname => $subvalue) { + if ( + $entry['defaultValue'] === $subname + || $entry['defaultValue'] === $subvalue + ) { + $list .= '<option value="' + . $subvalue + . '" selected>' + . $subname + . '</option>'; + } else { + $list .= '<option value="' + . $subvalue + . '">' + . $subname + . '</option>'; + } + } + $list .= '</optgroup>'; + } else { + if ( + $entry['defaultValue'] === $name + || $entry['defaultValue'] === $value + ) { + $list .= '<option value="' + . $value + . '" selected>' + . $name + . '</option>'; + } else { + $list .= '<option value="' + . $value + . '">' + . $name + . '</option>'; + } + } + } + + $list .= '</select>'; + + return $list; + } + + /** + * Get checkbox input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The checkbox input field + */ + private static function getCheckboxInput($entry, $id, $name) + { + if (isset($entry['required']) && $entry['required'] === true) { + Debug::log('The "required" attribute is not supported for checkboxes.'); + unset($entry['required']); + } + + return '<input ' + . self::getInputAttributes($entry) + . ' id="' + . $id + . '" type="checkbox" name="' + . $name + . '" ' + . ($entry['defaultValue'] === 'checked' ? 'checked' : '') + . ' />' + . PHP_EOL; + } + + /** + * Gets a single bridge card + * + * @param string $bridgeName The bridge name + * @param array $formats A list of formats + * @param bool $isActive Indicates if the bridge is active or not + * @return string The bridge card + */ + public static function displayBridgeCard($bridgeName, $formats, $isActive = true) + { + $bridgeFac = new \BridgeFactory(); + + $bridge = $bridgeFac->create($bridgeName); + + if ($bridge == false) { + return ''; + } + + $isHttps = strpos($bridge->getURI(), 'https') === 0; + + $uri = $bridge->getURI(); + $name = $bridge->getName(); + $icon = $bridge->getIcon(); + $description = $bridge->getDescription(); + $parameters = $bridge->getParameters(); + $donationUri = $bridge->getDonationURI(); + $maintainer = $bridge->getMaintainer(); + + $donationsAllowed = Configuration::getConfig('admin', 'donations'); + + if (defined('PROXY_URL') && PROXY_BYBRIDGE) { + $parameters['global']['_noproxy'] = [ + 'name' => 'Disable proxy (' . ((defined('PROXY_NAME') && PROXY_NAME) ? PROXY_NAME : PROXY_URL) . ')', + 'type' => 'checkbox' + ]; + } + + if (CUSTOM_CACHE_TIMEOUT) { + $parameters['global']['_cache_timeout'] = [ + 'name' => 'Cache timeout in seconds', + 'type' => 'number', + 'defaultValue' => $bridge->getCacheTimeout() + ]; + } + + $card = <<<CARD <section id="bridge-{$bridgeName}" data-ref="{$name}"> <h2><a href="{$uri}">{$name}</a></h2> <p class="description">{$description}</p> @@ -330,38 +347,39 @@ This bridge is not fetching its content through a secure connection</div>'; <label class="showmore" for="showmore-{$bridgeName}">Show more</label> CARD; - // If we don't have any parameter for the bridge, we print a generic form to load it. - if (count($parameters) === 0) { - $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps); - - // Display form with cache timeout and/or noproxy options (if enabled) when bridge has no parameters - } else if (count($parameters) === 1 && array_key_exists('global', $parameters)) { - $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps, '', $parameters['global']); - } else { - - foreach($parameters as $parameterName => $parameter) { - if(!is_numeric($parameterName) && $parameterName === 'global') - continue; - - if(array_key_exists('global', $parameters)) - $parameter = array_merge($parameter, $parameters['global']); - - if(!is_numeric($parameterName)) - $card .= '<h5>' . $parameterName . '</h5>' . PHP_EOL; - - $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps, $parameterName, $parameter); - } - - } - - $card .= '<label class="showless" for="showmore-' . $bridgeName . '">Show less</label>'; - if($donationUri !== '' && $donationsAllowed) { - $card .= '<p class="maintainer">' . $maintainer . ' ~ <a href="' . $donationUri . '">Donate</a></p>'; - } else { - $card .= '<p class="maintainer">' . $maintainer . '</p>'; - } - $card .= '</section>'; - - return $card; - } + // If we don't have any parameter for the bridge, we print a generic form to load it. + if (count($parameters) === 0) { + $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps); + + // Display form with cache timeout and/or noproxy options (if enabled) when bridge has no parameters + } elseif (count($parameters) === 1 && array_key_exists('global', $parameters)) { + $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps, '', $parameters['global']); + } else { + foreach ($parameters as $parameterName => $parameter) { + if (!is_numeric($parameterName) && $parameterName === 'global') { + continue; + } + + if (array_key_exists('global', $parameters)) { + $parameter = array_merge($parameter, $parameters['global']); + } + + if (!is_numeric($parameterName)) { + $card .= '<h5>' . $parameterName . '</h5>' . PHP_EOL; + } + + $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps, $parameterName, $parameter); + } + } + + $card .= '<label class="showless" for="showmore-' . $bridgeName . '">Show less</label>'; + if ($donationUri !== '' && $donationsAllowed) { + $card .= '<p class="maintainer">' . $maintainer . ' ~ <a href="' . $donationUri . '">Donate</a></p>'; + } else { + $card .= '<p class="maintainer">' . $maintainer . '</p>'; + } + $card .= '</section>'; + + return $card; + } } diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php index f435261c..3e355b7a 100644 --- a/lib/BridgeFactory.php +++ b/lib/BridgeFactory.php @@ -1,87 +1,87 @@ <?php -final class BridgeFactory { +final class BridgeFactory +{ + private $folder; + private $bridgeNames = []; + private $whitelist = []; - private $folder; - private $bridgeNames = []; - private $whitelist = []; + public function __construct(string $folder = PATH_LIB_BRIDGES) + { + $this->folder = $folder; - public function __construct(string $folder = PATH_LIB_BRIDGES) - { - $this->folder = $folder; + // create names + foreach (scandir($this->folder) as $file) { + if (preg_match('/^([^.]+)Bridge\.php$/U', $file, $m)) { + $this->bridgeNames[] = $m[1]; + } + } - // create names - foreach(scandir($this->folder) as $file) { - if(preg_match('/^([^.]+)Bridge\.php$/U', $file, $m)) { - $this->bridgeNames[] = $m[1]; - } - } + // create whitelist + if (file_exists(WHITELIST)) { + $contents = trim(file_get_contents(WHITELIST)); + } elseif (file_exists(WHITELIST_DEFAULT)) { + $contents = trim(file_get_contents(WHITELIST_DEFAULT)); + } else { + $contents = ''; + } + if ($contents === '*') { // Whitelist all bridges + $this->whitelist = $this->getBridgeNames(); + } else { + foreach (explode("\n", $contents) as $bridgeName) { + $this->whitelist[] = $this->sanitizeBridgeName($bridgeName); + } + } + } - // create whitelist - if (file_exists(WHITELIST)) { - $contents = trim(file_get_contents(WHITELIST)); - } elseif (file_exists(WHITELIST_DEFAULT)) { - $contents = trim(file_get_contents(WHITELIST_DEFAULT)); - } else { - $contents = ''; - } - if ($contents === '*') { // Whitelist all bridges - $this->whitelist = $this->getBridgeNames(); - } else { - foreach (explode("\n", $contents) as $bridgeName) { - $this->whitelist[] = $this->sanitizeBridgeName($bridgeName); - } - } - } + public function create(string $name): BridgeInterface + { + if (preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name)) { + $className = sprintf('%sBridge', $this->sanitizeBridgeName($name)); + return new $className(); + } + throw new \InvalidArgumentException('Bridge name invalid!'); + } - public function create(string $name): BridgeInterface - { - if(preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name)) { - $className = sprintf('%sBridge', $this->sanitizeBridgeName($name)); - return new $className(); - } - throw new \InvalidArgumentException('Bridge name invalid!'); - } + public function getBridgeNames(): array + { + return $this->bridgeNames; + } - public function getBridgeNames(): array - { - return $this->bridgeNames; - } + public function isWhitelisted($name): bool + { + return in_array($this->sanitizeBridgeName($name), $this->whitelist); + } - public function isWhitelisted($name): bool - { - return in_array($this->sanitizeBridgeName($name), $this->whitelist); - } + private function sanitizeBridgeName($name) + { + if (!is_string($name)) { + return null; + } - private function sanitizeBridgeName($name) { + // Trim trailing '.php' if exists + if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { + $name = $matches[1]; + } - if(!is_string($name)) { - return null; - } + // Trim trailing 'Bridge' if exists + if (preg_match('/(.+)(?:Bridge)/i', $name, $matches)) { + $name = $matches[1]; + } - // Trim trailing '.php' if exists - if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { - $name = $matches[1]; - } + // Improve performance for correctly written bridge names + if (in_array($name, $this->getBridgeNames())) { + $index = array_search($name, $this->getBridgeNames()); + return $this->getBridgeNames()[$index]; + } - // Trim trailing 'Bridge' if exists - if (preg_match('/(.+)(?:Bridge)/i', $name, $matches)) { - $name = $matches[1]; - } + // The name is valid if a corresponding bridge file is found on disk + if (in_array(strtolower($name), array_map('strtolower', $this->getBridgeNames()))) { + $index = array_search(strtolower($name), array_map('strtolower', $this->getBridgeNames())); + return $this->getBridgeNames()[$index]; + } - // Improve performance for correctly written bridge names - if (in_array($name, $this->getBridgeNames())) { - $index = array_search($name, $this->getBridgeNames()); - return $this->getBridgeNames()[$index]; - } - - // The name is valid if a corresponding bridge file is found on disk - if (in_array(strtolower($name), array_map('strtolower', $this->getBridgeNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', $this->getBridgeNames())); - return $this->getBridgeNames()[$index]; - } - - Debug::log('Invalid bridge name specified: "' . $name . '"!'); - return null; - } + Debug::log('Invalid bridge name specified: "' . $name . '"!'); + return null; + } } diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php index 70625125..6cf949c8 100644 --- a/lib/BridgeInterface.php +++ b/lib/BridgeInterface.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -52,93 +53,94 @@ * * **Cache timeout** * The default cache timeout for the bridge. */ -interface BridgeInterface { - /** - * Collects data from the site - */ - public function collectData(); +interface BridgeInterface +{ + /** + * Collects data from the site + */ + public function collectData(); - /** - * Get the user's supplied configuration for the bridge - */ - public function getConfiguration(); + /** + * Get the user's supplied configuration for the bridge + */ + public function getConfiguration(); - /** - * Returns the value for the selected configuration - * - * @param string $input The option name - * @return mixed|null The option value or null if the input is not defined - */ - public function getOption($name); + /** + * Returns the value for the selected configuration + * + * @param string $input The option name + * @return mixed|null The option value or null if the input is not defined + */ + public function getOption($name); - /** - * Returns the description - * - * @return string Description - */ - public function getDescription(); + /** + * Returns the description + * + * @return string Description + */ + public function getDescription(); - /** - * Returns an array of collected items - * - * @return array Associative array of items - */ - public function getItems(); + /** + * Returns an array of collected items + * + * @return array Associative array of items + */ + public function getItems(); - /** - * Returns the bridge maintainer - * - * @return string Bridge maintainer - */ - public function getMaintainer(); + /** + * Returns the bridge maintainer + * + * @return string Bridge maintainer + */ + public function getMaintainer(); - /** - * Returns the bridge name - * - * @return string Bridge name - */ - public function getName(); + /** + * Returns the bridge name + * + * @return string Bridge name + */ + public function getName(); - /** - * Returns the bridge icon - * - * @return string Bridge icon - */ - public function getIcon(); + /** + * Returns the bridge icon + * + * @return string Bridge icon + */ + public function getIcon(); - /** - * Returns the bridge parameters - * - * @return array Bridge parameters - */ - public function getParameters(); + /** + * Returns the bridge parameters + * + * @return array Bridge parameters + */ + public function getParameters(); - /** - * Returns the bridge URI - * - * @return string Bridge URI - */ - public function getURI(); + /** + * Returns the bridge URI + * + * @return string Bridge URI + */ + public function getURI(); - /** - * Returns the bridge Donation URI - * - * @return string Bridge Donation URI - */ - public function getDonationURI(); + /** + * Returns the bridge Donation URI + * + * @return string Bridge Donation URI + */ + public function getDonationURI(); - /** - * Returns the cache timeout - * - * @return int Cache timeout - */ - public function getCacheTimeout(); + /** + * Returns the cache timeout + * + * @return int Cache timeout + */ + public function getCacheTimeout(); - /** - * Returns parameters from given URL or null if URL is not applicable - * - * @param string $url URL to extract parameters from - * @return array|null List of bridge parameters or null if detection failed. - */ - public function detectParameters($url); + /** + * Returns parameters from given URL or null if URL is not applicable + * + * @param string $url URL to extract parameters from + * @return array|null List of bridge parameters or null if detection failed. + */ + public function detectParameters($url); } diff --git a/lib/BridgeList.php b/lib/BridgeList.php index c5082e57..921dfe50 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -19,14 +20,16 @@ * * @todo Return error if a caller creates an object of this class. */ -final class BridgeList { - /** - * Get the document head - * - * @return string The document head - */ - private static function getHead() { - return <<<EOD +final class BridgeList +{ + /** + * Get the document head + * + * @return string The document head + */ + private static function getHead() + { + return <<<EOD <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> @@ -45,91 +48,87 @@ final class BridgeList { </noscript> </head> EOD; - } - - /** - * Get the document body for all bridge cards - * - * @param bool $showInactive Inactive bridges are visible on the home page if - * enabled. - * @param int $totalBridges (ref) Returns the total number of bridges. - * @param int $totalActiveBridges (ref) Returns the number of active bridges. - * @return string The document body for all bridge cards. - */ - private static function getBridges($showInactive, &$totalBridges, &$totalActiveBridges) { - - $body = ''; - $totalActiveBridges = 0; - $inactiveBridges = ''; - - $bridgeFac = new \BridgeFactory(); - $bridgeList = $bridgeFac->getBridgeNames(); - - $formatFac = new FormatFactory(); - $formats = $formatFac->getFormatNames(); - - $totalBridges = count($bridgeList); - - foreach($bridgeList as $bridgeName) { - - if($bridgeFac->isWhitelisted($bridgeName)) { - - $body .= BridgeCard::displayBridgeCard($bridgeName, $formats); - $totalActiveBridges++; - - } elseif($showInactive) { - - // inactive bridges - $inactiveBridges .= BridgeCard::displayBridgeCard($bridgeName, $formats, false) . PHP_EOL; - - } - - } - - $body .= $inactiveBridges; - - return $body; - } - - /** - * Get the document header - * - * @return string The document header - */ - private static function getHeader() { - $warning = ''; - - if(Debug::isEnabled()) { - if(!Debug::isSecure()) { - $warning .= <<<EOD + } + + /** + * Get the document body for all bridge cards + * + * @param bool $showInactive Inactive bridges are visible on the home page if + * enabled. + * @param int $totalBridges (ref) Returns the total number of bridges. + * @param int $totalActiveBridges (ref) Returns the number of active bridges. + * @return string The document body for all bridge cards. + */ + private static function getBridges($showInactive, &$totalBridges, &$totalActiveBridges) + { + $body = ''; + $totalActiveBridges = 0; + $inactiveBridges = ''; + + $bridgeFac = new \BridgeFactory(); + $bridgeList = $bridgeFac->getBridgeNames(); + + $formatFac = new FormatFactory(); + $formats = $formatFac->getFormatNames(); + + $totalBridges = count($bridgeList); + + foreach ($bridgeList as $bridgeName) { + if ($bridgeFac->isWhitelisted($bridgeName)) { + $body .= BridgeCard::displayBridgeCard($bridgeName, $formats); + $totalActiveBridges++; + } elseif ($showInactive) { + // inactive bridges + $inactiveBridges .= BridgeCard::displayBridgeCard($bridgeName, $formats, false) . PHP_EOL; + } + } + + $body .= $inactiveBridges; + + return $body; + } + + /** + * Get the document header + * + * @return string The document header + */ + private static function getHeader() + { + $warning = ''; + + if (Debug::isEnabled()) { + if (!Debug::isSecure()) { + $warning .= <<<EOD <section class="critical-warning">Warning : Debug mode is active from any location, make sure only you can access RSS-Bridge.</section> EOD; - } else { - $warning .= <<<EOD + } else { + $warning .= <<<EOD <section class="warning">Warning : Debug mode is active from your IP address, your requests will bypass the cache.</section> EOD; - } - } + } + } - return <<<EOD + return <<<EOD <header> <div class="logo"></div> {$warning} </header> EOD; - } - - /** - * Get the searchbar - * - * @return string The searchbar - */ - private static function getSearchbar() { - $query = filter_input(INPUT_GET, 'q', FILTER_SANITIZE_SPECIAL_CHARS); - - return <<<EOD + } + + /** + * Get the searchbar + * + * @return string The searchbar + */ + private static function getSearchbar() + { + $query = filter_input(INPUT_GET, 'q', FILTER_SANITIZE_SPECIAL_CHARS); + + return <<<EOD <section class="searchbar"> <h3>Search</h3> <input type="text" name="searchfield" @@ -137,46 +136,45 @@ EOD; onchange="search()" onkeyup="search()" value="{$query}"> </section> EOD; - } - - /** - * Get the document footer - * - * @param int $totalBridges The total number of bridges, shown in the footer - * @param int $totalActiveBridges The total number of active bridges, shown - * in the footer. - * @param bool $showInactive Sets the 'Show active'/'Show inactive' text in - * the footer. - * @return string The document footer - */ - private static function getFooter($totalBridges, $totalActiveBridges, $showInactive) { - $version = Configuration::getVersion(); - - $email = Configuration::getConfig('admin', 'email'); - $admininfo = ''; - if (!empty($email)) { - $admininfo = <<<EOD + } + + /** + * Get the document footer + * + * @param int $totalBridges The total number of bridges, shown in the footer + * @param int $totalActiveBridges The total number of active bridges, shown + * in the footer. + * @param bool $showInactive Sets the 'Show active'/'Show inactive' text in + * the footer. + * @return string The document footer + */ + private static function getFooter($totalBridges, $totalActiveBridges, $showInactive) + { + $version = Configuration::getVersion(); + + $email = Configuration::getConfig('admin', 'email'); + $admininfo = ''; + if (!empty($email)) { + $admininfo = <<<EOD <br /> <span> You may email the administrator of this RSS-Bridge instance at <a href="mailto:{$email}">{$email}</a> </span> EOD; - } + } - $inactive = ''; - - if($totalActiveBridges !== $totalBridges) { - - if(!$showInactive) { - $inactive = '<a href="?show_inactive=1"><button class="small">Show inactive bridges</button></a><br>'; - } else { - $inactive = '<a href="?show_inactive=0"><button class="small">Hide inactive bridges</button></a><br>'; - } + $inactive = ''; - } + if ($totalActiveBridges !== $totalBridges) { + if (!$showInactive) { + $inactive = '<a href="?show_inactive=1"><button class="small">Show inactive bridges</button></a><br>'; + } else { + $inactive = '<a href="?show_inactive=0"><button class="small">Hide inactive bridges</button></a><br>'; + } + } - return <<<EOD + return <<<EOD <section class="footer"> <a href="https://github.com/rss-bridge/rss-bridge">RSS-Bridge ~ Public Domain</a><br> <p class="version">{$version}</p> @@ -185,28 +183,27 @@ EOD; {$admininfo} </section> EOD; - } - - /** - * Create the entire home page - * - * @param bool $showInactive Inactive bridges are displayed on the home page, - * if enabled. - * @return string The home page - */ - public static function create($showInactive = true) { - - $totalBridges = 0; - $totalActiveBridges = 0; - - return '<!DOCTYPE html><html lang="en">' - . BridgeList::getHead() - . '<body onload="search()">' - . BridgeList::getHeader() - . BridgeList::getSearchbar() - . BridgeList::getBridges($showInactive, $totalBridges, $totalActiveBridges) - . BridgeList::getFooter($totalBridges, $totalActiveBridges, $showInactive) - . '</body></html>'; - - } + } + + /** + * Create the entire home page + * + * @param bool $showInactive Inactive bridges are displayed on the home page, + * if enabled. + * @return string The home page + */ + public static function create($showInactive = true) + { + $totalBridges = 0; + $totalActiveBridges = 0; + + return '<!DOCTYPE html><html lang="en">' + . BridgeList::getHead() + . '<body onload="search()">' + . BridgeList::getHeader() + . BridgeList::getSearchbar() + . BridgeList::getBridges($showInactive, $totalBridges, $totalActiveBridges) + . BridgeList::getFooter($totalBridges, $totalActiveBridges, $showInactive) + . '</body></html>'; + } } diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index 451f625f..ba1c3cb9 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,62 +7,62 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ class CacheFactory { - private $folder; - private $cacheNames; + private $folder; + private $cacheNames; - public function __construct(string $folder = PATH_LIB_CACHES) - { - $this->folder = $folder; - // create cache names - foreach(scandir($this->folder) as $file) { - if(preg_match('/^([^.]+)Cache\.php$/U', $file, $m)) { - $this->cacheNames[] = $m[1]; - } - } - } + public function __construct(string $folder = PATH_LIB_CACHES) + { + $this->folder = $folder; + // create cache names + foreach (scandir($this->folder) as $file) { + if (preg_match('/^([^.]+)Cache\.php$/U', $file, $m)) { + $this->cacheNames[] = $m[1]; + } + } + } - /** - * @param string $name The name of the cache e.g. "File", "Memcached" or "SQLite" - */ - public function create(string $name): CacheInterface - { - $name = $this->sanitizeCacheName($name) . 'Cache'; + /** + * @param string $name The name of the cache e.g. "File", "Memcached" or "SQLite" + */ + public function create(string $name): CacheInterface + { + $name = $this->sanitizeCacheName($name) . 'Cache'; - if(! preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name)) { - throw new \InvalidArgumentException('Cache name invalid!'); - } + if (! preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name)) { + throw new \InvalidArgumentException('Cache name invalid!'); + } - $filePath = $this->folder . $name . '.php'; - if(!file_exists($filePath)) { - throw new \Exception('Invalid cache'); - } - $className = '\\' . $name; - return new $className(); - } + $filePath = $this->folder . $name . '.php'; + if (!file_exists($filePath)) { + throw new \Exception('Invalid cache'); + } + $className = '\\' . $name; + return new $className(); + } - protected function sanitizeCacheName(string $name) - { - // Trim trailing '.php' if exists - if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { - $name = $matches[1]; - } + protected function sanitizeCacheName(string $name) + { + // Trim trailing '.php' if exists + if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { + $name = $matches[1]; + } - // Trim trailing 'Cache' if exists - if (preg_match('/(.+)(?:Cache)$/i', $name, $matches)) { - $name = $matches[1]; - } + // Trim trailing 'Cache' if exists + if (preg_match('/(.+)(?:Cache)$/i', $name, $matches)) { + $name = $matches[1]; + } - if(in_array(strtolower($name), array_map('strtolower', $this->cacheNames))) { - $index = array_search(strtolower($name), array_map('strtolower', $this->cacheNames)); - return $this->cacheNames[$index]; - } - return null; - } + if (in_array(strtolower($name), array_map('strtolower', $this->cacheNames))) { + $index = array_search(strtolower($name), array_map('strtolower', $this->cacheNames)); + return $this->cacheNames[$index]; + } + return null; + } } diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 091c5f02..67cee681 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,61 +7,62 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** * The cache interface */ -interface CacheInterface { - /** - * Set scope of the current cache - * - * If $scope is an empty string, the cache is set to a global context. - * - * @param string $scope The scope the data is related to - */ - public function setScope($scope); +interface CacheInterface +{ + /** + * Set scope of the current cache + * + * If $scope is an empty string, the cache is set to a global context. + * + * @param string $scope The scope the data is related to + */ + public function setScope($scope); - /** - * Set key to assign the current data - * - * Since $key can be anything, the cache implementation must ensure to - * assign the related data reliably; most commonly by serializing and - * hashing the key in an appropriate way. - * - * @param array $key The key the data is related to - */ - public function setKey($key); + /** + * Set key to assign the current data + * + * Since $key can be anything, the cache implementation must ensure to + * assign the related data reliably; most commonly by serializing and + * hashing the key in an appropriate way. + * + * @param array $key The key the data is related to + */ + public function setKey($key); - /** - * Loads data from cache - * - * @return mixed The cached data or null - */ - public function loadData(); + /** + * Loads data from cache + * + * @return mixed The cached data or null + */ + public function loadData(); - /** - * Stores data to the cache - * - * @param mixed $data The data to store - * @return self The cache object - */ - public function saveData($data); + /** + * Stores data to the cache + * + * @param mixed $data The data to store + * @return self The cache object + */ + public function saveData($data); - /** - * Returns the timestamp for the curent cache data - * - * @return int Timestamp or null - */ - public function getTime(); + /** + * Returns the timestamp for the curent cache data + * + * @return int Timestamp or null + */ + public function getTime(); - /** - * Removes any data that is older than the specified age from cache - * - * @param int $seconds The cache age in seconds - */ - public function purgeCache($seconds); + /** + * Removes any data that is older than the specified age from cache + * + * @param int $seconds The cache age in seconds + */ + public function purgeCache($seconds); } diff --git a/lib/Configuration.php b/lib/Configuration.php index e97e7d6b..ce01b7df 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -16,294 +17,317 @@ * * This class implements a configuration module for RSS-Bridge. */ -final class Configuration { - - /** - * Holds the current release version of RSS-Bridge. - * - * Do not access this property directly! - * Use {@see Configuration::getVersion()} instead. - * - * @var string - * - * @todo Replace this property by a constant. - */ - public static $VERSION = 'dev.2022-06-14'; - - /** - * Holds the configuration data. - * - * Do not access this property directly! - * Use {@see Configuration::getConfig()} instead. - * - * @var array|null - */ - private static $config = null; - - /** - * Throw an exception when trying to create a new instance of this class. - * - * @throws \LogicException if called. - */ - public function __construct(){ - throw new \LogicException('Can\'t create object of this class!'); - } - - /** - * Verifies the current installation of RSS-Bridge and PHP. - * - * Returns an error message and aborts execution if the installation does - * not satisfy the requirements of RSS-Bridge. - * - * **Requirements** - * - PHP 7.1.0 or higher - * - `openssl` extension - * - `libxml` extension - * - `mbstring` extension - * - `simplexml` extension - * - `curl` extension - * - `json` extension - * - The cache folder specified by {@see PATH_CACHE} requires write permission - * - The whitelist file specified by {@see WHITELIST} requires write permission - * - * @link http://php.net/supported-versions.php PHP Supported Versions - * @link http://php.net/manual/en/book.openssl.php OpenSSL - * @link http://php.net/manual/en/book.libxml.php libxml - * @link http://php.net/manual/en/book.mbstring.php Multibyte String (mbstring) - * @link http://php.net/manual/en/book.simplexml.php SimpleXML - * @link http://php.net/manual/en/book.curl.php Client URL Library (curl) - * @link http://php.net/manual/en/book.json.php JavaScript Object Notation (json) - * - * @return void - */ - public static function verifyInstallation() { - - // Check PHP version - if(version_compare(PHP_VERSION, '7.4.0') === -1) { - self::reportError('RSS-Bridge requires at least PHP version 7.4.0!'); - } - // extensions check - if(!extension_loaded('openssl')) - self::reportError('"openssl" extension not loaded. Please check "php.ini"'); - - if(!extension_loaded('libxml')) - self::reportError('"libxml" extension not loaded. Please check "php.ini"'); - - if(!extension_loaded('mbstring')) - self::reportError('"mbstring" extension not loaded. Please check "php.ini"'); - - if(!extension_loaded('simplexml')) - self::reportError('"simplexml" extension not loaded. Please check "php.ini"'); - - // Allow RSS-Bridge to run without curl module in CLI mode without root certificates - if(!extension_loaded('curl') && !(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo')))) - self::reportError('"curl" extension not loaded. Please check "php.ini"'); - - if(!extension_loaded('json')) - self::reportError('"json" extension not loaded. Please check "php.ini"'); - - } - - /** - * Loads the configuration from disk and checks if the parameters are valid. - * - * Returns an error message and aborts execution if the configuration is invalid. - * - * The RSS-Bridge configuration is split into two files: - * - {@see FILE_CONFIG_DEFAULT} The default configuration file that ships - * with every release of RSS-Bridge (do not modify this file!). - * - {@see FILE_CONFIG} The local configuration file that can be modified - * by server administrators. - * - * RSS-Bridge will first load {@see FILE_CONFIG_DEFAULT} into memory and then - * replace parameters with the contents of {@see FILE_CONFIG}. That way new - * parameters are automatically initialized with default values and custom - * configurations can be reduced to the minimum set of parametes necessary - * (only the ones that changed). - * - * The configuration files must be placed in the root folder of RSS-Bridge - * (next to `index.php`). - * - * _Notice_: The configuration is stored in {@see Configuration::$config}. - * - * @return void - */ - public static function loadConfiguration() { - - if(!file_exists(FILE_CONFIG_DEFAULT)) - self::reportError('The default configuration file is missing at ' . FILE_CONFIG_DEFAULT); - - Configuration::$config = parse_ini_file(FILE_CONFIG_DEFAULT, true, INI_SCANNER_TYPED); - if(!Configuration::$config) - self::reportError('Error parsing ' . FILE_CONFIG_DEFAULT); - - if(file_exists(FILE_CONFIG)) { - // Replace default configuration with custom settings - foreach(parse_ini_file(FILE_CONFIG, true, INI_SCANNER_TYPED) as $header => $section) { - foreach($section as $key => $value) { - Configuration::$config[$header][$key] = $value; - } - } - } - - foreach (getenv() as $envkey => $value) { - // Replace all settings with their respective environment variable if available - $keyArray = explode('_', $envkey); - if($keyArray[0] === 'RSSBRIDGE') { - $header = strtolower($keyArray[1]); - $key = strtolower($keyArray[2]); - if($value === 'true' || $value === 'false') { - $value = filter_var($value, FILTER_VALIDATE_BOOLEAN); - } - Configuration::$config[$header][$key] = $value; - } - } - - if(!is_string(self::getConfig('system', 'timezone')) - || !in_array(self::getConfig('system', 'timezone'), timezone_identifiers_list(DateTimeZone::ALL_WITH_BC))) - self::reportConfigurationError('system', 'timezone'); - - date_default_timezone_set(self::getConfig('system', 'timezone')); - - if(!is_string(self::getConfig('proxy', 'url'))) - self::reportConfigurationError('proxy', 'url', 'Is not a valid string'); - - if(!empty(self::getConfig('proxy', 'url'))) { - /** URL of the proxy server */ - define('PROXY_URL', self::getConfig('proxy', 'url')); - } - - if(!is_bool(self::getConfig('proxy', 'by_bridge'))) - self::reportConfigurationError('proxy', 'by_bridge', 'Is not a valid Boolean'); - - /** True if proxy usage can be enabled selectively for each bridge */ - define('PROXY_BYBRIDGE', self::getConfig('proxy', 'by_bridge')); - - if(!is_string(self::getConfig('proxy', 'name'))) - self::reportConfigurationError('proxy', 'name', 'Is not a valid string'); - - /** Name of the proxy server */ - define('PROXY_NAME', self::getConfig('proxy', 'name')); - - if(!is_string(self::getConfig('cache', 'type'))) - self::reportConfigurationError('cache', 'type', 'Is not a valid string'); - - if(!is_bool(self::getConfig('cache', 'custom_timeout'))) - self::reportConfigurationError('cache', 'custom_timeout', 'Is not a valid Boolean'); - - /** True if the cache timeout can be specified by the user */ - define('CUSTOM_CACHE_TIMEOUT', self::getConfig('cache', 'custom_timeout')); - - if(!is_bool(self::getConfig('authentication', 'enable'))) - self::reportConfigurationError('authentication', 'enable', 'Is not a valid Boolean'); - - if(!is_string(self::getConfig('authentication', 'username'))) - self::reportConfigurationError('authentication', 'username', 'Is not a valid string'); - - if(!is_string(self::getConfig('authentication', 'password'))) - self::reportConfigurationError('authentication', 'password', 'Is not a valid string'); - - if(!empty(self::getConfig('admin', 'email')) - && !filter_var(self::getConfig('admin', 'email'), FILTER_VALIDATE_EMAIL)) - self::reportConfigurationError('admin', 'email', 'Is not a valid email address'); - - if(!is_bool(self::getConfig('admin', 'donations'))) - self::reportConfigurationError('admin', 'donations', 'Is not a valid Boolean'); - - if(!is_string(self::getConfig('error', 'output'))) - self::reportConfigurationError('error', 'output', 'Is not a valid String'); - - if(!is_numeric(self::getConfig('error', 'report_limit')) - || self::getConfig('error', 'report_limit') < 1) - self::reportConfigurationError('admin', 'report_limit', 'Value is invalid'); - - } - - /** - * Returns the value of a parameter identified by section and key. - * - * @param string $section The section name. - * @param string $key The property name (key). - * @return mixed|null The parameter value. - */ - public static function getConfig($section, $key) { - if(array_key_exists($section, self::$config) && array_key_exists($key, self::$config[$section])) { - return self::$config[$section][$key]; - } - - return null; - } - - /** - * Returns the current version string of RSS-Bridge. - * - * This function returns the contents of {@see Configuration::$VERSION} for - * regular installations and the git branch name and commit id for instances - * running in a git environment. - * - * @return string The version string. - */ - public static function getVersion() { - - $headFile = PATH_ROOT . '.git/HEAD'; - - // '@' is used to mute open_basedir warning - if(@is_readable($headFile)) { - - $revisionHashFile = '.git/' . substr(file_get_contents($headFile), 5, -1); - $parts = explode('/', $revisionHashFile); - - if(isset($parts[3])) { - $branchName = $parts[3]; - if(file_exists($revisionHashFile)) { - return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); - } - } - } - - return Configuration::$VERSION; - - } - - /** - * Reports an configuration error for the specified section and key to the - * user and ends execution - * - * @param string $section The section name - * @param string $key The configuration key - * @param string $message An optional message to the user - * - * @return void - */ - private static function reportConfigurationError($section, $key, $message = '') { - - $report = "Parameter [{$section}] => \"{$key}\" is invalid!" . PHP_EOL; - - if(file_exists(FILE_CONFIG)) { - $report .= 'Please check your configuration file at ' . FILE_CONFIG . PHP_EOL; - } elseif(!file_exists(FILE_CONFIG_DEFAULT)) { - $report .= 'The default configuration file is missing at ' . FILE_CONFIG_DEFAULT . PHP_EOL; - } else { - $report .= 'The default configuration file is broken.' . PHP_EOL - . 'Restore the original file from ' . REPOSITORY . PHP_EOL; - } - - $report .= $message; - self::reportError($report); - - } - - /** - * Reports an error message to the user and ends execution - * - * @param string $message The error message - * - * @return void - */ - private static function reportError($message) { - - header('Content-Type: text/plain', true, 500); - die('Configuration error' . PHP_EOL . $message); - - } +final class Configuration +{ + /** + * Holds the current release version of RSS-Bridge. + * + * Do not access this property directly! + * Use {@see Configuration::getVersion()} instead. + * + * @var string + * + * @todo Replace this property by a constant. + */ + public static $VERSION = 'dev.2022-06-14'; + + /** + * Holds the configuration data. + * + * Do not access this property directly! + * Use {@see Configuration::getConfig()} instead. + * + * @var array|null + */ + private static $config = null; + + /** + * Throw an exception when trying to create a new instance of this class. + * + * @throws \LogicException if called. + */ + public function __construct() + { + throw new \LogicException('Can\'t create object of this class!'); + } + + /** + * Verifies the current installation of RSS-Bridge and PHP. + * + * Returns an error message and aborts execution if the installation does + * not satisfy the requirements of RSS-Bridge. + * + * **Requirements** + * - PHP 7.1.0 or higher + * - `openssl` extension + * - `libxml` extension + * - `mbstring` extension + * - `simplexml` extension + * - `curl` extension + * - `json` extension + * - The cache folder specified by {@see PATH_CACHE} requires write permission + * - The whitelist file specified by {@see WHITELIST} requires write permission + * + * @link http://php.net/supported-versions.php PHP Supported Versions + * @link http://php.net/manual/en/book.openssl.php OpenSSL + * @link http://php.net/manual/en/book.libxml.php libxml + * @link http://php.net/manual/en/book.mbstring.php Multibyte String (mbstring) + * @link http://php.net/manual/en/book.simplexml.php SimpleXML + * @link http://php.net/manual/en/book.curl.php Client URL Library (curl) + * @link http://php.net/manual/en/book.json.php JavaScript Object Notation (json) + * + * @return void + */ + public static function verifyInstallation() + { + // Check PHP version + if (version_compare(PHP_VERSION, '7.4.0') === -1) { + self::reportError('RSS-Bridge requires at least PHP version 7.4.0!'); + } + // extensions check + if (!extension_loaded('openssl')) { + self::reportError('"openssl" extension not loaded. Please check "php.ini"'); + } + + if (!extension_loaded('libxml')) { + self::reportError('"libxml" extension not loaded. Please check "php.ini"'); + } + + if (!extension_loaded('mbstring')) { + self::reportError('"mbstring" extension not loaded. Please check "php.ini"'); + } + + if (!extension_loaded('simplexml')) { + self::reportError('"simplexml" extension not loaded. Please check "php.ini"'); + } + + // Allow RSS-Bridge to run without curl module in CLI mode without root certificates + if (!extension_loaded('curl') && !(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo')))) { + self::reportError('"curl" extension not loaded. Please check "php.ini"'); + } + + if (!extension_loaded('json')) { + self::reportError('"json" extension not loaded. Please check "php.ini"'); + } + } + + /** + * Loads the configuration from disk and checks if the parameters are valid. + * + * Returns an error message and aborts execution if the configuration is invalid. + * + * The RSS-Bridge configuration is split into two files: + * - {@see FILE_CONFIG_DEFAULT} The default configuration file that ships + * with every release of RSS-Bridge (do not modify this file!). + * - {@see FILE_CONFIG} The local configuration file that can be modified + * by server administrators. + * + * RSS-Bridge will first load {@see FILE_CONFIG_DEFAULT} into memory and then + * replace parameters with the contents of {@see FILE_CONFIG}. That way new + * parameters are automatically initialized with default values and custom + * configurations can be reduced to the minimum set of parametes necessary + * (only the ones that changed). + * + * The configuration files must be placed in the root folder of RSS-Bridge + * (next to `index.php`). + * + * _Notice_: The configuration is stored in {@see Configuration::$config}. + * + * @return void + */ + public static function loadConfiguration() + { + if (!file_exists(FILE_CONFIG_DEFAULT)) { + self::reportError('The default configuration file is missing at ' . FILE_CONFIG_DEFAULT); + } + + Configuration::$config = parse_ini_file(FILE_CONFIG_DEFAULT, true, INI_SCANNER_TYPED); + if (!Configuration::$config) { + self::reportError('Error parsing ' . FILE_CONFIG_DEFAULT); + } + + if (file_exists(FILE_CONFIG)) { + // Replace default configuration with custom settings + foreach (parse_ini_file(FILE_CONFIG, true, INI_SCANNER_TYPED) as $header => $section) { + foreach ($section as $key => $value) { + Configuration::$config[$header][$key] = $value; + } + } + } + + foreach (getenv() as $envkey => $value) { + // Replace all settings with their respective environment variable if available + $keyArray = explode('_', $envkey); + if ($keyArray[0] === 'RSSBRIDGE') { + $header = strtolower($keyArray[1]); + $key = strtolower($keyArray[2]); + if ($value === 'true' || $value === 'false') { + $value = filter_var($value, FILTER_VALIDATE_BOOLEAN); + } + Configuration::$config[$header][$key] = $value; + } + } + + if ( + !is_string(self::getConfig('system', 'timezone')) + || !in_array(self::getConfig('system', 'timezone'), timezone_identifiers_list(DateTimeZone::ALL_WITH_BC)) + ) { + self::reportConfigurationError('system', 'timezone'); + } + + date_default_timezone_set(self::getConfig('system', 'timezone')); + + if (!is_string(self::getConfig('proxy', 'url'))) { + self::reportConfigurationError('proxy', 'url', 'Is not a valid string'); + } + + if (!empty(self::getConfig('proxy', 'url'))) { + /** URL of the proxy server */ + define('PROXY_URL', self::getConfig('proxy', 'url')); + } + + if (!is_bool(self::getConfig('proxy', 'by_bridge'))) { + self::reportConfigurationError('proxy', 'by_bridge', 'Is not a valid Boolean'); + } + + /** True if proxy usage can be enabled selectively for each bridge */ + define('PROXY_BYBRIDGE', self::getConfig('proxy', 'by_bridge')); + + if (!is_string(self::getConfig('proxy', 'name'))) { + self::reportConfigurationError('proxy', 'name', 'Is not a valid string'); + } + + /** Name of the proxy server */ + define('PROXY_NAME', self::getConfig('proxy', 'name')); + + if (!is_string(self::getConfig('cache', 'type'))) { + self::reportConfigurationError('cache', 'type', 'Is not a valid string'); + } + + if (!is_bool(self::getConfig('cache', 'custom_timeout'))) { + self::reportConfigurationError('cache', 'custom_timeout', 'Is not a valid Boolean'); + } + + /** True if the cache timeout can be specified by the user */ + define('CUSTOM_CACHE_TIMEOUT', self::getConfig('cache', 'custom_timeout')); + + if (!is_bool(self::getConfig('authentication', 'enable'))) { + self::reportConfigurationError('authentication', 'enable', 'Is not a valid Boolean'); + } + + if (!is_string(self::getConfig('authentication', 'username'))) { + self::reportConfigurationError('authentication', 'username', 'Is not a valid string'); + } + + if (!is_string(self::getConfig('authentication', 'password'))) { + self::reportConfigurationError('authentication', 'password', 'Is not a valid string'); + } + + if ( + !empty(self::getConfig('admin', 'email')) + && !filter_var(self::getConfig('admin', 'email'), FILTER_VALIDATE_EMAIL) + ) { + self::reportConfigurationError('admin', 'email', 'Is not a valid email address'); + } + + if (!is_bool(self::getConfig('admin', 'donations'))) { + self::reportConfigurationError('admin', 'donations', 'Is not a valid Boolean'); + } + + if (!is_string(self::getConfig('error', 'output'))) { + self::reportConfigurationError('error', 'output', 'Is not a valid String'); + } + + if ( + !is_numeric(self::getConfig('error', 'report_limit')) + || self::getConfig('error', 'report_limit') < 1 + ) { + self::reportConfigurationError('admin', 'report_limit', 'Value is invalid'); + } + } + + /** + * Returns the value of a parameter identified by section and key. + * + * @param string $section The section name. + * @param string $key The property name (key). + * @return mixed|null The parameter value. + */ + public static function getConfig($section, $key) + { + if (array_key_exists($section, self::$config) && array_key_exists($key, self::$config[$section])) { + return self::$config[$section][$key]; + } + + return null; + } + + /** + * Returns the current version string of RSS-Bridge. + * + * This function returns the contents of {@see Configuration::$VERSION} for + * regular installations and the git branch name and commit id for instances + * running in a git environment. + * + * @return string The version string. + */ + public static function getVersion() + { + $headFile = PATH_ROOT . '.git/HEAD'; + + // '@' is used to mute open_basedir warning + if (@is_readable($headFile)) { + $revisionHashFile = '.git/' . substr(file_get_contents($headFile), 5, -1); + $parts = explode('/', $revisionHashFile); + + if (isset($parts[3])) { + $branchName = $parts[3]; + if (file_exists($revisionHashFile)) { + return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); + } + } + } + + return Configuration::$VERSION; + } + + /** + * Reports an configuration error for the specified section and key to the + * user and ends execution + * + * @param string $section The section name + * @param string $key The configuration key + * @param string $message An optional message to the user + * + * @return void + */ + private static function reportConfigurationError($section, $key, $message = '') + { + $report = "Parameter [{$section}] => \"{$key}\" is invalid!" . PHP_EOL; + + if (file_exists(FILE_CONFIG)) { + $report .= 'Please check your configuration file at ' . FILE_CONFIG . PHP_EOL; + } elseif (!file_exists(FILE_CONFIG_DEFAULT)) { + $report .= 'The default configuration file is missing at ' . FILE_CONFIG_DEFAULT . PHP_EOL; + } else { + $report .= 'The default configuration file is broken.' . PHP_EOL + . 'Restore the original file from ' . REPOSITORY . PHP_EOL; + } + + $report .= $message; + self::reportError($report); + } + + /** + * Reports an error message to the user and ends execution + * + * @param string $message The error message + * + * @return void + */ + private static function reportError($message) + { + header('Content-Type: text/plain', true, 500); + die('Configuration error' . PHP_EOL . $message); + } } diff --git a/lib/Debug.php b/lib/Debug.php index f912fb3b..75bf5f33 100644 --- a/lib/Debug.php +++ b/lib/Debug.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -30,92 +31,93 @@ * Warning: In debug mode your server may display sensitive information! For * security reasons it is recommended to whitelist only specific IP addresses. */ -class Debug { - - /** - * Indicates if debug mode is enabled. - * - * Do not access this property directly! - * Use {@see Debug::isEnabled()} instead. - * - * @var bool - */ - private static $enabled = false; - - /** - * Indicates if debug mode is secure. - * - * Do not access this property directly! - * Use {@see Debug::isSecure()} instead. - * - * @var bool - */ - private static $secure = false; - - /** - * Returns true if debug mode is enabled - * - * If debug mode is enabled, sets `display_errors = 1` and `error_reporting = E_ALL` - * - * @return bool True if enabled. - */ - public static function isEnabled() { - static $firstCall = true; // Initialized on first call - - if($firstCall && file_exists(PATH_ROOT . 'DEBUG')) { - - $debug_whitelist = trim(file_get_contents(PATH_ROOT . 'DEBUG')); - - self::$enabled = empty($debug_whitelist) || in_array($_SERVER['REMOTE_ADDR'], - explode("\n", str_replace("\r", '', $debug_whitelist) - ) - ); - - if(self::$enabled) { - ini_set('display_errors', '1'); - error_reporting(E_ALL); - - self::$secure = !empty($debug_whitelist); - } - - $firstCall = false; // Skip check on next call - - } - - return self::$enabled; - } - - /** - * Returns true if debug mode is enabled only for specific IP addresses. - * - * Notice: The security flag is set by {@see Debug::isEnabled()}. If this - * function is called before {@see Debug::isEnabled()}, the default value is - * false! - * - * @return bool True if debug mode is secure - */ - public static function isSecure() { - return self::$secure; - } - - /** - * Adds a debug message to error_log if debug mode is enabled - * - * @param string $text The message to add to error_log - */ - public static function log($text) { - if(!self::isEnabled()) { - return; - } - - $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); - $calling = end($backtrace); - $message = $calling['file'] . ':' - . $calling['line'] . ' class ' - . (isset($calling['class']) ? $calling['class'] : '<no-class>') . '->' - . $calling['function'] . ' - ' - . $text; - - error_log($message); - } +class Debug +{ + /** + * Indicates if debug mode is enabled. + * + * Do not access this property directly! + * Use {@see Debug::isEnabled()} instead. + * + * @var bool + */ + private static $enabled = false; + + /** + * Indicates if debug mode is secure. + * + * Do not access this property directly! + * Use {@see Debug::isSecure()} instead. + * + * @var bool + */ + private static $secure = false; + + /** + * Returns true if debug mode is enabled + * + * If debug mode is enabled, sets `display_errors = 1` and `error_reporting = E_ALL` + * + * @return bool True if enabled. + */ + public static function isEnabled() + { + static $firstCall = true; // Initialized on first call + + if ($firstCall && file_exists(PATH_ROOT . 'DEBUG')) { + $debug_whitelist = trim(file_get_contents(PATH_ROOT . 'DEBUG')); + + self::$enabled = empty($debug_whitelist) || in_array( + $_SERVER['REMOTE_ADDR'], + explode("\n", str_replace("\r", '', $debug_whitelist)) + ); + + if (self::$enabled) { + ini_set('display_errors', '1'); + error_reporting(E_ALL); + + self::$secure = !empty($debug_whitelist); + } + + $firstCall = false; // Skip check on next call + } + + return self::$enabled; + } + + /** + * Returns true if debug mode is enabled only for specific IP addresses. + * + * Notice: The security flag is set by {@see Debug::isEnabled()}. If this + * function is called before {@see Debug::isEnabled()}, the default value is + * false! + * + * @return bool True if debug mode is secure + */ + public static function isSecure() + { + return self::$secure; + } + + /** + * Adds a debug message to error_log if debug mode is enabled + * + * @param string $text The message to add to error_log + */ + public static function log($text) + { + if (!self::isEnabled()) { + return; + } + + $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); + $calling = end($backtrace); + $message = $calling['file'] . ':' + . $calling['line'] . ' class ' + . (isset($calling['class']) ? $calling['class'] : '<no-class>') . '->' + . $calling['function'] . ' - ' + . $text; + + error_log($message); + } } diff --git a/lib/Exceptions.php b/lib/Exceptions.php index a9d2365b..8cd42de5 100644 --- a/lib/Exceptions.php +++ b/lib/Exceptions.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,18 +7,19 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** * Builds a GitHub search query to find open bugs for the current bridge */ -function buildGitHubSearchQuery($bridgeName){ - return REPOSITORY - . 'issues?q=' - . urlencode('is:issue is:open ' . $bridgeName); +function buildGitHubSearchQuery($bridgeName) +{ + return REPOSITORY + . 'issues?q=' + . urlencode('is:issue is:open ' . $bridgeName); } /** @@ -33,86 +35,87 @@ function buildGitHubSearchQuery($bridgeName){ * * @todo This function belongs inside a class */ -function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null){ - if(!isset($title) || !isset($body) || empty($title) || empty($body)) { - return null; - } - - // Add title and body - $uri = REPOSITORY - . 'issues/new?title=' - . urlencode($title) - . '&body=' - . urlencode($body); - - // Add labels - if(!is_null($labels) && is_array($labels) && count($labels) > 0) { - if(count($lables) === 1) { - $uri .= '&labels=' . urlencode($labels[0]); - } else { - foreach($labels as $label) { - $uri .= '&labels[]=' . urlencode($label); - } - } - } elseif(!is_null($labels) && is_string($labels)) { - $uri .= '&labels=' . urlencode($labels); - } - - // Add maintainer - if(!empty($maintainer)) { - $uri .= '&assignee=' . urlencode($maintainer); - } - - return $uri; +function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null) +{ + if (!isset($title) || !isset($body) || empty($title) || empty($body)) { + return null; + } + + // Add title and body + $uri = REPOSITORY + . 'issues/new?title=' + . urlencode($title) + . '&body=' + . urlencode($body); + + // Add labels + if (!is_null($labels) && is_array($labels) && count($labels) > 0) { + if (count($lables) === 1) { + $uri .= '&labels=' . urlencode($labels[0]); + } else { + foreach ($labels as $label) { + $uri .= '&labels[]=' . urlencode($label); + } + } + } elseif (!is_null($labels) && is_string($labels)) { + $uri .= '&labels=' . urlencode($labels); + } + + // Add maintainer + if (!empty($maintainer)) { + $uri .= '&assignee=' . urlencode($maintainer); + } + + return $uri; } function buildBridgeException(\Throwable $e, BridgeInterface $bridge): string { - $title = $bridge->getName() . ' failed with error ' . $e->getCode(); - - // Build a GitHub compatible message - $body = 'Error message: `' - . $e->getMessage() - . "`\nQuery string: `" - . (isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '') - . "`\nVersion: `" - . Configuration::getVersion() - . '`'; - - $body_html = nl2br($body); - $link = buildGitHubIssueQuery($title, $body, 'Bridge-Broken', $bridge->getMaintainer()); - $searchQuery = buildGitHubSearchQuery($bridge::NAME); - - $header = buildHeader($e, $bridge); - $message = <<<EOD + $title = $bridge->getName() . ' failed with error ' . $e->getCode(); + + // Build a GitHub compatible message + $body = 'Error message: `' + . $e->getMessage() + . "`\nQuery string: `" + . (isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '') + . "`\nVersion: `" + . Configuration::getVersion() + . '`'; + + $body_html = nl2br($body); + $link = buildGitHubIssueQuery($title, $body, 'Bridge-Broken', $bridge->getMaintainer()); + $searchQuery = buildGitHubSearchQuery($bridge::NAME); + + $header = buildHeader($e, $bridge); + $message = <<<EOD <strong>{$bridge->getName()}</strong> was unable to receive or process the remote website's content!<br> {$body_html} EOD; - $section = buildSection($e, $bridge, $message, $link, $searchQuery); + $section = buildSection($e, $bridge, $message, $link, $searchQuery); - return $section; + return $section; } function buildTransformException(\Throwable $e, BridgeInterface $bridge): string { - $title = $bridge->getName() . ' failed with error ' . $e->getCode(); - - // Build a GitHub compatible message - $body = 'Error message: `' - . $e->getMessage() - . "`\nQuery string: `" - . (isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '') - . '`'; - - $link = buildGitHubIssueQuery($title, $body, 'Bridge-Broken', $bridge->getMaintainer()); - $searchQuery = buildGitHubSearchQuery($bridge::NAME); - $header = buildHeader($e, $bridge); - $message = "RSS-Bridge was unable to transform the contents returned by + $title = $bridge->getName() . ' failed with error ' . $e->getCode(); + + // Build a GitHub compatible message + $body = 'Error message: `' + . $e->getMessage() + . "`\nQuery string: `" + . (isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '') + . '`'; + + $link = buildGitHubIssueQuery($title, $body, 'Bridge-Broken', $bridge->getMaintainer()); + $searchQuery = buildGitHubSearchQuery($bridge::NAME); + $header = buildHeader($e, $bridge); + $message = "RSS-Bridge was unable to transform the contents returned by <strong>{$bridge->getName()}</strong>!"; - $section = buildSection($e, $bridge, $message, $link, $searchQuery); + $section = buildSection($e, $bridge, $message, $link, $searchQuery); - return buildPage($title, $header, $section); + return buildPage($title, $header, $section); } /** @@ -124,8 +127,9 @@ function buildTransformException(\Throwable $e, BridgeInterface $bridge): string * * @todo This function belongs inside a class */ -function buildHeader($e, $bridge){ - return <<<EOD +function buildHeader($e, $bridge) +{ + return <<<EOD <header> <h1>Error {$e->getCode()}</h1> <h2>{$e->getMessage()}</h2> @@ -146,8 +150,9 @@ EOD; * * @todo This function belongs inside a class */ -function buildSection($e, $bridge, $message, $link, $searchQuery){ - return <<<EOD +function buildSection($e, $bridge, $message, $link, $searchQuery) +{ + return <<<EOD <section> <p class="exception-message">{$message}</p> <div class="advice"> @@ -178,8 +183,9 @@ EOD; * * @todo This function belongs inside a class */ -function buildPage($title, $header, $section){ - return <<<EOD +function buildPage($title, $header, $section) +{ + return <<<EOD <!DOCTYPE html> <html lang="en"> <head> diff --git a/lib/FactoryAbstract.php b/lib/FactoryAbstract.php index c91ae2e0..53ffb839 100644 --- a/lib/FactoryAbstract.php +++ b/lib/FactoryAbstract.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,65 +7,67 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** * Abstract class for factories. */ -abstract class FactoryAbstract { - - /** - * Holds the working directory - * - * @var string - */ - private $workingDir = null; +abstract class FactoryAbstract +{ + /** + * Holds the working directory + * + * @var string + */ + private $workingDir = null; - /** - * Set the working directory. - * - * @param string $dir The working directory. - * @return void - */ - public function setWorkingDir($dir) { - $this->workingDir = null; + /** + * Set the working directory. + * + * @param string $dir The working directory. + * @return void + */ + public function setWorkingDir($dir) + { + $this->workingDir = null; - if(!is_string($dir)) { - throw new \InvalidArgumentException('Working directory must be a string!'); - } + if (!is_string($dir)) { + throw new \InvalidArgumentException('Working directory must be a string!'); + } - if(!file_exists($dir)) { - throw new \Exception('Working directory does not exist!'); - } + if (!file_exists($dir)) { + throw new \Exception('Working directory does not exist!'); + } - if(!is_dir($dir)) { - throw new \InvalidArgumentException($dir . ' is not a directory!'); - } + if (!is_dir($dir)) { + throw new \InvalidArgumentException($dir . ' is not a directory!'); + } - $this->workingDir = realpath($dir) . '/'; - } + $this->workingDir = realpath($dir) . '/'; + } - /** - * Get the working directory - * - * @return string The working directory. - */ - public function getWorkingDir() { - if(is_null($this->workingDir)) { - throw new \LogicException('Working directory is not set!'); - } + /** + * Get the working directory + * + * @return string The working directory. + */ + public function getWorkingDir() + { + if (is_null($this->workingDir)) { + throw new \LogicException('Working directory is not set!'); + } - return $this->workingDir; - } + return $this->workingDir; + } - /** - * Creates a new instance for the object specified by name. - * - * @param string $name The name of the object to create. - * @return object The object instance - */ - abstract public function create($name); + /** + * Creates a new instance for the object specified by name. + * + * @param string $name The name of the object to create. + * @return object The object instance + */ + abstract public function create($name); } diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index b84c608a..b79bf3a8 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -32,406 +33,452 @@ * @todo The parsing functions should all be private. This class is complicated * enough without having to consider children overriding functions. */ -abstract class FeedExpander extends BridgeAbstract { - - /** Indicates an RSS 1.0 feed */ - const FEED_TYPE_RSS_1_0 = 'RSS_1_0'; - - /** Indicates an RSS 2.0 feed */ - const FEED_TYPE_RSS_2_0 = 'RSS_2_0'; - - /** Indicates an Atom 1.0 feed */ - const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0'; - - /** - * Holds the title of the current feed - * - * @var string - */ - private $title; - - /** - * Holds the URI of the feed - * - * @var string - */ - private $uri; - - /** - * Holds the icon of the feed - * - */ - private $icon; - - /** - * Holds the feed type during internal operations. - * - * @var string - */ - private $feedType; - - /** - * Collects data from an existing feed. - * - * Children should call this function in {@see BridgeInterface::collectData()} - * to extract a feed. - * - * @param string $url URL to the feed. - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return self - */ - public function collectExpandableDatas($url, $maxItems = -1){ - if(empty($url)) { - returnServerError('There is no $url for this RSS expander'); - } - - Debug::log('Loading from ' . $url); - - /* Notice we do not use cache here on purpose: - * we want a fresh view of the RSS stream each time - */ - - $mimeTypes = [ - MrssFormat::MIME_TYPE, - AtomFormat::MIME_TYPE, - '*/*', - ]; - $httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)]; - $content = getContents($url, $httpHeaders) - or returnServerError('Could not request ' . $url); - $rssContent = simplexml_load_string(trim($content)); - - if ($rssContent === false) { - throw new \Exception('Unable to parse string as xml'); - } - - Debug::log('Detecting feed format/version'); - switch(true) { - case isset($rssContent->item[0]): - Debug::log('Detected RSS 1.0 format'); - $this->feedType = self::FEED_TYPE_RSS_1_0; - $this->collectRss1($rssContent, $maxItems); - break; - case isset($rssContent->channel[0]): - Debug::log('Detected RSS 0.9x or 2.0 format'); - $this->feedType = self::FEED_TYPE_RSS_2_0; - $this->collectRss2($rssContent, $maxItems); - break; - case isset($rssContent->entry[0]): - Debug::log('Detected ATOM format'); - $this->feedType = self::FEED_TYPE_ATOM_1_0; - $this->collectAtom1($rssContent, $maxItems); - break; - default: - Debug::log('Unknown feed format/version'); - returnServerError('The feed format is unknown!'); - break; - } - - return $this; - } - - /** - * Collect data from a RSS 1.0 compatible feed - * - * @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0 - * - * @param string $rssContent The RSS content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. - */ - protected function collectRss1($rssContent, $maxItems){ - $this->loadRss2Data($rssContent->channel[0]); - foreach($rssContent->item as $item) { - Debug::log('parsing item ' . var_export($item, true)); - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if($maxItems !== -1 && count($this->items) >= $maxItems) break; - } - } - - /** - * Collect data from a RSS 2.0 compatible feed - * - * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification - * - * @param object $rssContent The RSS content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. - */ - protected function collectRss2($rssContent, $maxItems){ - $rssContent = $rssContent->channel[0]; - Debug::log('RSS content is ===========\n' - . var_export($rssContent, true) - . '==========='); - - $this->loadRss2Data($rssContent); - foreach($rssContent->item as $item) { - Debug::log('parsing item ' . var_export($item, true)); - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if($maxItems !== -1 && count($this->items) >= $maxItems) break; - } - } - - /** - * Collect data from a Atom 1.0 compatible feed - * - * @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format - * - * @param object $content The Atom content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. - */ - protected function collectAtom1($content, $maxItems){ - $this->loadAtomData($content); - foreach($content->entry as $item) { - Debug::log('parsing item ' . var_export($item, true)); - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if($maxItems !== -1 && count($this->items) >= $maxItems) break; - } - } - - /** - * Load RSS 2.0 feed data into RSS-Bridge - * - * @param object $rssContent The RSS content - * @return void - * - * @todo set title, link, description, language, and so on - */ - protected function loadRss2Data($rssContent){ - $this->title = trim((string)$rssContent->title); - $this->uri = trim((string)$rssContent->link); - - if (!empty($rssContent->image)) { - $this->icon = trim((string)$rssContent->image->url); - } - } - - /** - * Load Atom feed data into RSS-Bridge - * - * @param object $content The Atom content - * @return void - */ - protected function loadAtomData($content){ - $this->title = (string)$content->title; - - // Find best link (only one, or first of 'alternate') - if(!isset($content->link)) { - $this->uri = ''; - } elseif (count($content->link) === 1) { - $this->uri = (string)$content->link[0]['href']; - } else { - $this->uri = ''; - foreach($content->link as $link) { - if(strtolower($link['rel']) === 'alternate') { - $this->uri = (string)$link['href']; - break; - } - } - } - - if(!empty($content->icon)) { - $this->icon = (string)$content->icon; - } elseif(!empty($content->logo)) { - $this->icon = (string)$content->logo; - } - } - - /** - * Parse the contents of a single Atom feed item into a RSS-Bridge item for - * further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? - */ - protected function parseATOMItem($feedItem){ - // Some ATOM entries also contain RSS 2.0 fields - $item = $this->parseRss2Item($feedItem); - - if(isset($feedItem->id)) $item['uri'] = (string)$feedItem->id; - if(isset($feedItem->title)) $item['title'] = (string)$feedItem->title; - if(isset($feedItem->updated)) $item['timestamp'] = strtotime((string)$feedItem->updated); - if(isset($feedItem->author)) $item['author'] = (string)$feedItem->author->name; - if(isset($feedItem->content)) $item['content'] = (string)$feedItem->content; - - //When "link" field is present, URL is more reliable than "id" field - if (count($feedItem->link) === 1) { - $item['uri'] = (string)$feedItem->link[0]['href']; - } else { - foreach($feedItem->link as $link) { - if(strtolower($link['rel']) === 'alternate') { - $item['uri'] = (string)$link['href']; - } - if(strtolower($link['rel']) === 'enclosure') { - $item['enclosures'][] = (string)$link['href']; - } - } - } - - return $item; - } - - /** - * Parse the contents of a single RSS 0.91 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? - */ - protected function parseRss091Item($feedItem){ - $item = array(); - if(isset($feedItem->link)) $item['uri'] = (string)$feedItem->link; - if(isset($feedItem->title)) $item['title'] = (string)$feedItem->title; - // rss 0.91 doesn't support timestamps - // rss 0.91 doesn't support authors - // rss 0.91 doesn't support enclosures - if(isset($feedItem->description)) $item['content'] = (string)$feedItem->description; - return $item; - } - - /** - * Parse the contents of a single RSS 1.0 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? - */ - protected function parseRss1Item($feedItem){ - // 1.0 adds optional elements around the 0.91 standard - $item = $this->parseRss091Item($feedItem); - - $namespaces = $feedItem->getNamespaces(true); - if(isset($namespaces['dc'])) { - $dc = $feedItem->children($namespaces['dc']); - if(isset($dc->date)) $item['timestamp'] = strtotime((string)$dc->date); - if(isset($dc->creator)) $item['author'] = (string)$dc->creator; - } - - return $item; - } - - /** - * Parse the contents of a single RSS 2.0 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? - */ - protected function parseRss2Item($feedItem){ - // Primary data is compatible to 0.91 with some additional data - $item = $this->parseRss091Item($feedItem); - - $namespaces = $feedItem->getNamespaces(true); - if(isset($namespaces['dc'])) $dc = $feedItem->children($namespaces['dc']); - if(isset($namespaces['media'])) $media = $feedItem->children($namespaces['media']); - - if(isset($feedItem->guid)) { - foreach($feedItem->guid->attributes() as $attribute => $value) { - if($attribute === 'isPermaLink' - && ($value === 'true' || ( - filter_var($feedItem->guid, FILTER_VALIDATE_URL) - && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) - ) - ) - ) { - $item['uri'] = (string)$feedItem->guid; - break; - } - } - } - - if(isset($feedItem->pubDate)) { - $item['timestamp'] = strtotime((string)$feedItem->pubDate); - } elseif(isset($dc->date)) { - $item['timestamp'] = strtotime((string)$dc->date); - } - - if(isset($feedItem->author)) { - $item['author'] = (string)$feedItem->author; - } elseif (isset($feedItem->creator)) { - $item['author'] = (string)$feedItem->creator; - } elseif(isset($dc->creator)) { - $item['author'] = (string)$dc->creator; - } elseif(isset($media->credit)) { - $item['author'] = (string)$media->credit; - } - - if(isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { - $item['enclosures'] = array((string)$feedItem->enclosure['url']); - } - - return $item; - } - - /** - * Parse the contents of a single feed item, depending on the current feed - * type, into a RSS-Bridge item. - * - * @param object $item The current feed item - * @return object A RSS-Bridge item, with (hopefully) the whole content - */ - protected function parseItem($item){ - switch($this->feedType) { - case self::FEED_TYPE_RSS_1_0: - return $this->parseRss1Item($item); - break; - case self::FEED_TYPE_RSS_2_0: - return $this->parseRss2Item($item); - break; - case self::FEED_TYPE_ATOM_1_0: - return $this->parseATOMItem($item); - break; - default: returnClientError('Unknown version ' . $this->getInput('version') . '!'); - } - } - - /** {@inheritdoc} */ - public function getURI(){ - return !empty($this->uri) ? $this->uri : parent::getURI(); - } - - /** {@inheritdoc} */ - public function getName(){ - return !empty($this->title) ? $this->title : parent::getName(); - } - - /** {@inheritdoc} */ - public function getIcon(){ - return !empty($this->icon) ? $this->icon : parent::getIcon(); - } +abstract class FeedExpander extends BridgeAbstract +{ + /** Indicates an RSS 1.0 feed */ + const FEED_TYPE_RSS_1_0 = 'RSS_1_0'; + + /** Indicates an RSS 2.0 feed */ + const FEED_TYPE_RSS_2_0 = 'RSS_2_0'; + + /** Indicates an Atom 1.0 feed */ + const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0'; + + /** + * Holds the title of the current feed + * + * @var string + */ + private $title; + + /** + * Holds the URI of the feed + * + * @var string + */ + private $uri; + + /** + * Holds the icon of the feed + * + */ + private $icon; + + /** + * Holds the feed type during internal operations. + * + * @var string + */ + private $feedType; + + /** + * Collects data from an existing feed. + * + * Children should call this function in {@see BridgeInterface::collectData()} + * to extract a feed. + * + * @param string $url URL to the feed. + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return self + */ + public function collectExpandableDatas($url, $maxItems = -1) + { + if (empty($url)) { + returnServerError('There is no $url for this RSS expander'); + } + + Debug::log('Loading from ' . $url); + + /* Notice we do not use cache here on purpose: + * we want a fresh view of the RSS stream each time + */ + + $mimeTypes = [ + MrssFormat::MIME_TYPE, + AtomFormat::MIME_TYPE, + '*/*', + ]; + $httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)]; + $content = getContents($url, $httpHeaders) + or returnServerError('Could not request ' . $url); + $rssContent = simplexml_load_string(trim($content)); + + if ($rssContent === false) { + throw new \Exception('Unable to parse string as xml'); + } + + Debug::log('Detecting feed format/version'); + switch (true) { + case isset($rssContent->item[0]): + Debug::log('Detected RSS 1.0 format'); + $this->feedType = self::FEED_TYPE_RSS_1_0; + $this->collectRss1($rssContent, $maxItems); + break; + case isset($rssContent->channel[0]): + Debug::log('Detected RSS 0.9x or 2.0 format'); + $this->feedType = self::FEED_TYPE_RSS_2_0; + $this->collectRss2($rssContent, $maxItems); + break; + case isset($rssContent->entry[0]): + Debug::log('Detected ATOM format'); + $this->feedType = self::FEED_TYPE_ATOM_1_0; + $this->collectAtom1($rssContent, $maxItems); + break; + default: + Debug::log('Unknown feed format/version'); + returnServerError('The feed format is unknown!'); + break; + } + + return $this; + } + + /** + * Collect data from a RSS 1.0 compatible feed + * + * @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0 + * + * @param string $rssContent The RSS content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ + protected function collectRss1($rssContent, $maxItems) + { + $this->loadRss2Data($rssContent->channel[0]); + foreach ($rssContent->item as $item) { + Debug::log('parsing item ' . var_export($item, true)); + $tmp_item = $this->parseItem($item); + if (!empty($tmp_item)) { + $this->items[] = $tmp_item; + } + if ($maxItems !== -1 && count($this->items) >= $maxItems) { + break; + } + } + } + + /** + * Collect data from a RSS 2.0 compatible feed + * + * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification + * + * @param object $rssContent The RSS content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ + protected function collectRss2($rssContent, $maxItems) + { + $rssContent = $rssContent->channel[0]; + Debug::log('RSS content is ===========\n' + . var_export($rssContent, true) + . '==========='); + + $this->loadRss2Data($rssContent); + foreach ($rssContent->item as $item) { + Debug::log('parsing item ' . var_export($item, true)); + $tmp_item = $this->parseItem($item); + if (!empty($tmp_item)) { + $this->items[] = $tmp_item; + } + if ($maxItems !== -1 && count($this->items) >= $maxItems) { + break; + } + } + } + + /** + * Collect data from a Atom 1.0 compatible feed + * + * @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format + * + * @param object $content The Atom content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ + protected function collectAtom1($content, $maxItems) + { + $this->loadAtomData($content); + foreach ($content->entry as $item) { + Debug::log('parsing item ' . var_export($item, true)); + $tmp_item = $this->parseItem($item); + if (!empty($tmp_item)) { + $this->items[] = $tmp_item; + } + if ($maxItems !== -1 && count($this->items) >= $maxItems) { + break; + } + } + } + + /** + * Load RSS 2.0 feed data into RSS-Bridge + * + * @param object $rssContent The RSS content + * @return void + * + * @todo set title, link, description, language, and so on + */ + protected function loadRss2Data($rssContent) + { + $this->title = trim((string)$rssContent->title); + $this->uri = trim((string)$rssContent->link); + + if (!empty($rssContent->image)) { + $this->icon = trim((string)$rssContent->image->url); + } + } + + /** + * Load Atom feed data into RSS-Bridge + * + * @param object $content The Atom content + * @return void + */ + protected function loadAtomData($content) + { + $this->title = (string)$content->title; + + // Find best link (only one, or first of 'alternate') + if (!isset($content->link)) { + $this->uri = ''; + } elseif (count($content->link) === 1) { + $this->uri = (string)$content->link[0]['href']; + } else { + $this->uri = ''; + foreach ($content->link as $link) { + if (strtolower($link['rel']) === 'alternate') { + $this->uri = (string)$link['href']; + break; + } + } + } + + if (!empty($content->icon)) { + $this->icon = (string)$content->icon; + } elseif (!empty($content->logo)) { + $this->icon = (string)$content->logo; + } + } + + /** + * Parse the contents of a single Atom feed item into a RSS-Bridge item for + * further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ + protected function parseATOMItem($feedItem) + { + // Some ATOM entries also contain RSS 2.0 fields + $item = $this->parseRss2Item($feedItem); + + if (isset($feedItem->id)) { + $item['uri'] = (string)$feedItem->id; + } + if (isset($feedItem->title)) { + $item['title'] = (string)$feedItem->title; + } + if (isset($feedItem->updated)) { + $item['timestamp'] = strtotime((string)$feedItem->updated); + } + if (isset($feedItem->author)) { + $item['author'] = (string)$feedItem->author->name; + } + if (isset($feedItem->content)) { + $item['content'] = (string)$feedItem->content; + } + + //When "link" field is present, URL is more reliable than "id" field + if (count($feedItem->link) === 1) { + $item['uri'] = (string)$feedItem->link[0]['href']; + } else { + foreach ($feedItem->link as $link) { + if (strtolower($link['rel']) === 'alternate') { + $item['uri'] = (string)$link['href']; + } + if (strtolower($link['rel']) === 'enclosure') { + $item['enclosures'][] = (string)$link['href']; + } + } + } + + return $item; + } + + /** + * Parse the contents of a single RSS 0.91 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ + protected function parseRss091Item($feedItem) + { + $item = []; + if (isset($feedItem->link)) { + $item['uri'] = (string)$feedItem->link; + } + if (isset($feedItem->title)) { + $item['title'] = (string)$feedItem->title; + } + // rss 0.91 doesn't support timestamps + // rss 0.91 doesn't support authors + // rss 0.91 doesn't support enclosures + if (isset($feedItem->description)) { + $item['content'] = (string)$feedItem->description; + } + return $item; + } + + /** + * Parse the contents of a single RSS 1.0 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ + protected function parseRss1Item($feedItem) + { + // 1.0 adds optional elements around the 0.91 standard + $item = $this->parseRss091Item($feedItem); + + $namespaces = $feedItem->getNamespaces(true); + if (isset($namespaces['dc'])) { + $dc = $feedItem->children($namespaces['dc']); + if (isset($dc->date)) { + $item['timestamp'] = strtotime((string)$dc->date); + } + if (isset($dc->creator)) { + $item['author'] = (string)$dc->creator; + } + } + + return $item; + } + + /** + * Parse the contents of a single RSS 2.0 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ + protected function parseRss2Item($feedItem) + { + // Primary data is compatible to 0.91 with some additional data + $item = $this->parseRss091Item($feedItem); + + $namespaces = $feedItem->getNamespaces(true); + if (isset($namespaces['dc'])) { + $dc = $feedItem->children($namespaces['dc']); + } + if (isset($namespaces['media'])) { + $media = $feedItem->children($namespaces['media']); + } + + if (isset($feedItem->guid)) { + foreach ($feedItem->guid->attributes() as $attribute => $value) { + if ( + $attribute === 'isPermaLink' + && ($value === 'true' || ( + filter_var($feedItem->guid, FILTER_VALIDATE_URL) + && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) + ) + ) + ) { + $item['uri'] = (string)$feedItem->guid; + break; + } + } + } + + if (isset($feedItem->pubDate)) { + $item['timestamp'] = strtotime((string)$feedItem->pubDate); + } elseif (isset($dc->date)) { + $item['timestamp'] = strtotime((string)$dc->date); + } + + if (isset($feedItem->author)) { + $item['author'] = (string)$feedItem->author; + } elseif (isset($feedItem->creator)) { + $item['author'] = (string)$feedItem->creator; + } elseif (isset($dc->creator)) { + $item['author'] = (string)$dc->creator; + } elseif (isset($media->credit)) { + $item['author'] = (string)$media->credit; + } + + if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { + $item['enclosures'] = [(string)$feedItem->enclosure['url']]; + } + + return $item; + } + + /** + * Parse the contents of a single feed item, depending on the current feed + * type, into a RSS-Bridge item. + * + * @param object $item The current feed item + * @return object A RSS-Bridge item, with (hopefully) the whole content + */ + protected function parseItem($item) + { + switch ($this->feedType) { + case self::FEED_TYPE_RSS_1_0: + return $this->parseRss1Item($item); + break; + case self::FEED_TYPE_RSS_2_0: + return $this->parseRss2Item($item); + break; + case self::FEED_TYPE_ATOM_1_0: + return $this->parseATOMItem($item); + break; + default: + returnClientError('Unknown version ' . $this->getInput('version') . '!'); + } + } + + /** {@inheritdoc} */ + public function getURI() + { + return !empty($this->uri) ? $this->uri : parent::getURI(); + } + + /** {@inheritdoc} */ + public function getName() + { + return !empty($this->title) ? $this->title : parent::getName(); + } + + /** {@inheritdoc} */ + public function getIcon() + { + return !empty($this->icon) ? $this->icon : parent::getIcon(); + } } diff --git a/lib/FeedItem.php b/lib/FeedItem.php index 8690eb95..2d3872f2 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -33,493 +34,554 @@ * (i.e. `$feedItem = \FeedItem($item);`). Support for legacy items may be removed * in future versions of RSS-Bridge. */ -class FeedItem { - /** @var string|null URI to the full article */ - protected $uri = null; - - /** @var string|null Title of the item */ - protected $title = null; - - /** @var int|null Timestamp of when the item was first released */ - protected $timestamp = null; - - /** @var string|null Name of the author */ - protected $author = null; - - /** @var string|null Body of the feed */ - protected $content = null; - - /** @var array List of links to media objects */ - protected $enclosures = array(); - - /** @var array List of category names or tags */ - protected $categories = array(); - - /** @var string Unique ID for the current item */ - protected $uid = null; - - /** @var array Associative list of additional parameters */ - protected $misc = array(); // Custom parameters - - /** - * Create object from legacy item. - * - * The provided array must be an associative array of key-value-pairs, where - * keys may correspond to any of the properties of this class. - * - * Example use: - * - * ```PHP - * <?php - * $item = array(); - * - * $item['uri'] = 'https://www.github.com/rss-bridge/rss-bridge/'; - * $item['title'] = 'Title'; - * $item['timestamp'] = strtotime('now'); - * $item['author'] = 'Unknown author'; - * $item['content'] = 'Hello World!'; - * $item['enclosures'] = array('https://github.com/favicon.ico'); - * $item['categories'] = array('php', 'rss-bridge', 'awesome'); - * - * $feedItem = new \FeedItem($item); - * - * ``` - * - * The result of the code above is the same as the code below: - * - * ```PHP - * <?php - * $feedItem = \FeedItem(); - * - * $feedItem->uri = 'https://www.github.com/rss-bridge/rss-bridge/'; - * $feedItem->title = 'Title'; - * $feedItem->timestamp = strtotime('now'); - * $feedItem->autor = 'Unknown author'; - * $feedItem->content = 'Hello World!'; - * $feedItem->enclosures = array('https://github.com/favicon.ico'); - * $feedItem->categories = array('php', 'rss-bridge', 'awesome'); - * ``` - * - * @param array $item (optional) A legacy item (empty: no legacy support). - * @return object A new object of this class - */ - public function __construct($item = array()) { - if(!is_array($item)) - Debug::log('Item must be an array!'); - - foreach($item as $key => $value) { - $this->__set($key, $value); - } - } - - /** - * Get current URI. - * - * Use {@see FeedItem::setURI()} to set the URI. - * - * @return string|null The URI or null if it hasn't been set. - */ - public function getURI() { - return $this->uri; - } - - /** - * Set URI to the full article. - * - * Use {@see FeedItem::getURI()} to get the URI. - * - * _Note_: Removes whitespace from the beginning and end of the URI. - * - * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an - * object of simple_html_dom_node. - * - * @param object|string $uri URI to the full article. - * @return self - */ - public function setURI($uri) { - $this->uri = null; // Clear previous data - - if($uri instanceof simple_html_dom_node) { - if($uri->hasAttribute('href')) { // Anchor - $uri = $uri->href; - } elseif($uri->hasAttribute('src')) { // Image - $uri = $uri->src; - } else { - Debug::log('The item provided as URI is unknown!'); - } - } - - if(!is_string($uri)) { - Debug::log('URI must be a string!'); - } elseif(!filter_var( - $uri, - FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) { - Debug::log('URI must include a scheme, host and path!'); - } else { - $scheme = parse_url($uri, PHP_URL_SCHEME); - - if($scheme !== 'http' && $scheme !== 'https') { - Debug::log('URI scheme must be "http" or "https"!'); - } else { - $this->uri = trim($uri); - } - } - - return $this; - } - - /** - * Get current title. - * - * Use {@see FeedItem::setTitle()} to set the title. - * - * @return string|null The current title or null if it hasn't been set. - */ - public function getTitle() { - return $this->title; - } - - /** - * Set title. - * - * Use {@see FeedItem::getTitle()} to get the title. - * - * _Note_: Removes whitespace from beginning and end of the title. - * - * @param string $title The title - * @return self - */ - public function setTitle($title) { - $this->title = null; // Clear previous data - - if(!is_string($title)) { - Debug::log('Title must be a string!'); - } else { - $this->title = trim($title); - } - - return $this; - } - - /** - * Get current timestamp. - * - * Use {@see FeedItem::setTimestamp()} to set the timestamp. - * - * @return int|null The current timestamp or null if it hasn't been set. - */ - public function getTimestamp() { - return $this->timestamp; - } - - /** - * Set timestamp of first release. - * - * _Note_: The timestamp should represent the number of seconds since - * January 1 1970 00:00:00 GMT (Unix time). - * - * _Remarks_: If the provided timestamp is a string (not numeric), this - * function automatically attempts to parse the string using - * [strtotime](http://php.net/manual/en/function.strtotime.php) - * - * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP) - * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia) - * - * @param string|int $timestamp A timestamp of when the item was first released - * @return self - */ - public function setTimestamp($timestamp) { - $this->timestamp = null; // Clear previous data - - if(!is_numeric($timestamp) - && !$timestamp = strtotime($timestamp)) { - Debug::log('Unable to parse timestamp!'); - } - - if($timestamp <= 0) { - Debug::log('Timestamp must be greater than zero!'); - } else { - $this->timestamp = $timestamp; - } - - return $this; - } - - /** - * Get the current author name. - * - * Use {@see FeedItem::setAuthor()} to set the author. - * - * @return string|null The author or null if it hasn't been set. - */ - public function getAuthor() { - return $this->author; - } - - /** - * Set the author name. - * - * Use {@see FeedItem::getAuthor()} to get the author. - * - * @param string $author The author name. - * @return self - */ - public function setAuthor($author) { - $this->author = null; // Clear previous data - - if(!is_string($author)) { - Debug::log('Author must be a string!'); - } else { - $this->author = $author; - } - - return $this; - } - - /** - * Get item content. - * - * Use {@see FeedItem::setContent()} to set the item content. - * - * @return string|null The item content or null if it hasn't been set. - */ - public function getContent() { - return $this->content; - } - - /** - * Set item content. - * - * Note: This function casts objects of type simple_html_dom and - * simple_html_dom_node to string. - * - * Use {@see FeedItem::getContent()} to get the current item content. - * - * @param string|object $content The item content as text or simple_html_dom - * object. - * @return self - */ - public function setContent($content) { - $this->content = null; // Clear previous data - - if($content instanceof simple_html_dom - || $content instanceof simple_html_dom_node) { - $content = (string)$content; - } - - if(!is_string($content)) { - Debug::log('Content must be a string!'); - } else { - $this->content = $content; - } - - return $this; - } - - /** - * Get item enclosures. - * - * Use {@see FeedItem::setEnclosures()} to set feed enclosures. - * - * @return array Enclosures as array of enclosure URIs. - */ - public function getEnclosures() { - return $this->enclosures; - } - - /** - * Set item enclosures. - * - * Use {@see FeedItem::getEnclosures()} to get the current item enclosures. - * - * @param array $enclosures Array of enclosures, where each element links to - * one enclosure. - * @return self - */ - public function setEnclosures($enclosures) { - $this->enclosures = array(); // Clear previous data - - if(!is_array($enclosures)) { - Debug::log('Enclosures must be an array!'); - } else { - foreach($enclosures as $enclosure) { - if(!filter_var( - $enclosure, - FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) { - Debug::log('Each enclosure must contain a scheme, host and path!'); - } elseif(!in_array($enclosure, $this->enclosures)) { - $this->enclosures[] = $enclosure; - } - } - } - - return $this; - } - - /** - * Get item categories. - * - * Use {@see FeedItem::setCategories()} to set item categories. - * - * @param array The item categories. - */ - public function getCategories() { - return $this->categories; - } - - /** - * Set item categories. - * - * Use {@see FeedItem::getCategories()} to get the current item categories. - * - * @param array $categories Array of categories, where each element defines - * a single category name. - * @return self - */ - public function setCategories($categories) { - $this->categories = array(); // Clear previous data - - if(!is_array($categories)) { - Debug::log('Categories must be an array!'); - } else { - foreach($categories as $category) { - if(!is_string($category)) { - Debug::log('Category must be a string!'); - } else { - $this->categories[] = $category; - } - } - } - - return $this; - } - - /** - * Get unique id - * - * Use {@see FeedItem::setUid()} to set the unique id. - * - * @param string The unique id. - */ - public function getUid() { - return $this->uid; - } - - /** - * Set unique id. - * - * Use {@see FeedItem::getUid()} to get the unique id. - * - * @param string $uid A string that uniquely identifies the current item - * @return self - */ - public function setUid($uid) { - $this->uid = null; // Clear previous data - - if(!is_string($uid)) { - Debug::log('Unique id must be a string!'); - } elseif (preg_match('/^[a-f0-9]{40}$/', $uid)) { - // keep id if it already is a SHA-1 hash - $this->uid = $uid; - } else { - $this->uid = sha1($uid); - } - - return $this; - } - - /** - * Add miscellaneous elements to the item. - * - * @param string $key Name of the element. - * @param mixed $value Value of the element. - * @return self - */ - public function addMisc($key, $value) { - - if(!is_string($key)) { - Debug::log('Key must be a string!'); - } elseif(in_array($key, get_object_vars($this))) { - Debug::log('Key must be unique!'); - } else { - $this->misc[$key] = $value; - } - - return $this; - } - - /** - * Transform current object to array - * - * @return array - */ - public function toArray() { - return array_merge( - array( - 'uri' => $this->uri, - 'title' => $this->title, - 'timestamp' => $this->timestamp, - 'author' => $this->author, - 'content' => $this->content, - 'enclosures' => $this->enclosures, - 'categories' => $this->categories, - 'uid' => $this->uid, - ), $this->misc - ); - } - - /** - * Set item property - * - * Allows simple assignment to parameters. This method is slower, but easier - * to implement in some cases: - * - * ```PHP - * $item = new \FeedItem(); - * $item->content = 'Hello World!'; - * $item->my_id = 42; - * ``` - * - * @param string $name Property name - * @param mixed $value Property value - */ - public function __set($name, $value) { - switch($name) { - case 'uri': $this->setURI($value); break; - case 'title': $this->setTitle($value); break; - case 'timestamp': $this->setTimestamp($value); break; - case 'author': $this->setAuthor($value); break; - case 'content': $this->setContent($value); break; - case 'enclosures': $this->setEnclosures($value); break; - case 'categories': $this->setCategories($value); break; - case 'uid': $this->setUid($value); break; - default: $this->addMisc($name, $value); - } - } - - /** - * Get item property - * - * Allows simple assignment to parameters. This method is slower, but easier - * to implement in some cases. - * - * @param string $name Property name - * @return mixed Property value - */ - public function __get($name) { - switch($name) { - case 'uri': return $this->getURI(); - case 'title': return $this->getTitle(); - case 'timestamp': return $this->getTimestamp(); - case 'author': return $this->getAuthor(); - case 'content': return $this->getContent(); - case 'enclosures': return $this->getEnclosures(); - case 'categories': return $this->getCategories(); - case 'uid': return $this->getUid(); - default: - if(array_key_exists($name, $this->misc)) - return $this->misc[$name]; - return null; - } - } +class FeedItem +{ + /** @var string|null URI to the full article */ + protected $uri = null; + + /** @var string|null Title of the item */ + protected $title = null; + + /** @var int|null Timestamp of when the item was first released */ + protected $timestamp = null; + + /** @var string|null Name of the author */ + protected $author = null; + + /** @var string|null Body of the feed */ + protected $content = null; + + /** @var array List of links to media objects */ + protected $enclosures = []; + + /** @var array List of category names or tags */ + protected $categories = []; + + /** @var string Unique ID for the current item */ + protected $uid = null; + + /** @var array Associative list of additional parameters */ + protected $misc = []; // Custom parameters + + /** + * Create object from legacy item. + * + * The provided array must be an associative array of key-value-pairs, where + * keys may correspond to any of the properties of this class. + * + * Example use: + * + * ```PHP + * <?php + * $item = array(); + * + * $item['uri'] = 'https://www.github.com/rss-bridge/rss-bridge/'; + * $item['title'] = 'Title'; + * $item['timestamp'] = strtotime('now'); + * $item['author'] = 'Unknown author'; + * $item['content'] = 'Hello World!'; + * $item['enclosures'] = array('https://github.com/favicon.ico'); + * $item['categories'] = array('php', 'rss-bridge', 'awesome'); + * + * $feedItem = new \FeedItem($item); + * + * ``` + * + * The result of the code above is the same as the code below: + * + * ```PHP + * <?php + * $feedItem = \FeedItem(); + * + * $feedItem->uri = 'https://www.github.com/rss-bridge/rss-bridge/'; + * $feedItem->title = 'Title'; + * $feedItem->timestamp = strtotime('now'); + * $feedItem->autor = 'Unknown author'; + * $feedItem->content = 'Hello World!'; + * $feedItem->enclosures = array('https://github.com/favicon.ico'); + * $feedItem->categories = array('php', 'rss-bridge', 'awesome'); + * ``` + * + * @param array $item (optional) A legacy item (empty: no legacy support). + * @return object A new object of this class + */ + public function __construct($item = []) + { + if (!is_array($item)) { + Debug::log('Item must be an array!'); + } + + foreach ($item as $key => $value) { + $this->__set($key, $value); + } + } + + /** + * Get current URI. + * + * Use {@see FeedItem::setURI()} to set the URI. + * + * @return string|null The URI or null if it hasn't been set. + */ + public function getURI() + { + return $this->uri; + } + + /** + * Set URI to the full article. + * + * Use {@see FeedItem::getURI()} to get the URI. + * + * _Note_: Removes whitespace from the beginning and end of the URI. + * + * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an + * object of simple_html_dom_node. + * + * @param object|string $uri URI to the full article. + * @return self + */ + public function setURI($uri) + { + $this->uri = null; // Clear previous data + + if ($uri instanceof simple_html_dom_node) { + if ($uri->hasAttribute('href')) { // Anchor + $uri = $uri->href; + } elseif ($uri->hasAttribute('src')) { // Image + $uri = $uri->src; + } else { + Debug::log('The item provided as URI is unknown!'); + } + } + + if (!is_string($uri)) { + Debug::log('URI must be a string!'); + } elseif ( + !filter_var( + $uri, + FILTER_VALIDATE_URL, + FILTER_FLAG_PATH_REQUIRED + ) + ) { + Debug::log('URI must include a scheme, host and path!'); + } else { + $scheme = parse_url($uri, PHP_URL_SCHEME); + + if ($scheme !== 'http' && $scheme !== 'https') { + Debug::log('URI scheme must be "http" or "https"!'); + } else { + $this->uri = trim($uri); + } + } + + return $this; + } + + /** + * Get current title. + * + * Use {@see FeedItem::setTitle()} to set the title. + * + * @return string|null The current title or null if it hasn't been set. + */ + public function getTitle() + { + return $this->title; + } + + /** + * Set title. + * + * Use {@see FeedItem::getTitle()} to get the title. + * + * _Note_: Removes whitespace from beginning and end of the title. + * + * @param string $title The title + * @return self + */ + public function setTitle($title) + { + $this->title = null; // Clear previous data + + if (!is_string($title)) { + Debug::log('Title must be a string!'); + } else { + $this->title = trim($title); + } + + return $this; + } + + /** + * Get current timestamp. + * + * Use {@see FeedItem::setTimestamp()} to set the timestamp. + * + * @return int|null The current timestamp or null if it hasn't been set. + */ + public function getTimestamp() + { + return $this->timestamp; + } + + /** + * Set timestamp of first release. + * + * _Note_: The timestamp should represent the number of seconds since + * January 1 1970 00:00:00 GMT (Unix time). + * + * _Remarks_: If the provided timestamp is a string (not numeric), this + * function automatically attempts to parse the string using + * [strtotime](http://php.net/manual/en/function.strtotime.php) + * + * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP) + * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia) + * + * @param string|int $timestamp A timestamp of when the item was first released + * @return self + */ + public function setTimestamp($timestamp) + { + $this->timestamp = null; // Clear previous data + + if ( + !is_numeric($timestamp) + && !$timestamp = strtotime($timestamp) + ) { + Debug::log('Unable to parse timestamp!'); + } + + if ($timestamp <= 0) { + Debug::log('Timestamp must be greater than zero!'); + } else { + $this->timestamp = $timestamp; + } + + return $this; + } + + /** + * Get the current author name. + * + * Use {@see FeedItem::setAuthor()} to set the author. + * + * @return string|null The author or null if it hasn't been set. + */ + public function getAuthor() + { + return $this->author; + } + + /** + * Set the author name. + * + * Use {@see FeedItem::getAuthor()} to get the author. + * + * @param string $author The author name. + * @return self + */ + public function setAuthor($author) + { + $this->author = null; // Clear previous data + + if (!is_string($author)) { + Debug::log('Author must be a string!'); + } else { + $this->author = $author; + } + + return $this; + } + + /** + * Get item content. + * + * Use {@see FeedItem::setContent()} to set the item content. + * + * @return string|null The item content or null if it hasn't been set. + */ + public function getContent() + { + return $this->content; + } + + /** + * Set item content. + * + * Note: This function casts objects of type simple_html_dom and + * simple_html_dom_node to string. + * + * Use {@see FeedItem::getContent()} to get the current item content. + * + * @param string|object $content The item content as text or simple_html_dom + * object. + * @return self + */ + public function setContent($content) + { + $this->content = null; // Clear previous data + + if ( + $content instanceof simple_html_dom + || $content instanceof simple_html_dom_node + ) { + $content = (string)$content; + } + + if (!is_string($content)) { + Debug::log('Content must be a string!'); + } else { + $this->content = $content; + } + + return $this; + } + + /** + * Get item enclosures. + * + * Use {@see FeedItem::setEnclosures()} to set feed enclosures. + * + * @return array Enclosures as array of enclosure URIs. + */ + public function getEnclosures() + { + return $this->enclosures; + } + + /** + * Set item enclosures. + * + * Use {@see FeedItem::getEnclosures()} to get the current item enclosures. + * + * @param array $enclosures Array of enclosures, where each element links to + * one enclosure. + * @return self + */ + public function setEnclosures($enclosures) + { + $this->enclosures = []; // Clear previous data + + if (!is_array($enclosures)) { + Debug::log('Enclosures must be an array!'); + } else { + foreach ($enclosures as $enclosure) { + if ( + !filter_var( + $enclosure, + FILTER_VALIDATE_URL, + FILTER_FLAG_PATH_REQUIRED + ) + ) { + Debug::log('Each enclosure must contain a scheme, host and path!'); + } elseif (!in_array($enclosure, $this->enclosures)) { + $this->enclosures[] = $enclosure; + } + } + } + + return $this; + } + + /** + * Get item categories. + * + * Use {@see FeedItem::setCategories()} to set item categories. + * + * @param array The item categories. + */ + public function getCategories() + { + return $this->categories; + } + + /** + * Set item categories. + * + * Use {@see FeedItem::getCategories()} to get the current item categories. + * + * @param array $categories Array of categories, where each element defines + * a single category name. + * @return self + */ + public function setCategories($categories) + { + $this->categories = []; // Clear previous data + + if (!is_array($categories)) { + Debug::log('Categories must be an array!'); + } else { + foreach ($categories as $category) { + if (!is_string($category)) { + Debug::log('Category must be a string!'); + } else { + $this->categories[] = $category; + } + } + } + + return $this; + } + + /** + * Get unique id + * + * Use {@see FeedItem::setUid()} to set the unique id. + * + * @param string The unique id. + */ + public function getUid() + { + return $this->uid; + } + + /** + * Set unique id. + * + * Use {@see FeedItem::getUid()} to get the unique id. + * + * @param string $uid A string that uniquely identifies the current item + * @return self + */ + public function setUid($uid) + { + $this->uid = null; // Clear previous data + + if (!is_string($uid)) { + Debug::log('Unique id must be a string!'); + } elseif (preg_match('/^[a-f0-9]{40}$/', $uid)) { + // keep id if it already is a SHA-1 hash + $this->uid = $uid; + } else { + $this->uid = sha1($uid); + } + + return $this; + } + + /** + * Add miscellaneous elements to the item. + * + * @param string $key Name of the element. + * @param mixed $value Value of the element. + * @return self + */ + public function addMisc($key, $value) + { + if (!is_string($key)) { + Debug::log('Key must be a string!'); + } elseif (in_array($key, get_object_vars($this))) { + Debug::log('Key must be unique!'); + } else { + $this->misc[$key] = $value; + } + + return $this; + } + + /** + * Transform current object to array + * + * @return array + */ + public function toArray() + { + return array_merge( + [ + 'uri' => $this->uri, + 'title' => $this->title, + 'timestamp' => $this->timestamp, + 'author' => $this->author, + 'content' => $this->content, + 'enclosures' => $this->enclosures, + 'categories' => $this->categories, + 'uid' => $this->uid, + ], + $this->misc + ); + } + + /** + * Set item property + * + * Allows simple assignment to parameters. This method is slower, but easier + * to implement in some cases: + * + * ```PHP + * $item = new \FeedItem(); + * $item->content = 'Hello World!'; + * $item->my_id = 42; + * ``` + * + * @param string $name Property name + * @param mixed $value Property value + */ + public function __set($name, $value) + { + switch ($name) { + case 'uri': + $this->setURI($value); + break; + case 'title': + $this->setTitle($value); + break; + case 'timestamp': + $this->setTimestamp($value); + break; + case 'author': + $this->setAuthor($value); + break; + case 'content': + $this->setContent($value); + break; + case 'enclosures': + $this->setEnclosures($value); + break; + case 'categories': + $this->setCategories($value); + break; + case 'uid': + $this->setUid($value); + break; + default: + $this->addMisc($name, $value); + } + } + + /** + * Get item property + * + * Allows simple assignment to parameters. This method is slower, but easier + * to implement in some cases. + * + * @param string $name Property name + * @return mixed Property value + */ + public function __get($name) + { + switch ($name) { + case 'uri': + return $this->getURI(); + case 'title': + return $this->getTitle(); + case 'timestamp': + return $this->getTimestamp(); + case 'author': + return $this->getAuthor(); + case 'content': + return $this->getContent(); + case 'enclosures': + return $this->getEnclosures(); + case 'categories': + return $this->getCategories(); + case 'uid': + return $this->getUid(); + default: + if (array_key_exists($name, $this->misc)) { + return $this->misc[$name]; + } + return null; + } + } } diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index 768b0157..7a4c6c92 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license https://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license https://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -16,126 +17,135 @@ * * This class implements {@see FormatInterface} */ -abstract class FormatAbstract implements FormatInterface { - - /** The default charset (UTF-8) */ - const DEFAULT_CHARSET = 'UTF-8'; - - /** MIME type of format output */ - const MIME_TYPE = 'text/plain'; - - /** @var string $charset The charset */ - protected $charset; - - /** @var array $items The items */ - protected $items; - - /** - * @var int $lastModified A timestamp to indicate the last modified time of - * the output data. - */ - protected $lastModified; - - /** @var array $extraInfos The extra infos */ - protected $extraInfos; - - /** {@inheritdoc} */ - public function getMimeType(){ - return static::MIME_TYPE; - } - - /** - * {@inheritdoc} - * - * @param string $charset {@inheritdoc} - */ - public function setCharset($charset){ - $this->charset = $charset; - - return $this; - } - - /** {@inheritdoc} */ - public function getCharset(){ - $charset = $this->charset; - - return is_null($charset) ? static::DEFAULT_CHARSET : $charset; - } - - /** - * Set the last modified time - * - * @param int $lastModified The last modified time - * @return void - */ - public function setLastModified($lastModified){ - $this->lastModified = $lastModified; - } - - /** - * {@inheritdoc} - * - * @param array $items {@inheritdoc} - */ - public function setItems(array $items){ - $this->items = $items; - - return $this; - } - - /** {@inheritdoc} */ - public function getItems(){ - if(!is_array($this->items)) - throw new \LogicException('Feed the ' . get_class($this) . ' with "setItems" method before !'); - - return $this->items; - } - - /** - * {@inheritdoc} - * - * @param array $extraInfos {@inheritdoc} - */ - public function setExtraInfos(array $extraInfos = array()){ - foreach(array('name', 'uri', 'icon', 'donationUri') as $infoName) { - if(!isset($extraInfos[$infoName])) { - $extraInfos[$infoName] = ''; - } - } - - $this->extraInfos = $extraInfos; - - return $this; - } - - /** {@inheritdoc} */ - public function getExtraInfos(){ - if(is_null($this->extraInfos)) { // No extra info ? - $this->setExtraInfos(); // Define with default value - } - - return $this->extraInfos; - } - - /** - * Sanitize HTML while leaving it functional. - * - * Keeps HTML as-is (with clickable hyperlinks) while reducing annoying and - * potentially dangerous things. - * - * @param string $html The HTML content - * @return string The sanitized HTML content - * - * @todo This belongs into `html.php` - * @todo Maybe switch to http://htmlpurifier.org/ - * @todo Maybe switch to http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php - */ - protected function sanitizeHtml(string $html): string - { - $html = str_replace('<script', '<‌script', $html); // Disable scripts, but leave them visible. - $html = str_replace('<iframe', '<‌iframe', $html); - $html = str_replace('<link', '<‌link', $html); - // We leave alone object and embed so that videos can play in RSS readers. - return $html; - } +abstract class FormatAbstract implements FormatInterface +{ + /** The default charset (UTF-8) */ + const DEFAULT_CHARSET = 'UTF-8'; + + /** MIME type of format output */ + const MIME_TYPE = 'text/plain'; + + /** @var string $charset The charset */ + protected $charset; + + /** @var array $items The items */ + protected $items; + + /** + * @var int $lastModified A timestamp to indicate the last modified time of + * the output data. + */ + protected $lastModified; + + /** @var array $extraInfos The extra infos */ + protected $extraInfos; + + /** {@inheritdoc} */ + public function getMimeType() + { + return static::MIME_TYPE; + } + + /** + * {@inheritdoc} + * + * @param string $charset {@inheritdoc} + */ + public function setCharset($charset) + { + $this->charset = $charset; + + return $this; + } + + /** {@inheritdoc} */ + public function getCharset() + { + $charset = $this->charset; + + return is_null($charset) ? static::DEFAULT_CHARSET : $charset; + } + + /** + * Set the last modified time + * + * @param int $lastModified The last modified time + * @return void + */ + public function setLastModified($lastModified) + { + $this->lastModified = $lastModified; + } + + /** + * {@inheritdoc} + * + * @param array $items {@inheritdoc} + */ + public function setItems(array $items) + { + $this->items = $items; + + return $this; + } + + /** {@inheritdoc} */ + public function getItems() + { + if (!is_array($this->items)) { + throw new \LogicException('Feed the ' . get_class($this) . ' with "setItems" method before !'); + } + + return $this->items; + } + + /** + * {@inheritdoc} + * + * @param array $extraInfos {@inheritdoc} + */ + public function setExtraInfos(array $extraInfos = []) + { + foreach (['name', 'uri', 'icon', 'donationUri'] as $infoName) { + if (!isset($extraInfos[$infoName])) { + $extraInfos[$infoName] = ''; + } + } + + $this->extraInfos = $extraInfos; + + return $this; + } + + /** {@inheritdoc} */ + public function getExtraInfos() + { + if (is_null($this->extraInfos)) { // No extra info ? + $this->setExtraInfos(); // Define with default value + } + + return $this->extraInfos; + } + + /** + * Sanitize HTML while leaving it functional. + * + * Keeps HTML as-is (with clickable hyperlinks) while reducing annoying and + * potentially dangerous things. + * + * @param string $html The HTML content + * @return string The sanitized HTML content + * + * @todo This belongs into `html.php` + * @todo Maybe switch to http://htmlpurifier.org/ + * @todo Maybe switch to http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php + */ + protected function sanitizeHtml(string $html): string + { + $html = str_replace('<script', '<‌script', $html); // Disable scripts, but leave them visible. + $html = str_replace('<iframe', '<‌iframe', $html); + $html = str_replace('<link', '<‌link', $html); + // We leave alone object and embed so that videos can play in RSS readers. + return $html; + } } diff --git a/lib/FormatFactory.php b/lib/FormatFactory.php index 2044a899..e2ef52fa 100644 --- a/lib/FormatFactory.php +++ b/lib/FormatFactory.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,65 +7,66 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ class FormatFactory { - private $folder; - private $formatNames; + private $folder; + private $formatNames; - public function __construct(string $folder = PATH_LIB_FORMATS) - { - $this->folder = $folder; + public function __construct(string $folder = PATH_LIB_FORMATS) + { + $this->folder = $folder; - // create format names - foreach(scandir($this->folder) as $file) { - if(preg_match('/^([^.]+)Format\.php$/U', $file, $m)) { - $this->formatNames[] = $m[1]; - } - } - } + // create format names + foreach (scandir($this->folder) as $file) { + if (preg_match('/^([^.]+)Format\.php$/U', $file, $m)) { + $this->formatNames[] = $m[1]; + } + } + } - /** - * @throws \InvalidArgumentException - * @param string $name The name of the format e.g. "Atom", "Mrss" or "Json" - */ - public function create(string $name): FormatInterface - { - if (! preg_match('/^[a-zA-Z0-9-]*$/', $name)) { - throw new \InvalidArgumentException('Format name invalid!'); - } - $name = $this->sanitizeFormatName($name); - if ($name === null) { - throw new \InvalidArgumentException('Unknown format given!'); - } - $className = '\\' . $name . 'Format'; - return new $className; - } + /** + * @throws \InvalidArgumentException + * @param string $name The name of the format e.g. "Atom", "Mrss" or "Json" + */ + public function create(string $name): FormatInterface + { + if (! preg_match('/^[a-zA-Z0-9-]*$/', $name)) { + throw new \InvalidArgumentException('Format name invalid!'); + } + $name = $this->sanitizeFormatName($name); + if ($name === null) { + throw new \InvalidArgumentException('Unknown format given!'); + } + $className = '\\' . $name . 'Format'; + return new $className(); + } - public function getFormatNames(): array - { - return $this->formatNames; - } + public function getFormatNames(): array + { + return $this->formatNames; + } - protected function sanitizeFormatName(string $name) { - $name = ucfirst(strtolower($name)); + protected function sanitizeFormatName(string $name) + { + $name = ucfirst(strtolower($name)); - // Trim trailing '.php' if exists - if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { - $name = $matches[1]; - } + // Trim trailing '.php' if exists + if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { + $name = $matches[1]; + } - // Trim trailing 'Format' if exists - if (preg_match('/(.+)(?:Format)/i', $name, $matches)) { - $name = $matches[1]; - } - if (in_array($name, $this->formatNames)) { - return $name; - } - return null; - } + // Trim trailing 'Format' if exists + if (preg_match('/(.+)(?:Format)/i', $name, $matches)) { + $name = $matches[1]; + } + if (in_array($name, $this->formatNames)) { + return $name; + } + return null; + } } diff --git a/lib/FormatInterface.php b/lib/FormatInterface.php index 5fd46ef9..8f98d6e4 100644 --- a/lib/FormatInterface.php +++ b/lib/FormatInterface.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -18,66 +19,67 @@ * @todo Explain parameters and return values in more detail * @todo Return self more often (to allow call chaining) */ -interface FormatInterface { - /** - * Generate a string representation of the current data - * - * @return string The string representation - */ - public function stringify(); +interface FormatInterface +{ + /** + * Generate a string representation of the current data + * + * @return string The string representation + */ + public function stringify(); - /** - * Set items - * - * @param array $bridges The items - * @return self The format object - * - * @todo Rename parameter `$bridges` to `$items` - */ - public function setItems(array $bridges); + /** + * Set items + * + * @param array $bridges The items + * @return self The format object + * + * @todo Rename parameter `$bridges` to `$items` + */ + public function setItems(array $bridges); - /** - * Return items - * - * @throws \LogicException if the items are not set - * @return array The items - */ - public function getItems(); + /** + * Return items + * + * @throws \LogicException if the items are not set + * @return array The items + */ + public function getItems(); - /** - * Set extra information - * - * @param array $infos Extra information - * @return self The format object - */ - public function setExtraInfos(array $infos); + /** + * Set extra information + * + * @param array $infos Extra information + * @return self The format object + */ + public function setExtraInfos(array $infos); - /** - * Return extra information - * - * @return array Extra information - */ - public function getExtraInfos(); + /** + * Return extra information + * + * @return array Extra information + */ + public function getExtraInfos(); - /** - * Return MIME type - * - * @return string The MIME type - */ - public function getMimeType(); + /** + * Return MIME type + * + * @return string The MIME type + */ + public function getMimeType(); - /** - * Set charset - * - * @param string $charset The charset - * @return self The format object - */ - public function setCharset($charset); + /** + * Set charset + * + * @param string $charset The charset + * @return self The format object + */ + public function setCharset($charset); - /** - * Return current charset - * - * @return string The charset - */ - public function getCharset(); + /** + * Return current charset + * + * @return string The charset + */ + public function getCharset(); } diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 12e07942..a903ff8d 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,234 +7,259 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** * Validator for bridge parameters */ -class ParameterValidator { - - /** - * Holds the list of invalid parameters - * - * @var array - */ - private $invalid = array(); - - /** - * Add item to list of invalid parameters - * - * @param string $name The name of the parameter - * @param string $reason The reason for that parameter being invalid - * @return void - */ - private function addInvalidParameter($name, $reason){ - $this->invalid[] = array( - 'name' => $name, - 'reason' => $reason - ); - } - - /** - * Return list of invalid parameters. - * - * Each element is an array of 'name' and 'reason'. - * - * @return array List of invalid parameters - */ - public function getInvalidParameters() { - return $this->invalid; - } - - /** - * Validate value for a text input - * - * @param string $value The value of a text input - * @param string|null $pattern (optional) A regex pattern - * @return string|null The filtered value or null if the value is invalid - */ - private function validateTextValue($value, $pattern = null){ - if(!is_null($pattern)) { - $filteredValue = filter_var($value, - FILTER_VALIDATE_REGEXP, - array('options' => array( - 'regexp' => '/^' . $pattern . '$/' - ) - )); - } else { - $filteredValue = filter_var($value); - } - - if($filteredValue === false) - return null; - - return $filteredValue; - } - - /** - * Validate value for a number input - * - * @param int $value The value of a number input - * @return int|null The filtered value or null if the value is invalid - */ - private function validateNumberValue($value){ - $filteredValue = filter_var($value, FILTER_VALIDATE_INT); - - if($filteredValue === false) - return null; - - return $filteredValue; - } - - /** - * Validate value for a checkbox - * - * @param bool $value The value of a checkbox - * @return bool The filtered value - */ - private function validateCheckboxValue($value){ - return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); - } - - /** - * Validate value for a list - * - * @param string $value The value of a list - * @param array $expectedValues A list of expected values - * @return string|null The filtered value or null if the value is invalid - */ - private function validateListValue($value, $expectedValues){ - $filteredValue = filter_var($value); - - if($filteredValue === false) - return null; - - if(!in_array($filteredValue, $expectedValues)) { // Check sub-values? - foreach($expectedValues as $subName => $subValue) { - if(is_array($subValue) && in_array($filteredValue, $subValue)) - return $filteredValue; - } - return null; - } - - return $filteredValue; - } - - /** - * Check if all required parameters are satisfied - * - * @param array $data (ref) A list of input values - * @param array $parameters The bridge parameters - * @return bool True if all parameters are satisfied - */ - public function validateData(&$data, $parameters){ - - if(!is_array($data)) - return false; - - foreach($data as $name => $value) { - // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them. - if ($name === '_') continue; - - $registered = false; - foreach($parameters as $context => $set) { - if(array_key_exists($name, $set)) { - $registered = true; - if(!isset($set[$name]['type'])) { - $set[$name]['type'] = 'text'; - } - - switch($set[$name]['type']) { - case 'number': - $data[$name] = $this->validateNumberValue($value); - break; - case 'checkbox': - $data[$name] = $this->validateCheckboxValue($value); - break; - case 'list': - $data[$name] = $this->validateListValue($value, $set[$name]['values']); - break; - default: - case 'text': - if(isset($set[$name]['pattern'])) { - $data[$name] = $this->validateTextValue($value, $set[$name]['pattern']); - } else { - $data[$name] = $this->validateTextValue($value); - } - break; - } - - if(is_null($data[$name]) && isset($set[$name]['required']) && $set[$name]['required']) { - $this->addInvalidParameter($name, 'Parameter is invalid!'); - } - } - } - - if(!$registered) { - $this->addInvalidParameter($name, 'Parameter is not registered!'); - } - } - - return empty($this->invalid); - } - - /** - * Get the name of the context matching the provided inputs - * - * @param array $data Associative array of user data - * @param array $parameters Array of bridge parameters - * @return string|null Returns the context name or null if no match was found - */ - public function getQueriedContext($data, $parameters){ - $queriedContexts = array(); - - // Detect matching context - foreach($parameters as $context => $set) { - $queriedContexts[$context] = null; - - // Ensure all user data exist in the current context - $notInContext = array_diff_key($data, $set); - if(array_key_exists('global', $parameters)) - $notInContext = array_diff_key($notInContext, $parameters['global']); - if(sizeof($notInContext) > 0) - continue; - - // Check if all parameters of the context are satisfied - foreach($set as $id => $properties) { - if(isset($data[$id]) && !empty($data[$id])) { - $queriedContexts[$context] = true; - } elseif (isset($properties['type']) - && ($properties['type'] === 'checkbox' || $properties['type'] === 'list')) { - continue; - } elseif(isset($properties['required']) && $properties['required'] === true) { - $queriedContexts[$context] = false; - break; - } - } - } - - // Abort if one of the globally required parameters is not satisfied - if(array_key_exists('global', $parameters) - && $queriedContexts['global'] === false) { - return null; - } - unset($queriedContexts['global']); - - switch(array_sum($queriedContexts)) { - case 0: // Found no match, is there a context without parameters? - if(isset($data['context'])) return $data['context']; - foreach($queriedContexts as $context => $queried) { - if(is_null($queried)) { - return $context; - } - } - return null; - case 1: // Found unique match - return array_search(true, $queriedContexts); - default: return false; - } - } +class ParameterValidator +{ + /** + * Holds the list of invalid parameters + * + * @var array + */ + private $invalid = []; + + /** + * Add item to list of invalid parameters + * + * @param string $name The name of the parameter + * @param string $reason The reason for that parameter being invalid + * @return void + */ + private function addInvalidParameter($name, $reason) + { + $this->invalid[] = [ + 'name' => $name, + 'reason' => $reason + ]; + } + + /** + * Return list of invalid parameters. + * + * Each element is an array of 'name' and 'reason'. + * + * @return array List of invalid parameters + */ + public function getInvalidParameters() + { + return $this->invalid; + } + + /** + * Validate value for a text input + * + * @param string $value The value of a text input + * @param string|null $pattern (optional) A regex pattern + * @return string|null The filtered value or null if the value is invalid + */ + private function validateTextValue($value, $pattern = null) + { + if (!is_null($pattern)) { + $filteredValue = filter_var( + $value, + FILTER_VALIDATE_REGEXP, + ['options' => [ + 'regexp' => '/^' . $pattern . '$/' + ] + ] + ); + } else { + $filteredValue = filter_var($value); + } + + if ($filteredValue === false) { + return null; + } + + return $filteredValue; + } + + /** + * Validate value for a number input + * + * @param int $value The value of a number input + * @return int|null The filtered value or null if the value is invalid + */ + private function validateNumberValue($value) + { + $filteredValue = filter_var($value, FILTER_VALIDATE_INT); + + if ($filteredValue === false) { + return null; + } + + return $filteredValue; + } + + /** + * Validate value for a checkbox + * + * @param bool $value The value of a checkbox + * @return bool The filtered value + */ + private function validateCheckboxValue($value) + { + return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); + } + + /** + * Validate value for a list + * + * @param string $value The value of a list + * @param array $expectedValues A list of expected values + * @return string|null The filtered value or null if the value is invalid + */ + private function validateListValue($value, $expectedValues) + { + $filteredValue = filter_var($value); + + if ($filteredValue === false) { + return null; + } + + if (!in_array($filteredValue, $expectedValues)) { // Check sub-values? + foreach ($expectedValues as $subName => $subValue) { + if (is_array($subValue) && in_array($filteredValue, $subValue)) { + return $filteredValue; + } + } + return null; + } + + return $filteredValue; + } + + /** + * Check if all required parameters are satisfied + * + * @param array $data (ref) A list of input values + * @param array $parameters The bridge parameters + * @return bool True if all parameters are satisfied + */ + public function validateData(&$data, $parameters) + { + if (!is_array($data)) { + return false; + } + + foreach ($data as $name => $value) { + // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them. + if ($name === '_') { + continue; + } + + $registered = false; + foreach ($parameters as $context => $set) { + if (array_key_exists($name, $set)) { + $registered = true; + if (!isset($set[$name]['type'])) { + $set[$name]['type'] = 'text'; + } + + switch ($set[$name]['type']) { + case 'number': + $data[$name] = $this->validateNumberValue($value); + break; + case 'checkbox': + $data[$name] = $this->validateCheckboxValue($value); + break; + case 'list': + $data[$name] = $this->validateListValue($value, $set[$name]['values']); + break; + default: + case 'text': + if (isset($set[$name]['pattern'])) { + $data[$name] = $this->validateTextValue($value, $set[$name]['pattern']); + } else { + $data[$name] = $this->validateTextValue($value); + } + break; + } + + if (is_null($data[$name]) && isset($set[$name]['required']) && $set[$name]['required']) { + $this->addInvalidParameter($name, 'Parameter is invalid!'); + } + } + } + + if (!$registered) { + $this->addInvalidParameter($name, 'Parameter is not registered!'); + } + } + + return empty($this->invalid); + } + + /** + * Get the name of the context matching the provided inputs + * + * @param array $data Associative array of user data + * @param array $parameters Array of bridge parameters + * @return string|null Returns the context name or null if no match was found + */ + public function getQueriedContext($data, $parameters) + { + $queriedContexts = []; + + // Detect matching context + foreach ($parameters as $context => $set) { + $queriedContexts[$context] = null; + + // Ensure all user data exist in the current context + $notInContext = array_diff_key($data, $set); + if (array_key_exists('global', $parameters)) { + $notInContext = array_diff_key($notInContext, $parameters['global']); + } + if (sizeof($notInContext) > 0) { + continue; + } + + // Check if all parameters of the context are satisfied + foreach ($set as $id => $properties) { + if (isset($data[$id]) && !empty($data[$id])) { + $queriedContexts[$context] = true; + } elseif ( + isset($properties['type']) + && ($properties['type'] === 'checkbox' || $properties['type'] === 'list') + ) { + continue; + } elseif (isset($properties['required']) && $properties['required'] === true) { + $queriedContexts[$context] = false; + break; + } + } + } + + // Abort if one of the globally required parameters is not satisfied + if ( + array_key_exists('global', $parameters) + && $queriedContexts['global'] === false + ) { + return null; + } + unset($queriedContexts['global']); + + switch (array_sum($queriedContexts)) { + case 0: // Found no match, is there a context without parameters? + if (isset($data['context'])) { + return $data['context']; + } + foreach ($queriedContexts as $context => $queried) { + if (is_null($queried)) { + return $context; + } + } + return null; + case 1: // Found unique match + return array_search(true, $queriedContexts); + default: + return false; + } + } } diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index 0ca1587b..686addf4 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -15,572 +15,598 @@ * This class extends {@see BridgeAbstract}, which means it incorporates and * extends all of its functionality. **/ -abstract class XPathAbstract extends BridgeAbstract { - - /** - * Source Web page URL (should provide either HTML or XML content) - * You can specify any website URL which serves data suited for display in RSS feeds - * (for example a news blog). - * - * Use {@see XPathAbstract::getSourceUrl()} to read this parameter - */ - const FEED_SOURCE_URL = ''; - - /** - * XPath expression for extracting the feed title from the source page. - * If this is left blank or does not provide any data {@see BridgeAbstract::getName()} - * is used instead as the feed's title. - * - * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter - */ - const XPATH_EXPRESSION_FEED_TITLE = './/title'; - - /** - * XPath expression for extracting the feed favicon URL from the source page. - * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()} - * is used instead as the feed's favicon URL. - * - * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter - */ - const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href'; - - /** - * XPath expression for extracting the feed items from the source page - * Enter an XPath expression matching a list of dom nodes, each node containing one - * feed article item in total (usually a surrounding <div> or <span> tag). This will - * be the context nodes for all of the following expressions. This expression usually - * starts with a single forward slash. - * - * Use {@see XPathAbstract::getExpressionItem()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM = ''; - - /** - * XPath expression for extracting an item title from the item context - * This expression should match a node contained within each article item node - * containing the article headline. It should start with a dot followed by two - * forward slashes, referring to any descendant nodes of the article item node. - * - * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_TITLE = ''; - - /** - * XPath expression for extracting an item's content from the item context - * This expression should match a node contained within each article item node - * containing the article content or description. It should start with a dot - * followed by two forward slashes, referring to any descendant nodes of the - * article item node. - * - * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_CONTENT = ''; - - /** - * XPath expression for extracting an item link from the item context - * This expression should match a node's attribute containing the article URL - * (usually the href attribute of an <a> tag). It should start with a dot - * followed by two forward slashes, referring to any descendant nodes of - * the article item node. Attributes can be selected by prepending an @ char - * before the attributes name. - * - * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_URI = ''; - - /** - * XPath expression for extracting an item author from the item context - * This expression should match a node contained within each article item - * node containing the article author's name. It should start with a dot - * followed by two forward slashes, referring to any descendant nodes of - * the article item node. - * - * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_AUTHOR = ''; - - /** - * XPath expression for extracting an item timestamp from the item context - * This expression should match a node or node's attribute containing the - * article timestamp or date (parsable by PHP's strtotime function). It - * should start with a dot followed by two forward slashes, referring to - * any descendant nodes of the article item node. Attributes can be - * selected by prepending an @ char before the attributes name. - * - * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_TIMESTAMP = ''; - - /** - * XPath expression for extracting item enclosures (media content like - * images or movies) from the item context - * This expression should match a node's attribute containing an article - * image URL (usually the src attribute of an <img> tag or a style - * attribute). It should start with a dot followed by two forward slashes, - * referring to any descendant nodes of the article item node. Attributes - * can be selected by prepending an @ char before the attributes name. - * - * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; - - /** - * XPath expression for extracting an item category from the item context - * This expression should match a node or node's attribute contained - * within each article item node containing the article category. This - * could be inside <div> or <span> tags or sometimes be hidden - * in a data attribute. It should start with a dot followed by two - * forward slashes, referring to any descendant nodes of the article - * item node. Attributes can be selected by prepending an @ char - * before the attributes name. - * - * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter - */ - const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; - - /** - * Fix encoding - * Set this to true for fixing feed encoding by invoking PHP's utf8_decode - * function on all extracted texts. Try this in case you see "broken" or - * "weird" characters in your feed where you'd normally expect umlauts - * or any other non-ascii characters. - * - * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter - */ - const SETTING_FIX_ENCODING = false; - - /** - * Internal storage for resulting feed name, automatically detected - * @var string - */ - private $feedName; - - /** - * Internal storage for resulting feed name, automatically detected - * @var string - */ - private $feedUri; - - /** - * Internal storage for resulting feed favicon, automatically detected - * @var string - */ - private $feedIcon; - - public function getName(){ - return $this->feedName ?: parent::getName(); - } - - public function getURI() { - return $this->feedUri ?: parent::getURI(); - } - - public function getIcon() { - return $this->feedIcon ?: parent::getIcon(); - } - - /** - * Source Web page URL (should provide either HTML or XML content) - * @return string - */ - protected function getSourceUrl(){ - return static::FEED_SOURCE_URL; - } - - /** - * XPath expression for extracting the feed title from the source page - * @return string - */ - protected function getExpressionTitle(){ - return static::XPATH_EXPRESSION_FEED_TITLE; - } - - /** - * XPath expression for extracting the feed favicon from the source page - * @return string - */ - protected function getExpressionIcon(){ - return static::XPATH_EXPRESSION_FEED_ICON; - } - - /** - * XPath expression for extracting the feed items from the source page - * @return string - */ - protected function getExpressionItem(){ - return static::XPATH_EXPRESSION_ITEM; - } - - /** - * XPath expression for extracting an item title from the item context - * @return string - */ - protected function getExpressionItemTitle(){ - return static::XPATH_EXPRESSION_ITEM_TITLE; - } - - /** - * XPath expression for extracting an item's content from the item context - * @return string - */ - protected function getExpressionItemContent(){ - return static::XPATH_EXPRESSION_ITEM_CONTENT; - } - - /** - * XPath expression for extracting an item link from the item context - * @return string - */ - protected function getExpressionItemUri(){ - return static::XPATH_EXPRESSION_ITEM_URI; - } - - /** - * XPath expression for extracting an item author from the item context - * @return string - */ - protected function getExpressionItemAuthor(){ - return static::XPATH_EXPRESSION_ITEM_AUTHOR; - } - - /** - * XPath expression for extracting an item timestamp from the item context - * @return string - */ - protected function getExpressionItemTimestamp(){ - return static::XPATH_EXPRESSION_ITEM_TIMESTAMP; - } - - /** - * XPath expression for extracting item enclosures (media content like - * images or movies) from the item context - * @return string - */ - protected function getExpressionItemEnclosures(){ - return static::XPATH_EXPRESSION_ITEM_ENCLOSURES; - } - - /** - * XPath expression for extracting an item category from the item context - * @return string - */ - protected function getExpressionItemCategories(){ - return static::XPATH_EXPRESSION_ITEM_CATEGORIES; - } - - /** - * Fix encoding - * @return string - */ - protected function getSettingFixEncoding(){ - return static::SETTING_FIX_ENCODING; - } - - /** - * Internal helper method for quickly accessing all the user defined constants - * in derived classes - * - * @param $name - * @return bool|string - */ - private function getParam($name){ - switch($name) { - - case 'url': - return $this->getSourceUrl(); - case 'feed_title': - return $this->getExpressionTitle(); - case 'feed_icon': - return $this->getExpressionIcon(); - case 'item': - return $this->getExpressionItem(); - case 'title': - return $this->getExpressionItemTitle(); - case 'content': - return $this->getExpressionItemContent(); - case 'uri': - return $this->getExpressionItemUri(); - case 'author': - return $this->getExpressionItemAuthor(); - case 'timestamp': - return $this->getExpressionItemTimestamp(); - case 'enclosures': - return $this->getExpressionItemEnclosures(); - case 'categories': - return $this->getExpressionItemCategories(); - case 'fix_encoding': - return $this->getSettingFixEncoding(); - } - } - - /** - * Should provide the source website HTML content - * can be easily overwritten for example if special headers or auth infos are required - * @return string - */ - protected function provideWebsiteContent() { - return getContents($this->feedUri); - } - - /** - * Should provide the feeds title - * - * @param DOMXPath $xpath - * @return string - */ - protected function provideFeedTitle(DOMXPath $xpath) { - $title = $xpath->query($this->getParam('feed_title')); - if(count($title) === 1) { - return $this->getItemValueOrNodeValue($title); - } - } - - /** - * Should provide the URL of the feed's favicon - * - * @param DOMXPath $xpath - * @return string - */ - protected function provideFeedIcon(DOMXPath $xpath) { - $icon = $xpath->query($this->getParam('feed_icon')); - if(count($icon) === 1) { - return $this->cleanMediaUrl($this->getItemValueOrNodeValue($icon)); - } - } - - /** - * Should provide the feed's items. - * - * @param DOMXPath $xpath - * @return DOMNodeList - */ - protected function provideFeedItems(DOMXPath $xpath) { - return @$xpath->query($this->getParam('item')); - } - - public function collectData() { - - $this->feedUri = $this->getParam('url'); - - $webPageHtml = new DOMDocument(); - libxml_use_internal_errors(true); - $webPageHtml->loadHTML($this->provideWebsiteContent()); - libxml_clear_errors(); - libxml_use_internal_errors(false); - - $xpath = new DOMXPath($webPageHtml); - - $this->feedName = $this->provideFeedTitle($xpath); - $this->feedIcon = $this->provideFeedIcon($xpath); - - $entries = $this->provideFeedItems($xpath); - if($entries === false) { - return; - } - - foreach ($entries as $entry) { - $item = new \FeedItem(); - foreach(array('title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories') as $param) { - - $expression = $this->getParam($param); - if('' === $expression) { - continue; - } - - //can be a string or DOMNodeList, depending on the expression result - $typedResult = @$xpath->evaluate($expression, $entry); - if ($typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0) - || (is_string($typedResult) && strlen(trim($typedResult)) === 0)) { - continue; - } - - $item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult))); - - } - - $itemId = $this->generateItemId($item); - if(null !== $itemId) { - $item->setUid($itemId); - } - - $this->items[] = $item; - } - - } - - /** - * @param $param - * @param $value - * @return string|array - */ - protected function formatParamValue($param, $value) - { - $value = $this->fixEncoding($value); - switch ($param) { - case 'title': - return $this->formatItemTitle($value); - case 'content': - return $this->formatItemContent($value); - case 'uri': - return $this->formatItemUri($value); - case 'author': - return $this->formatItemAuthor($value); - case 'timestamp': - return $this->formatItemTimestamp($value); - case 'enclosures': - return $this->formatItemEnclosures($value); - case 'categories': - return $this->formatItemCategories($value); - } - return $value; - } - - /** - * Formats the title of a feed item. Takes extracted raw title and returns it formatted - * as string. - * Can be easily overwritten for in case the value needs to be transformed into something - * else. - * @param string $value - * @return string - */ - protected function formatItemTitle($value) { - return $value; - } - - /** - * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix - * timestamp as integer. - * Can be easily overwritten for example if a special format has to be expected on the - * source website. - * @param string $value - * @return string - */ - protected function formatItemContent($value) { - return $value; - } - - /** - * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted - * as string. - * Can be easily overwritten for in case the value needs to be transformed into something - * else. - * @param string $value - * @return string - */ - protected function formatItemUri($value) { - if(strlen($value) === 0) { - return ''; - } - if(strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { - return $value; - } - - return urljoin($this->feedUri, $value); - } - - /** - * Formats the author of a feed item. Takes extracted raw author and returns it formatted - * as string. - * Can be easily overwritten for in case the value needs to be transformed into something - * else. - * @param string $value - * @return string - */ - protected function formatItemAuthor($value) { - return $value; - } - - /** - * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix - * timestamp as integer. - * Can be easily overwritten for example if a special format has to be expected on the - * source website. - * @param string $value - * @return false|int - */ - protected function formatItemTimestamp($value) { - return strtotime($value); - } - - /** - * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them - * formatted as array. - * Can be easily overwritten for in case the values need to be transformed into something - * else. - * @param string $value - * @return array - */ - protected function formatItemEnclosures($value) { - return array($this->cleanMediaUrl($value)); - } - - /** - * Formats the categories of a feed item. Takes extracted raw categories and returns them - * formatted as array. - * Can be easily overwritten for in case the values need to be transformed into something - * else. - * @param string $value - * @return array - */ - protected function formatItemCategories($value) { - return array($value); - } - - /** - * @param $mediaUrl - * @return string|void - */ - protected function cleanMediaUrl($mediaUrl) - { - $pattern = '~(?:http(?:s)?:)?[\/a-zA-Z0-9\-=_,\.\%]+\.(?:jpg|gif|png|jpeg|ico|mp3|webp){1}~i'; - $result = preg_match($pattern, $mediaUrl, $matches); - if(1 !== $result) { - return; - } - return urljoin($this->feedUri, $matches[0]); - } - - /** - * @param $typedResult - * @return string - */ - protected function getItemValueOrNodeValue($typedResult) - { - if($typedResult instanceof DOMNodeList) { - $item = $typedResult->item(0); - if ($item instanceof DOMElement) { - return trim($item->nodeValue); - } elseif ($item instanceof DOMAttr) { - return trim($item->value); - } elseif ($item instanceof DOMText) { - return trim($item->wholeText); - } - } elseif(is_string($typedResult) && strlen($typedResult) > 0) { - return trim($typedResult); - } - returnServerError('Unknown type of XPath expression result.'); - } - - /** - * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts. - * Useful in case of "broken" or "weird" characters in the feed where you'd normally - * expect umlauts. - * - * @param $input - * @return string - */ - protected function fixEncoding($input) - { - return $this->getParam('fix_encoding') ? utf8_decode($input) : $input; - } - - /** - * Allows overriding default mechanism determining items Uid's - * - * @param FeedItem $item - * @return string|null - */ - protected function generateItemId(\FeedItem $item) { - return null; //auto generation - } +abstract class XPathAbstract extends BridgeAbstract +{ + /** + * Source Web page URL (should provide either HTML or XML content) + * You can specify any website URL which serves data suited for display in RSS feeds + * (for example a news blog). + * + * Use {@see XPathAbstract::getSourceUrl()} to read this parameter + */ + const FEED_SOURCE_URL = ''; + + /** + * XPath expression for extracting the feed title from the source page. + * If this is left blank or does not provide any data {@see BridgeAbstract::getName()} + * is used instead as the feed's title. + * + * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter + */ + const XPATH_EXPRESSION_FEED_TITLE = './/title'; + + /** + * XPath expression for extracting the feed favicon URL from the source page. + * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()} + * is used instead as the feed's favicon URL. + * + * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter + */ + const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href'; + + /** + * XPath expression for extracting the feed items from the source page + * Enter an XPath expression matching a list of dom nodes, each node containing one + * feed article item in total (usually a surrounding <div> or <span> tag). This will + * be the context nodes for all of the following expressions. This expression usually + * starts with a single forward slash. + * + * Use {@see XPathAbstract::getExpressionItem()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM = ''; + + /** + * XPath expression for extracting an item title from the item context + * This expression should match a node contained within each article item node + * containing the article headline. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article item node. + * + * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TITLE = ''; + + /** + * XPath expression for extracting an item's content from the item context + * This expression should match a node contained within each article item node + * containing the article content or description. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of the + * article item node. + * + * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CONTENT = ''; + + /** + * XPath expression for extracting an item link from the item context + * This expression should match a node's attribute containing the article URL + * (usually the href attribute of an <a> tag). It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_URI = ''; + + /** + * XPath expression for extracting an item author from the item context + * This expression should match a node contained within each article item + * node containing the article author's name. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. + * + * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_AUTHOR = ''; + + /** + * XPath expression for extracting an item timestamp from the item context + * This expression should match a node or node's attribute containing the + * article timestamp or date (parsable by PHP's strtotime function). It + * should start with a dot followed by two forward slashes, referring to + * any descendant nodes of the article item node. Attributes can be + * selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TIMESTAMP = ''; + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * This expression should match a node's attribute containing an article + * image URL (usually the src attribute of an <img> tag or a style + * attribute). It should start with a dot followed by two forward slashes, + * referring to any descendant nodes of the article item node. Attributes + * can be selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; + + /** + * XPath expression for extracting an item category from the item context + * This expression should match a node or node's attribute contained + * within each article item node containing the article category. This + * could be inside <div> or <span> tags or sometimes be hidden + * in a data attribute. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article + * item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; + + /** + * Fix encoding + * Set this to true for fixing feed encoding by invoking PHP's utf8_decode + * function on all extracted texts. Try this in case you see "broken" or + * "weird" characters in your feed where you'd normally expect umlauts + * or any other non-ascii characters. + * + * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter + */ + const SETTING_FIX_ENCODING = false; + + /** + * Internal storage for resulting feed name, automatically detected + * @var string + */ + private $feedName; + + /** + * Internal storage for resulting feed name, automatically detected + * @var string + */ + private $feedUri; + + /** + * Internal storage for resulting feed favicon, automatically detected + * @var string + */ + private $feedIcon; + + public function getName() + { + return $this->feedName ?: parent::getName(); + } + + public function getURI() + { + return $this->feedUri ?: parent::getURI(); + } + + public function getIcon() + { + return $this->feedIcon ?: parent::getIcon(); + } + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl() + { + return static::FEED_SOURCE_URL; + } + + /** + * XPath expression for extracting the feed title from the source page + * @return string + */ + protected function getExpressionTitle() + { + return static::XPATH_EXPRESSION_FEED_TITLE; + } + + /** + * XPath expression for extracting the feed favicon from the source page + * @return string + */ + protected function getExpressionIcon() + { + return static::XPATH_EXPRESSION_FEED_ICON; + } + + /** + * XPath expression for extracting the feed items from the source page + * @return string + */ + protected function getExpressionItem() + { + return static::XPATH_EXPRESSION_ITEM; + } + + /** + * XPath expression for extracting an item title from the item context + * @return string + */ + protected function getExpressionItemTitle() + { + return static::XPATH_EXPRESSION_ITEM_TITLE; + } + + /** + * XPath expression for extracting an item's content from the item context + * @return string + */ + protected function getExpressionItemContent() + { + return static::XPATH_EXPRESSION_ITEM_CONTENT; + } + + /** + * XPath expression for extracting an item link from the item context + * @return string + */ + protected function getExpressionItemUri() + { + return static::XPATH_EXPRESSION_ITEM_URI; + } + + /** + * XPath expression for extracting an item author from the item context + * @return string + */ + protected function getExpressionItemAuthor() + { + return static::XPATH_EXPRESSION_ITEM_AUTHOR; + } + + /** + * XPath expression for extracting an item timestamp from the item context + * @return string + */ + protected function getExpressionItemTimestamp() + { + return static::XPATH_EXPRESSION_ITEM_TIMESTAMP; + } + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * @return string + */ + protected function getExpressionItemEnclosures() + { + return static::XPATH_EXPRESSION_ITEM_ENCLOSURES; + } + + /** + * XPath expression for extracting an item category from the item context + * @return string + */ + protected function getExpressionItemCategories() + { + return static::XPATH_EXPRESSION_ITEM_CATEGORIES; + } + + /** + * Fix encoding + * @return string + */ + protected function getSettingFixEncoding() + { + return static::SETTING_FIX_ENCODING; + } + + /** + * Internal helper method for quickly accessing all the user defined constants + * in derived classes + * + * @param $name + * @return bool|string + */ + private function getParam($name) + { + switch ($name) { + case 'url': + return $this->getSourceUrl(); + case 'feed_title': + return $this->getExpressionTitle(); + case 'feed_icon': + return $this->getExpressionIcon(); + case 'item': + return $this->getExpressionItem(); + case 'title': + return $this->getExpressionItemTitle(); + case 'content': + return $this->getExpressionItemContent(); + case 'uri': + return $this->getExpressionItemUri(); + case 'author': + return $this->getExpressionItemAuthor(); + case 'timestamp': + return $this->getExpressionItemTimestamp(); + case 'enclosures': + return $this->getExpressionItemEnclosures(); + case 'categories': + return $this->getExpressionItemCategories(); + case 'fix_encoding': + return $this->getSettingFixEncoding(); + } + } + + /** + * Should provide the source website HTML content + * can be easily overwritten for example if special headers or auth infos are required + * @return string + */ + protected function provideWebsiteContent() + { + return getContents($this->feedUri); + } + + /** + * Should provide the feeds title + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedTitle(DOMXPath $xpath) + { + $title = $xpath->query($this->getParam('feed_title')); + if (count($title) === 1) { + return $this->getItemValueOrNodeValue($title); + } + } + + /** + * Should provide the URL of the feed's favicon + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedIcon(DOMXPath $xpath) + { + $icon = $xpath->query($this->getParam('feed_icon')); + if (count($icon) === 1) { + return $this->cleanMediaUrl($this->getItemValueOrNodeValue($icon)); + } + } + + /** + * Should provide the feed's items. + * + * @param DOMXPath $xpath + * @return DOMNodeList + */ + protected function provideFeedItems(DOMXPath $xpath) + { + return @$xpath->query($this->getParam('item')); + } + + public function collectData() + { + $this->feedUri = $this->getParam('url'); + + $webPageHtml = new DOMDocument(); + libxml_use_internal_errors(true); + $webPageHtml->loadHTML($this->provideWebsiteContent()); + libxml_clear_errors(); + libxml_use_internal_errors(false); + + $xpath = new DOMXPath($webPageHtml); + + $this->feedName = $this->provideFeedTitle($xpath); + $this->feedIcon = $this->provideFeedIcon($xpath); + + $entries = $this->provideFeedItems($xpath); + if ($entries === false) { + return; + } + + foreach ($entries as $entry) { + $item = new \FeedItem(); + foreach (['title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories'] as $param) { + $expression = $this->getParam($param); + if ('' === $expression) { + continue; + } + + //can be a string or DOMNodeList, depending on the expression result + $typedResult = @$xpath->evaluate($expression, $entry); + if ( + $typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0) + || (is_string($typedResult) && strlen(trim($typedResult)) === 0) + ) { + continue; + } + + $item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult))); + } + + $itemId = $this->generateItemId($item); + if (null !== $itemId) { + $item->setUid($itemId); + } + + $this->items[] = $item; + } + } + + /** + * @param $param + * @param $value + * @return string|array + */ + protected function formatParamValue($param, $value) + { + $value = $this->fixEncoding($value); + switch ($param) { + case 'title': + return $this->formatItemTitle($value); + case 'content': + return $this->formatItemContent($value); + case 'uri': + return $this->formatItemUri($value); + case 'author': + return $this->formatItemAuthor($value); + case 'timestamp': + return $this->formatItemTimestamp($value); + case 'enclosures': + return $this->formatItemEnclosures($value); + case 'categories': + return $this->formatItemCategories($value); + } + return $value; + } + + /** + * Formats the title of a feed item. Takes extracted raw title and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemTitle($value) + { + return $value; + } + + /** + * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix + * timestamp as integer. + * Can be easily overwritten for example if a special format has to be expected on the + * source website. + * @param string $value + * @return string + */ + protected function formatItemContent($value) + { + return $value; + } + + /** + * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemUri($value) + { + if (strlen($value) === 0) { + return ''; + } + if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + return $value; + } + + return urljoin($this->feedUri, $value); + } + + /** + * Formats the author of a feed item. Takes extracted raw author and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemAuthor($value) + { + return $value; + } + + /** + * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix + * timestamp as integer. + * Can be easily overwritten for example if a special format has to be expected on the + * source website. + * @param string $value + * @return false|int + */ + protected function formatItemTimestamp($value) + { + return strtotime($value); + } + + /** + * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. + * @param string $value + * @return array + */ + protected function formatItemEnclosures($value) + { + return [$this->cleanMediaUrl($value)]; + } + + /** + * Formats the categories of a feed item. Takes extracted raw categories and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. + * @param string $value + * @return array + */ + protected function formatItemCategories($value) + { + return [$value]; + } + + /** + * @param $mediaUrl + * @return string|void + */ + protected function cleanMediaUrl($mediaUrl) + { + $pattern = '~(?:http(?:s)?:)?[\/a-zA-Z0-9\-=_,\.\%]+\.(?:jpg|gif|png|jpeg|ico|mp3|webp){1}~i'; + $result = preg_match($pattern, $mediaUrl, $matches); + if (1 !== $result) { + return; + } + return urljoin($this->feedUri, $matches[0]); + } + + /** + * @param $typedResult + * @return string + */ + protected function getItemValueOrNodeValue($typedResult) + { + if ($typedResult instanceof DOMNodeList) { + $item = $typedResult->item(0); + if ($item instanceof DOMElement) { + return trim($item->nodeValue); + } elseif ($item instanceof DOMAttr) { + return trim($item->value); + } elseif ($item instanceof DOMText) { + return trim($item->wholeText); + } + } elseif (is_string($typedResult) && strlen($typedResult) > 0) { + return trim($typedResult); + } + returnServerError('Unknown type of XPath expression result.'); + } + + /** + * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts. + * Useful in case of "broken" or "weird" characters in the feed where you'd normally + * expect umlauts. + * + * @param $input + * @return string + */ + protected function fixEncoding($input) + { + return $this->getParam('fix_encoding') ? utf8_decode($input) : $input; + } + + /** + * Allows overriding default mechanism determining items Uid's + * + * @param FeedItem $item + * @return string|null + */ + protected function generateItemId(\FeedItem $item) + { + return null; //auto generation + } } diff --git a/lib/contents.php b/lib/contents.php index cc80248b..a01d81e1 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,48 +1,50 @@ <?php -final class HttpException extends \Exception {} +final class HttpException extends \Exception +{ +} // todo: move this somewhere useful, possibly into a function const RSSBRIDGE_HTTP_STATUS_CODES = [ - '100' => 'Continue', - '101' => 'Switching Protocols', - '200' => 'OK', - '201' => 'Created', - '202' => 'Accepted', - '203' => 'Non-Authoritative Information', - '204' => 'No Content', - '205' => 'Reset Content', - '206' => 'Partial Content', - '300' => 'Multiple Choices', - '302' => 'Found', - '303' => 'See Other', - '304' => 'Not Modified', - '305' => 'Use Proxy', - '400' => 'Bad Request', - '401' => 'Unauthorized', - '402' => 'Payment Required', - '403' => 'Forbidden', - '404' => 'Not Found', - '405' => 'Method Not Allowed', - '406' => 'Not Acceptable', - '407' => 'Proxy Authentication Required', - '408' => 'Request Timeout', - '409' => 'Conflict', - '410' => 'Gone', - '411' => 'Length Required', - '412' => 'Precondition Failed', - '413' => 'Request Entity Too Large', - '414' => 'Request-URI Too Long', - '415' => 'Unsupported Media Type', - '416' => 'Requested Range Not Satisfiable', - '417' => 'Expectation Failed', - '429' => 'Too Many Requests', - '500' => 'Internal Server Error', - '501' => 'Not Implemented', - '502' => 'Bad Gateway', - '503' => 'Service Unavailable', - '504' => 'Gateway Timeout', - '505' => 'HTTP Version Not Supported' + '100' => 'Continue', + '101' => 'Switching Protocols', + '200' => 'OK', + '201' => 'Created', + '202' => 'Accepted', + '203' => 'Non-Authoritative Information', + '204' => 'No Content', + '205' => 'Reset Content', + '206' => 'Partial Content', + '300' => 'Multiple Choices', + '302' => 'Found', + '303' => 'See Other', + '304' => 'Not Modified', + '305' => 'Use Proxy', + '400' => 'Bad Request', + '401' => 'Unauthorized', + '402' => 'Payment Required', + '403' => 'Forbidden', + '404' => 'Not Found', + '405' => 'Method Not Allowed', + '406' => 'Not Acceptable', + '407' => 'Proxy Authentication Required', + '408' => 'Request Timeout', + '409' => 'Conflict', + '410' => 'Gone', + '411' => 'Length Required', + '412' => 'Precondition Failed', + '413' => 'Request Entity Too Large', + '414' => 'Request-URI Too Long', + '415' => 'Unsupported Media Type', + '416' => 'Requested Range Not Satisfiable', + '417' => 'Expectation Failed', + '429' => 'Too Many Requests', + '500' => 'Internal Server Error', + '501' => 'Not Implemented', + '502' => 'Bad Gateway', + '503' => 'Service Unavailable', + '504' => 'Gateway Timeout', + '505' => 'HTTP Version Not Supported' ]; /** @@ -61,70 +63,70 @@ const RSSBRIDGE_HTTP_STATUS_CODES = [ * @return string|array */ function getContents( - string $url, - array $httpHeaders = [], - array $curlOptions = [], - bool $returnFull = false + string $url, + array $httpHeaders = [], + array $curlOptions = [], + bool $returnFull = false ) { - $cacheFactory = new CacheFactory(); + $cacheFactory = new CacheFactory(); - $cache = $cacheFactory->create(Configuration::getConfig('cache', 'type')); - $cache->setScope('server'); - $cache->purgeCache(86400); // 24 hours (forced) - $cache->setKey([$url]); + $cache = $cacheFactory->create(Configuration::getConfig('cache', 'type')); + $cache->setScope('server'); + $cache->purgeCache(86400); // 24 hours (forced) + $cache->setKey([$url]); - $config = [ - 'headers' => $httpHeaders, - 'curl_options' => $curlOptions, - ]; - if (defined('PROXY_URL') && !defined('NOPROXY')) { - $config['proxy'] = PROXY_URL; - } - if(!Debug::isEnabled() && $cache->getTime()) { - $config['if_not_modified_since'] = $cache->getTime(); - } + $config = [ + 'headers' => $httpHeaders, + 'curl_options' => $curlOptions, + ]; + if (defined('PROXY_URL') && !defined('NOPROXY')) { + $config['proxy'] = PROXY_URL; + } + if (!Debug::isEnabled() && $cache->getTime()) { + $config['if_not_modified_since'] = $cache->getTime(); + } - $result = _http_request($url, $config); - $response = [ - 'code' => $result['code'], - 'status_lines' => $result['status_lines'], - 'header' => $result['headers'], - 'content' => $result['body'], - ]; + $result = _http_request($url, $config); + $response = [ + 'code' => $result['code'], + 'status_lines' => $result['status_lines'], + 'header' => $result['headers'], + 'content' => $result['body'], + ]; - switch($result['code']) { - case 200: - case 201: - case 202: - if(isset($result['headers']['cache-control'])) { - $cachecontrol = $result['headers']['cache-control']; - $lastValue = array_pop($cachecontrol); - $directives = explode(',', $lastValue); - $directives = array_map('trim', $directives); - if(in_array('no-cache', $directives) || in_array('no-store', $directives)) { - // Don't cache as instructed by the server - break; - } - } - $cache->saveData($result['body']); - break; - case 304: // Not Modified - $response['content'] = $cache->loadData(); - break; - default: - throw new HttpException( - sprintf( - '%s %s', - $result['code'], - RSSBRIDGE_HTTP_STATUS_CODES[$result['code']] ?? '' - ), - $result['code'] - ); - } - if ($returnFull === true) { - return $response; - } - return $response['content']; + switch ($result['code']) { + case 200: + case 201: + case 202: + if (isset($result['headers']['cache-control'])) { + $cachecontrol = $result['headers']['cache-control']; + $lastValue = array_pop($cachecontrol); + $directives = explode(',', $lastValue); + $directives = array_map('trim', $directives); + if (in_array('no-cache', $directives) || in_array('no-store', $directives)) { + // Don't cache as instructed by the server + break; + } + } + $cache->saveData($result['body']); + break; + case 304: // Not Modified + $response['content'] = $cache->loadData(); + break; + default: + throw new HttpException( + sprintf( + '%s %s', + $result['code'], + RSSBRIDGE_HTTP_STATUS_CODES[$result['code']] ?? '' + ), + $result['code'] + ); + } + if ($returnFull === true) { + return $response; + } + return $response['content']; } /** @@ -136,85 +138,85 @@ function getContents( */ function _http_request(string $url, array $config = []): array { - $defaults = [ - 'useragent' => Configuration::getConfig('http', 'useragent'), - 'timeout' => Configuration::getConfig('http', 'timeout'), - 'headers' => [], - 'proxy' => null, - 'curl_options' => [], - 'if_not_modified_since' => null, - 'retries' => 3, - ]; - $config = array_merge($defaults, $config); + $defaults = [ + 'useragent' => Configuration::getConfig('http', 'useragent'), + 'timeout' => Configuration::getConfig('http', 'timeout'), + 'headers' => [], + 'proxy' => null, + 'curl_options' => [], + 'if_not_modified_since' => null, + 'retries' => 3, + ]; + $config = array_merge($defaults, $config); - $ch = curl_init($url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, 5); - curl_setopt($ch, CURLOPT_HEADER, false); - curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']); - curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); - curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); - curl_setopt($ch, CURLOPT_ENCODING, ''); - curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); - if($config['proxy']) { - curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); - } - if (curl_setopt_array($ch, $config['curl_options']) === false) { - throw new \Exception('Tried to set an illegal curl option'); - } + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, 5); + curl_setopt($ch, CURLOPT_HEADER, false); + curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']); + curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); + curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); + curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); + if ($config['proxy']) { + curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); + } + if (curl_setopt_array($ch, $config['curl_options']) === false) { + throw new \Exception('Tried to set an illegal curl option'); + } - if ($config['if_not_modified_since']) { - curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); - curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); - } + if ($config['if_not_modified_since']) { + curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); + curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); + } - $responseStatusLines = []; - $responseHeaders = []; - curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { - $len = strlen($rawHeader); - if ($rawHeader === "\r\n") { - return $len; - } - if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { - $responseStatusLines[] = $rawHeader; - return $len; - } - $header = explode(':', $rawHeader); - if (count($header) === 1) { - return $len; - } - $name = mb_strtolower(trim($header[0])); - $value = trim(implode(':', array_slice($header, 1))); - if (!isset($responseHeaders[$name])) { - $responseHeaders[$name] = []; - } - $responseHeaders[$name][] = $value; - return $len; - }); + $responseStatusLines = []; + $responseHeaders = []; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { + $len = strlen($rawHeader); + if ($rawHeader === "\r\n") { + return $len; + } + if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { + $responseStatusLines[] = $rawHeader; + return $len; + } + $header = explode(':', $rawHeader); + if (count($header) === 1) { + return $len; + } + $name = mb_strtolower(trim($header[0])); + $value = trim(implode(':', array_slice($header, 1))); + if (!isset($responseHeaders[$name])) { + $responseHeaders[$name] = []; + } + $responseHeaders[$name][] = $value; + return $len; + }); - $attempts = 0; - while(true) { - $attempts++; - $data = curl_exec($ch); - if ($data !== false) { - // The network call was successful, so break out of the loop - break; - } - if ($attempts > $config['retries']) { - // Finally give up - throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch))); - } - } + $attempts = 0; + while (true) { + $attempts++; + $data = curl_exec($ch); + if ($data !== false) { + // The network call was successful, so break out of the loop + break; + } + if ($attempts > $config['retries']) { + // Finally give up + throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch))); + } + } - $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - return [ - 'code' => $statusCode, - 'status_lines' => $responseStatusLines, - 'headers' => $responseHeaders, - 'body' => $data, - ]; + $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + return [ + 'code' => $statusCode, + 'status_lines' => $responseStatusLines, + 'headers' => $responseHeaders, + 'body' => $data, + ]; } /** @@ -243,28 +245,31 @@ function _http_request(string $url, array $config = []): array * tags when returning plaintext. * @return false|simple_html_dom Contents as simplehtmldom object. */ -function getSimpleHTMLDOM($url, - $header = array(), - $opts = array(), - $lowercase = true, - $forceTagsClosed = true, - $target_charset = DEFAULT_TARGET_CHARSET, - $stripRN = true, - $defaultBRText = DEFAULT_BR_TEXT, - $defaultSpanText = DEFAULT_SPAN_TEXT){ - - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); - return str_get_html($content, - $lowercase, - $forceTagsClosed, - $target_charset, - $stripRN, - $defaultBRText, - $defaultSpanText); +function getSimpleHTMLDOM( + $url, + $header = [], + $opts = [], + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = true, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT +) { + $content = getContents( + $url, + $header ?? [], + $opts ?? [] + ); + return str_get_html( + $content, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText + ); } /** @@ -297,53 +302,58 @@ function getSimpleHTMLDOM($url, * tags when returning plaintext. * @return false|simple_html_dom Contents as simplehtmldom object. */ -function getSimpleHTMLDOMCached($url, - $duration = 86400, - $header = array(), - $opts = array(), - $lowercase = true, - $forceTagsClosed = true, - $target_charset = DEFAULT_TARGET_CHARSET, - $stripRN = true, - $defaultBRText = DEFAULT_BR_TEXT, - $defaultSpanText = DEFAULT_SPAN_TEXT){ - - Debug::log('Caching url ' . $url . ', duration ' . $duration); +function getSimpleHTMLDOMCached( + $url, + $duration = 86400, + $header = [], + $opts = [], + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = true, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT +) { + Debug::log('Caching url ' . $url . ', duration ' . $duration); - // Initialize cache - $cacheFac = new CacheFactory(); + // Initialize cache + $cacheFac = new CacheFactory(); - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); - $cache->setScope('pages'); - $cache->purgeCache(86400); // 24 hours (forced) + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope('pages'); + $cache->purgeCache(86400); // 24 hours (forced) - $params = array($url); - $cache->setKey($params); + $params = [$url]; + $cache->setKey($params); - // Determine if cached file is within duration - $time = $cache->getTime(); - if($time !== false - && (time() - $duration < $time) - && !Debug::isEnabled()) { // Contents within duration - $content = $cache->loadData(); - } else { // Content not within duration - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); - if($content !== false) { - $cache->saveData($content); - } - } + // Determine if cached file is within duration + $time = $cache->getTime(); + if ( + $time !== false + && (time() - $duration < $time) + && !Debug::isEnabled() + ) { // Contents within duration + $content = $cache->loadData(); + } else { // Content not within duration + $content = getContents( + $url, + $header ?? [], + $opts ?? [] + ); + if ($content !== false) { + $cache->saveData($content); + } + } - return str_get_html($content, - $lowercase, - $forceTagsClosed, - $target_charset, - $stripRN, - $defaultBRText, - $defaultSpanText); + return str_get_html( + $content, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText + ); } /** @@ -360,49 +370,53 @@ function getSimpleHTMLDOMCached($url, * @param string $url The URL or path to the file. * @return string The MIME type of the file. */ -function getMimeType($url) { - static $mime = null; +function getMimeType($url) +{ + static $mime = null; - if (is_null($mime)) { - // Default values, overriden by /etc/mime.types when present - $mime = array( - 'jpg' => 'image/jpeg', - 'gif' => 'image/gif', - 'png' => 'image/png', - 'image' => 'image/*', - 'mp3' => 'audio/mpeg', - ); - // '@' is used to mute open_basedir warning, see issue #818 - if (@is_readable('/etc/mime.types')) { - $file = fopen('/etc/mime.types', 'r'); - while(($line = fgets($file)) !== false) { - $line = trim(preg_replace('/#.*/', '', $line)); - if(!$line) - continue; - $parts = preg_split('/\s+/', $line); - if(count($parts) == 1) - continue; - $type = array_shift($parts); - foreach($parts as $part) - $mime[$part] = $type; - } - fclose($file); - } - } + if (is_null($mime)) { + // Default values, overriden by /etc/mime.types when present + $mime = [ + 'jpg' => 'image/jpeg', + 'gif' => 'image/gif', + 'png' => 'image/png', + 'image' => 'image/*', + 'mp3' => 'audio/mpeg', + ]; + // '@' is used to mute open_basedir warning, see issue #818 + if (@is_readable('/etc/mime.types')) { + $file = fopen('/etc/mime.types', 'r'); + while (($line = fgets($file)) !== false) { + $line = trim(preg_replace('/#.*/', '', $line)); + if (!$line) { + continue; + } + $parts = preg_split('/\s+/', $line); + if (count($parts) == 1) { + continue; + } + $type = array_shift($parts); + foreach ($parts as $part) { + $mime[$part] = $type; + } + } + fclose($file); + } + } - if (strpos($url, '?') !== false) { - $url_temp = substr($url, 0, strpos($url, '?')); - if (strpos($url, '#') !== false) { - $anchor = substr($url, strpos($url, '#')); - $url_temp .= $anchor; - } - $url = $url_temp; - } + if (strpos($url, '?') !== false) { + $url_temp = substr($url, 0, strpos($url, '?')); + if (strpos($url, '#') !== false) { + $anchor = substr($url, strpos($url, '#')); + $url_temp .= $anchor; + } + $url = $url_temp; + } - $ext = strtolower(pathinfo($url, PATHINFO_EXTENSION)); - if (!empty($mime[$ext])) { - return $mime[$ext]; - } + $ext = strtolower(pathinfo($url, PATHINFO_EXTENSION)); + if (!empty($mime[$ext])) { + return $mime[$ext]; + } - return 'application/octet-stream'; + return 'application/octet-stream'; } diff --git a/lib/error.php b/lib/error.php index c2f26247..f9950cea 100644 --- a/lib/error.php +++ b/lib/error.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -20,8 +21,9 @@ * @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP * status codes */ -function returnError($message, $code){ - throw new \Exception($message, $code); +function returnError($message, $code) +{ + throw new \Exception($message, $code); } /** @@ -29,8 +31,9 @@ function returnError($message, $code){ * * @param string $message The error message */ -function returnClientError($message){ - returnError($message, 400); +function returnClientError($message) +{ + returnError($message, 400); } /** @@ -38,8 +41,9 @@ function returnClientError($message){ * * @param string $message The error message */ -function returnServerError($message){ - returnError($message, 500); +function returnServerError($message) +{ + returnError($message, 500); } /** @@ -50,27 +54,28 @@ function returnServerError($message){ * * @return int The total number the same error has appeared */ -function logBridgeError($bridgeName, $code) { - $cacheFac = new CacheFactory(); +function logBridgeError($bridgeName, $code) +{ + $cacheFac = new CacheFactory(); - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); - $cache->setScope('error_reporting'); - $cache->setkey($bridgeName . '_' . $code); - $cache->purgeCache(86400); // 24 hours + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope('error_reporting'); + $cache->setkey($bridgeName . '_' . $code); + $cache->purgeCache(86400); // 24 hours - if($report = $cache->loadData()) { - $report = json_decode($report, true); - $report['time'] = time(); - $report['count']++; - } else { - $report = array( - 'error' => $code, - 'time' => time(), - 'count' => 1, - ); - } + if ($report = $cache->loadData()) { + $report = json_decode($report, true); + $report['time'] = time(); + $report['count']++; + } else { + $report = [ + 'error' => $code, + 'time' => time(), + 'count' => 1, + ]; + } - $cache->saveData(json_encode($report)); + $cache->saveData(json_encode($report)); - return $report['count']; + return $report['count']; } diff --git a/lib/html.php b/lib/html.php index 69bd1424..e82d5e0e 100644 --- a/lib/html.php +++ b/lib/html.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -25,27 +26,29 @@ * @todo Check if this implementation is still necessary, because simplehtmldom * already removes some of the tags (search for `remove_noise` in simple_html_dom.php). */ -function sanitize($html, - $tags_to_remove = array('script', 'iframe', 'input', 'form'), - $attributes_to_keep = array('title', 'href', 'src'), - $text_to_keep = array()){ - - $htmlContent = str_get_html($html); - - foreach($htmlContent->find('*') as $element) { - if(in_array($element->tag, $text_to_keep)) { - $element->outertext = $element->plaintext; - } elseif(in_array($element->tag, $tags_to_remove)) { - $element->outertext = ''; - } else { - foreach($element->getAllAttributes() as $attributeName => $attribute) { - if(!in_array($attributeName, $attributes_to_keep)) - $element->removeAttribute($attributeName); - } - } - } - - return $htmlContent; +function sanitize( + $html, + $tags_to_remove = ['script', 'iframe', 'input', 'form'], + $attributes_to_keep = ['title', 'href', 'src'], + $text_to_keep = [] +) { + $htmlContent = str_get_html($html); + + foreach ($htmlContent->find('*') as $element) { + if (in_array($element->tag, $text_to_keep)) { + $element->outertext = $element->plaintext; + } elseif (in_array($element->tag, $tags_to_remove)) { + $element->outertext = ''; + } else { + foreach ($element->getAllAttributes() as $attributeName => $attribute) { + if (!in_array($attributeName, $attributes_to_keep)) { + $element->removeAttribute($attributeName); + } + } + } + } + + return $htmlContent; } /** @@ -74,23 +77,18 @@ function sanitize($html, * @param string $htmlContent The HTML content * @return string The HTML content with all ocurrences replaced */ -function backgroundToImg($htmlContent) { - - $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; - $htmlContent = str_get_html($htmlContent); - - foreach($htmlContent->find('*') as $element) { - - if(preg_match($regex, $element->style, $matches) > 0) { - - $element->outertext = '<img style="display:block;" src="' . $matches[1] . '" />'; - - } - - } - - return $htmlContent; - +function backgroundToImg($htmlContent) +{ + $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; + $htmlContent = str_get_html($htmlContent); + + foreach ($htmlContent->find('*') as $element) { + if (preg_match($regex, $element->style, $matches) > 0) { + $element->outertext = '<img style="display:block;" src="' . $matches[1] . '" />'; + } + } + + return $htmlContent; } /** @@ -104,26 +102,27 @@ function backgroundToImg($htmlContent) { * @param string $server Fully qualified URL to the page containing relative links * @return object Content with fixed URLs. */ -function defaultLinkTo($content, $server){ - $string_convert = false; - if (is_string($content)) { - $string_convert = true; - $content = str_get_html($content); - } - - foreach($content->find('img') as $image) { - $image->src = urljoin($server, $image->src); - } - - foreach($content->find('a') as $anchor) { - $anchor->href = urljoin($server, $anchor->href); - } - - if ($string_convert) { - $content = $content->outertext; - } - - return $content; +function defaultLinkTo($content, $server) +{ + $string_convert = false; + if (is_string($content)) { + $string_convert = true; + $content = str_get_html($content); + } + + foreach ($content->find('img') as $image) { + $image->src = urljoin($server, $image->src); + } + + foreach ($content->find('a') as $anchor) { + $anchor->href = urljoin($server, $anchor->href); + } + + if ($string_convert) { + $content = $content->outertext; + } + + return $content; } /** @@ -135,12 +134,13 @@ function defaultLinkTo($content, $server){ * @return string|bool Extracted string, e.g. `John Doe`, or false if the * delimiters were not found. */ -function extractFromDelimiters($string, $start, $end) { - if (strpos($string, $start) !== false) { - $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); - $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); - return $section_retrieved; - } return false; +function extractFromDelimiters($string, $start, $end) +{ + if (strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } return false; } /** @@ -151,13 +151,14 @@ function extractFromDelimiters($string, $start, $end) { * @param string $end End delimiter, e.g. `</script>` * @return string Cleaned string, e.g. `foobar` */ -function stripWithDelimiters($string, $start, $end) { - while(strpos($string, $start) !== false) { - $section_to_remove = substr($string, strpos($string, $start)); - $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); - $string = str_replace($section_to_remove, '', $string); - } - return $string; +function stripWithDelimiters($string, $start, $end) +{ + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + return $string; } /** @@ -170,28 +171,29 @@ function stripWithDelimiters($string, $start, $end) { * * @todo This function needs more documentation to make it maintainable. */ -function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ - $open_tag = '<' . $tag_name; - $close_tag = '</' . $tag_name . '>'; - $close_tag_length = strlen($close_tag); - if(strpos($tag_start, $open_tag) === 0) { - while(strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; +function stripRecursiveHTMLSection($string, $tag_name, $tag_start) +{ + $open_tag = '<' . $tag_name; + $close_tag = '</' . $tag_name . '>'; + $close_tag_length = strlen($close_tag); + if (strpos($tag_start, $open_tag) === 0) { + while (strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; } /** @@ -202,8 +204,8 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ * @param string $string Input string in Markdown format * @return string output string in HTML format */ -function markdownToHtml($string) { - - $Parsedown = new Parsedown(); - return $Parsedown->text($string); +function markdownToHtml($string) +{ + $Parsedown = new Parsedown(); + return $Parsedown->text($string); } diff --git a/lib/php8backports.php b/lib/php8backports.php index 3b2bb966..30dfdbd9 100644 --- a/lib/php8backports.php +++ b/lib/php8backports.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ // based on https://github.com/laravel/framework/blob/8.x/src/Illuminate/Support/Str.php @@ -34,19 +35,22 @@ // THE SOFTWARE. if (!function_exists('str_starts_with')) { - function str_starts_with($haystack, $needle) { - return (string)$needle !== '' && strncmp($haystack, $needle, strlen($needle)) === 0; - } + function str_starts_with($haystack, $needle) + { + return (string)$needle !== '' && strncmp($haystack, $needle, strlen($needle)) === 0; + } } if (!function_exists('str_ends_with')) { - function str_ends_with($haystack, $needle) { - return $needle !== '' && substr($haystack, -strlen($needle)) === (string)$needle; - } + function str_ends_with($haystack, $needle) + { + return $needle !== '' && substr($haystack, -strlen($needle)) === (string)$needle; + } } if (!function_exists('str_contains')) { - function str_contains($haystack, $needle) { - return $needle !== '' && mb_strpos($haystack, $needle) !== false; - } + function str_contains($haystack, $needle) + { + return $needle !== '' && mb_strpos($haystack, $needle) !== false; + } } diff --git a/lib/rssbridge.php b/lib/rssbridge.php index cd156fe8..560c0fe4 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** Path to the root folder of RSS-Bridge (where index.php is located) */ @@ -64,19 +65,19 @@ require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; spl_autoload_register(function ($className) { - $folders = [ - __DIR__ . '/../actions/', - __DIR__ . '/../bridges/', - __DIR__ . '/../caches/', - __DIR__ . '/../formats/', - __DIR__ . '/../lib/', - ]; - foreach ($folders as $folder) { - $file = $folder . $className . '.php'; - if (is_file($file)) { - require $file; - } - } + $folders = [ + __DIR__ . '/../actions/', + __DIR__ . '/../bridges/', + __DIR__ . '/../caches/', + __DIR__ . '/../formats/', + __DIR__ . '/../lib/', + ]; + foreach ($folders as $folder) { + $file = $folder . $className . '.php'; + if (is_file($file)) { + require $file; + } + } }); Configuration::verifyInstallation(); |