diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Authentication.php | 49 | ||||
-rw-r--r-- | lib/BridgeAbstract.php | 115 | ||||
-rw-r--r-- | lib/BridgeCard.php | 91 | ||||
-rw-r--r-- | lib/BridgeInterface.php | 53 | ||||
-rw-r--r-- | lib/BridgeList.php | 61 | ||||
-rw-r--r-- | lib/CacheInterface.php | 44 | ||||
-rw-r--r-- | lib/Configuration.php | 112 | ||||
-rw-r--r-- | lib/Exceptions.php | 78 | ||||
-rw-r--r-- | lib/FeedExpander.php | 176 | ||||
-rw-r--r-- | lib/FormatAbstract.php | 115 | ||||
-rw-r--r-- | lib/FormatInterface.php | 73 | ||||
-rw-r--r-- | lib/ParameterValidator.php | 72 | ||||
-rw-r--r-- | lib/contents.php | 104 | ||||
-rw-r--r-- | lib/error.php | 31 | ||||
-rw-r--r-- | lib/html.php | 117 |
15 files changed, 1206 insertions, 85 deletions
diff --git a/lib/Authentication.php b/lib/Authentication.php index da247630..f9683940 100644 --- a/lib/Authentication.php +++ b/lib/Authentication.php @@ -1,6 +1,48 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Authentication module for RSS-Bridge. + * + * This class implements an authentication module for RSS-Bridge, utilizing the + * HTTP authentication capabilities of PHP. + * + * _Notice_: Authentication via HTTP does not prevent users from accessing files + * on your server. If your server supports `.htaccess`, you should globally restrict + * access to files instead. + * + * @link https://php.net/manual/en/features.http-auth.php HTTP authentication with PHP + * @link https://httpd.apache.org/docs/2.4/howto/htaccess.html Apache HTTP Server + * Tutorial: .htaccess files + * + * @todo This class should respond with an error when creating an object from it. + * See {@see Bridge}, {@see Cache} or {@see Format} for reference. + * @todo Configuration parameters should be stored internally instead of accessing + * the configuration class directly. + * @todo Add functions to detect if a user is authenticated or not. This can be + * utilized for limiting access to authorized users only. + */ class Authentication { + /** + * Requests the user for login credentials if necessary. + * + * Responds to an authentication request or returns the `WWW-Authenticate` + * header if authentication is enabled in the configuration of RSS-Bridge + * (`[authentication] enable = true`). 
+ * + * @return void + */ public static function showPromptIfNeeded() { if(Configuration::getConfig('authentication', 'enable') === true) { @@ -13,6 +55,13 @@ class Authentication { } + /** + * Verifies if an authentication request was received and compares the + * provided username and password to the configuration of RSS-Bridge + * (`[authentication] username` and `[authentication] password`). + * + * @return bool True if authentication succeeded. + */ public static function verifyPrompt() { if(isset($_SERVER['PHP_AUTH_USER']) && isset($_SERVER['PHP_AUTH_PW'])) { diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index aa654111..b02280e4 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -1,32 +1,112 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ +/** + * An abstract class for bridges + * + * This class implements {@see BridgeInterface} with most common functions in + * order to reduce code duplication. Bridges should inherit from this class + * instead of implementing the interface manually. + * + * @todo Move constants to the interface (this is supported by PHP) + * @todo Change visibility of constants to protected + * @todo Return `self` on more functions to allow chaining + * @todo Add specification for PARAMETERS () + * @todo Add specification for $items + */ abstract class BridgeAbstract implements BridgeInterface { + /** + * Name of the bridge + * + * Use {@see BridgeAbstract::getName()} to read this parameter + */ const NAME = 'Unnamed bridge'; + + /** + * URI to the site the bridge is intended to be used for. 
+ * + * Use {@see BridgeAbstract::getURI()} to read this parameter + */ const URI = ''; + + /** + * A brief description of what the bridge can do + * + * Use {@see BridgeAbstract::getDescription()} to read this parameter + */ const DESCRIPTION = 'No description provided'; + + /** + * The name of the maintainer. Multiple maintainers can be separated by comma + * + * Use {@see BridgeAbstract::getMaintainer()} to read this parameter + */ const MAINTAINER = 'No maintainer'; + + /** + * The default cache timeout for the bridge + * + * Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter + */ const CACHE_TIMEOUT = 3600; + + /** + * Parameters for the bridge + * + * Use {@see BridgeAbstract::getParameters()} to read this parameter + */ const PARAMETERS = array(); + /** + * Holds the list of items collected by the bridge + * + * Items must be collected by {@see BridgeInterface::collectData()} + * + * Use {@see BridgeAbstract::getItems()} to access items. + * + * @var array + */ protected $items = array(); + + /** + * Holds the list of input parameters used by the bridge + * + * Do not access this parameter directly! + * Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead! + * + * @var array + */ protected $inputs = array(); - protected $queriedContext = ''; /** - * Return items stored in the bridge - * @return mixed - */ + * Holds the name of the queried context + * + * @var string + */ + protected $queriedContext = ''; + + /** {@inheritdoc} */ public function getItems(){ return $this->items; } /** - * Sets the input values for a given context. Existing values are - * overwritten. + * Sets the input values for a given context. 
* * @param array $inputs Associative array of inputs - * @param string $context The context name + * @param string $queriedContext The context name + * @return void */ protected function setInputs(array $inputs, $queriedContext){ // Import and assign all inputs to their context @@ -103,9 +183,15 @@ abstract class BridgeAbstract implements BridgeInterface { } /** - * Defined datas with parameters depending choose bridge - * @param array array with expected bridge paramters - */ + * Set inputs for the bridge + * + * Returns errors and aborts execution if the provided input parameters are + * invalid. + * + * @param array List of input parameters. Each element in this list must + * relate to an item in {@see BridgeAbstract::PARAMETERS} + * @return void + */ public function setDatas(array $inputs){ if(empty(static::PARAMETERS)) { @@ -148,7 +234,7 @@ abstract class BridgeAbstract implements BridgeInterface { * Returns the value for the provided input * * @param string $input The input name - * @return mixed Returns the input value or null if the input is not defined + * @return mixed|null The input value or null if the input is not defined */ protected function getInput($input){ if(!isset($this->inputs[$this->queriedContext][$input]['value'])) { @@ -157,30 +243,37 @@ abstract class BridgeAbstract implements BridgeInterface { return $this->inputs[$this->queriedContext][$input]['value']; } + /** {@inheritdoc} */ public function getDescription(){ return static::DESCRIPTION; } + /** {@inheritdoc} */ public function getMaintainer(){ return static::MAINTAINER; } + /** {@inheritdoc} */ public function getName(){ return static::NAME; } + /** {@inheritdoc} */ public function getIcon(){ return ''; } + /** {@inheritdoc} */ public function getParameters(){ return static::PARAMETERS; } + /** {@inheritdoc} */ public function getURI(){ return static::URI; } + /** {@inheritdoc} */ public function getCacheTimeout(){ return static::CACHE_TIMEOUT; } diff --git a/lib/BridgeCard.php 
b/lib/BridgeCard.php index 28e74fe6..cb7e607b 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -1,6 +1,33 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * A generator class for a single bridge card on the home page of RSS-Bridge. + * + * This class generates the HTML content for a single bridge card for the home + * page of RSS-Bridge. + * + * @todo Return error if a caller creates an object of this class. + * @todo Use self:: instead of BridgeCard:: in this class + */ final class BridgeCard { + /** + * Build a HTML document string of buttons for each of the provided formats + * + * @param array $formats A list of format names + * @return string The document string + */ private static function buildFormatButtons($formats) { $buttons = ''; @@ -16,6 +43,13 @@ final class BridgeCard { return $buttons; } + /** + * Get the form header for a bridge card + * + * @param string $bridgeName The bridge name + * @param bool $isHttps If disabled, adds a warning to the form + * @return string The form header + */ private static function getFormHeader($bridgeName, $isHttps = false) { $form = <<<EOD <form method="GET" action="?"> @@ -31,6 +65,17 @@ This bridge is not fetching its content through a secure connection</div>'; return $form; } + /** + * Get the form body for a bridge + * + * @param string $bridgeName The bridge name + * @param array $formats A list of supported formats + * @param bool $isActive Indicates if a bridge is enabled or not + * @param bool $isHttps Indicates if a bridge uses HTTPS or not + * @param string $parameterName Sets the bridge context for the current form + * @param array $parameters The bridge parameters 
+ * @return string The form body + */ private static function getForm($bridgeName, $formats, $isActive = false, @@ -88,6 +133,12 @@ This bridge is not fetching its content through a secure connection</div>'; return $form . '</form>' . PHP_EOL; } + /** + * Get input field attributes + * + * @param array $entry The current entry + * @return string The input field attributes + */ private static function getInputAttributes($entry) { $retVal = ''; @@ -103,6 +154,14 @@ This bridge is not fetching its content through a secure connection</div>'; return $retVal; } + /** + * Get text input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The text input field + */ private static function getTextInput($entry, $id, $name) { return '<input ' . BridgeCard::getInputAttributes($entry) @@ -118,6 +177,14 @@ This bridge is not fetching its content through a secure connection</div>'; . PHP_EOL; } + /** + * Get number input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The number input field + */ private static function getNumberInput($entry, $id, $name) { return '<input ' . BridgeCard::getInputAttributes($entry) @@ -133,6 +200,14 @@ This bridge is not fetching its content through a secure connection</div>'; . PHP_EOL; } + /** + * Get list input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The list input field + */ private static function getListInput($entry, $id, $name) { $list = '<select ' . 
BridgeCard::getInputAttributes($entry) @@ -185,6 +260,14 @@ This bridge is not fetching its content through a secure connection</div>'; return $list; } + /** + * Get checkbox input + * + * @param array $entry The current entry + * @param string $id The field ID + * @param string $name The field name + * @return string The checkbox input field + */ private static function getCheckboxInput($entry, $id, $name) { return '<input ' . BridgeCard::getInputAttributes($entry) @@ -198,6 +281,14 @@ This bridge is not fetching its content through a secure connection</div>'; . PHP_EOL; } + /** + * Gets a single bridge card + * + * @param string $bridgeName The bridge name + * @param array $formats A list of formats + * @param bool $isActive Indicates if the bridge is active or not + * @return string The bridge card + */ static function displayBridgeCard($bridgeName, $formats, $isActive = true){ $bridge = Bridge::create($bridgeName); diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php index f2ff11d5..bc48fe04 100644 --- a/lib/BridgeInterface.php +++ b/lib/BridgeInterface.php @@ -1,4 +1,57 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * The bridge interface + * + * A bridge is a class that is responsible for collecting and transforming data + * from one hosting provider into an internal representation of feed data, that + * can later be transformed into different feed formats (see {@see FormatInterface}). + * + * For this purpose, all bridges need to perform three common operations: + * + * 1. Collect data from a remote site. + * 2. Extract the required contents. + * 3. Add the contents to the internal data structure. 
+ * + * Bridges can optionally specify parameters to customize bridge behavior based + * on user input. For example, a user could specify how many items to return in + * the feed and where to get them. + * + * In order to present a bridge on the home page, and for the purpose of bridge + * specific behaviour, additional information must be provided by the bridge: + * + * * **Name** + * The name of the bridge that can be displayed to users. + * + * * **Description** + * A brief description for the bridge that can be displayed to users. + * + * * **URI** + * A link to the hosting provider. + * + * * **Maintainer** + * The GitHub username of the bridge maintainer + * + * * **Parameters** + * A list of parameters for customization + * + * * **Icon** + * A link to the favicon of the hosting provider + * + * * **Cache timeout** + * The default cache timeout for the bridge. + */ interface BridgeInterface { /** diff --git a/lib/BridgeList.php b/lib/BridgeList.php index 1ac7192e..5ff13cf4 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -1,6 +1,31 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * A generator class for the home page of RSS-Bridge. + * + * This class generates the HTML content for displaying all bridges on the home + * page of RSS-Bridge. + * + * @todo Return error if a caller creates an object of this class. 
+ */ final class BridgeList { + /** + * Get the document head + * + * @return string The document head + */ private static function getHead() { return <<<EOD <head> @@ -22,6 +47,15 @@ final class BridgeList { EOD; } + /** + * Get the document body for all bridge cards + * + * @param bool $showInactive Inactive bridges are visible on the home page if + * enabled. + * @param int $totalBridges (ref) Returns the total number of bridges. + * @param int $totalActiveBridges (ref) Returns the number of active bridges. + * @return string The document body for all bridge cards. + */ private static function getBridges($showInactive, &$totalBridges, &$totalActiveBridges) { $body = ''; @@ -54,6 +88,11 @@ EOD; return $body; } + /** + * Get the document header + * + * @return string The document header + */ private static function getHeader() { $warning = ''; @@ -80,6 +119,11 @@ EOD; EOD; } + /** + * Get the searchbar + * + * @return string The searchbar + */ private static function getSearchbar() { $query = filter_input(INPUT_GET, 'q'); @@ -93,6 +137,16 @@ EOD; EOD; } + /** + * Get the document footer + * + * @param int $totalBridges The total number of bridges, shown in the footer + * @param int $totalActiveBridges The total number of active bridges, shown + * in the footer. + * @param bool $showInactive Sets the 'Show active'/'Show inactive' text in + * the footer. + * @return string The document footer + */ private static function getFooter($totalBridges, $totalActiveBridges, $showInactive) { $version = Configuration::getVersion(); @@ -131,6 +185,13 @@ EOD; EOD; } + /** + * Create the entire home page + * + * @param bool $showInactive Inactive bridges are displayed on the home page, + * if enabled. 
+ * @return string The home page + */ static function create($showInactive = true) { $totalBridges = 0; diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 5753c0eb..bd2d5613 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -1,7 +1,51 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * The cache interface + * + * @todo Add missing function to the interface + * @todo Explain parameters and return values in more detail + * @todo Return self more often (to allow call chaining) + */ interface CacheInterface { + + /** + * Loads data from cache + * + * @return mixed The cache data + */ public function loadData(); + + /** + * Stores data to the cache + * + * @param mixed $datas The data to store + * @return self The cache object + */ public function saveData($datas); + + /** + * Returns the timestamp for the current cache file + * + * @return int Timestamp + */ public function getTime(); + + /** + * Removes any data that is older than the specified duration from cache + * + * @param int $duration The cache duration in seconds + */ public function purgeCache($duration); } diff --git a/lib/Configuration.php b/lib/Configuration.php index e141ce93..2f17922e 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -1,10 +1,77 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code.
+ * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Configuration module for RSS-Bridge. + * + * This class implements a configuration module for RSS-Bridge. + * + * @todo Throw an exception if the caller tries to create objects of this class. + * @todo Make this class final. + */ class Configuration { + /** + * Holds the current release version of RSS-Bridge. + * + * Do not access this property directly! + * Use {@see Configuration::getVersion()} instead. + * + * @var string + * + * @todo Replace this property by a constant. + */ public static $VERSION = 'dev.2018-11-10'; + /** + * Holds the configuration data. + * + * Do not access this property directly! + * Use {@see Configuration::getConfig()} instead. + * + * @var array|null + * + * @todo Change the scope of this property to protected or private + */ public static $config = null; + /** + * Verifies the current installation of RSS-Bridge and PHP. + * + * Returns an error message and aborts execution if the installation does + * not satisfy the requirements of RSS-Bridge. 
+ * + * **Requirements** + * - PHP 5.6.0 or higher + * - `openssl` extension + * - `libxml` extension + * - `mbstring` extension + * - `simplexml` extension + * - `curl` extension + * - `json` extension + * - The cache folder specified by {@see PATH_CACHE} requires write permission + * - The whitelist file specified by {@see WHITELIST} requires write permission + * + * @link http://php.net/supported-versions.php PHP Supported Versions + * @link http://php.net/manual/en/book.openssl.php OpenSSL + * @link http://php.net/manual/en/book.libxml.php libxml + * @link http://php.net/manual/en/book.mbstring.php Multibyte String (mbstring) + * @link http://php.net/manual/en/book.simplexml.php SimpleXML + * @link http://php.net/manual/en/book.curl.php Client URL Library (curl) + * @link http://php.net/manual/en/book.json.php JavaScript Object Notation (json) + * + * @return void + */ public static function verifyInstallation() { // Check PHP version @@ -40,6 +107,33 @@ class Configuration { } + /** + * Loads the configuration from disk and checks if the parameters are valid. + * + * Returns an error message and aborts execution if the configuration is invalid. + * + * The RSS-Bridge configuration is split into two files: + * - `config.default.ini.php`: The default configuration file that ships with + * every release of RSS-Bridge (do not modify this file!). + * - `config.ini.php`: The local configuration file that can be modified by + * server administrators. + * + * RSS-Bridge will first load `config.default.ini.php` into memory and then + * replace parameters with the contents of `config.ini.php`. That way new + * parameters are automatically initialized with default values and custom + * configurations can be reduced to the minimum set of parameters necessary + * (only the ones that changed). + * + * The configuration files must be placed in the root folder of RSS-Bridge + * (next to `index.php`).
+ * + * _Notice_: The configuration is stored in {@see Configuration::$config}. + * + * @return void + * + * @todo Use {@see PATH_ROOT} to locate configuration files. + * @todo Add documentation for constants defined by this function. + */ public static function loadConfiguration() { if(!file_exists('config.default.ini.php')) @@ -97,6 +191,15 @@ class Configuration { } + /** + * Returns the value of a parameter identified by category and key. + * + * @param string $category The section name (category). + * @param string $key The property name (key). + * @return mixed|null The parameter value. + * + * @todo Rename $category to $section for clarity. + */ public static function getConfig($category, $key) { if(array_key_exists($category, self::$config) && array_key_exists($key, self::$config[$category])) { @@ -107,6 +210,15 @@ class Configuration { } + /** + * Returns the current version string of RSS-Bridge. + * + * This function returns the contents of {@see Configuration::$VERSION} for + * regular installations and the git branch name and commit id for instances + * running in a git environment. + * + * @return string The version string. + */ public static function getVersion() { $headFile = '.git/HEAD'; diff --git a/lib/Exceptions.php b/lib/Exceptions.php index 32b33f2b..e9b4d7b8 100644 --- a/lib/Exceptions.php +++ b/lib/Exceptions.php @@ -1,17 +1,35 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Implements a RSS-Bridge specific exception class + * + * @todo This class serves no purpose, remove it! 
+ */ class HttpException extends \Exception{} /** * Returns an URL that automatically populates a new issue on GitHub based * on the information provided * - * @param $title string Sets the title of the issue - * @param $body string Sets the body of the issue (GitHub markdown applies) - * @param $labels mixed (optional) Specifies labels to add to the issue - * @param $maintainer string (optional) Specifies the maintainer for the issue. + * @param string $title string Sets the title of the issue + * @param string $body string Sets the body of the issue (GitHub markdown applies) + * @param string $labels mixed (optional) Specifies labels to add to the issue + * @param string $maintainer string (optional) Specifies the maintainer for the issue. * The maintainer only applies if part of the development team! - * @return string Returns a qualified URL to a new issue with populated conent. - * Returns null if title or body is null or empty + * @return string|null A qualified URL to a new issue with populated content or null. + * + * @todo This function belongs inside a class */ function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null){ if(!isset($title) || !isset($body) || empty($title) || empty($body)) { @@ -49,10 +67,11 @@ function buildGitHubIssueQuery($title, $body, $labels = null, $maintainer = null /** * Returns the exception message as HTML string * - * @param $e Exception The exception to show - * @param $bridge object The bridge object - * @return string Returns the exception as HTML string. Returns null if the - * provided parameter are invalid + * @param object $e Exception The exception to show + * @param object $bridge object The bridge object + * @return string|null Returns the exception as HTML string or null.
+ * + * @todo This function belongs inside a class */ function buildBridgeException($e, $bridge){ if(( !($e instanceof \Exception) && !($e instanceof \Error)) || !($bridge instanceof \BridgeInterface)) { @@ -87,10 +106,11 @@ EOD; /** * Returns the exception message as HTML string * - * @param $e Exception The exception to show - * @param $bridge object The bridge object - * @return string Returns the exception as HTML string. Returns null if the - * provided parameter are invalid + * @param object $e Exception The exception to show + * @param object $bridge object The bridge object + * @return string|null Returns the exception as HTML string or null. + * + * @todo This function belongs inside a class */ function buildTransformException($e, $bridge){ if(( !($e instanceof \Exception) && !($e instanceof \Error)) || !($bridge instanceof \BridgeInterface)) { @@ -114,6 +134,15 @@ function buildTransformException($e, $bridge){ return buildPage($title, $header, $section); } +/** + * Builds a new HTML header with data from an exception and a bridge + * + * @param object $e The exception object + * @param object $bridge The bridge object + * @return string The HTML header + * + * @todo This function belongs inside a class + */ function buildHeader($e, $bridge){ return <<<EOD <header> @@ -124,6 +153,17 @@ function buildHeader($e, $bridge){ EOD; } +/** + * Builds a new HTML section + * + * @param object $e The exception object + * @param object $bridge The bridge object + * @param string $message The message to display + * @param string $link The link to include in the anchor + * @return string The HTML section + * + * @todo This function belongs inside a class + */ function buildSection($e, $bridge, $message, $link){ return <<<EOD <section> @@ -142,6 +182,16 @@ function buildSection($e, $bridge, $message, $link){ EOD; } +/** + * Builds a new HTML page + * + * @param string $title The HTML title + * @param string $header The HTML header + * @param string $section The HTML
section + * @return string The HTML page + * + * @todo This function belongs inside a class + */ function buildPage($title, $header, $section){ return <<<EOD <!DOCTYPE html> diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 893c3ba1..04f835aa 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -1,11 +1,74 @@ <?php - +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * An abstract class for bridges that need to transform existing RSS or Atom + * feeds. + * + * This class extends {@see BridgeAbstract} with functions to extract contents + * from existing RSS or Atom feeds. Bridges that need to transform existing feeds + * should inherit from this class instead of {@see BridgeAbstract}. + * + * Bridges that extend this class don't need to concern themselves with getting + * contents from existing feeds, but can focus on adding additional contents + * (i.e. by downloading additional data), filtering or just transforming a feed + * into another format. + * + * @link http://www.rssboard.org/rss-0-9-1 RSS 0.91 Specification + * @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0 + * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification + * @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format + * + * @todo Return `self` on more functions to allow chaining + * @todo The parsing functions should all be private. This class is complicated + * enough without having to consider children overriding functions. 
+ */ abstract class FeedExpander extends BridgeAbstract { + /** + * Holds the title of the current feed + * + * @var string + * @todo Rename this parameter to $title for clarity + */ private $name; + + /** + * Holds the URI of the feed + * + * @var string + */ private $uri; + + /** + * Holds the feed type during internal operations. + * + * @var string + * @todo Define possible values as constant instead of static strings + */ private $feedType; + /** + * Collects data from an existing feed. + * + * Children should call this function in {@see BridgeInterface::collectData()} + * to extract a feed. + * + * @param string $url URL to the feed. + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + */ public function collectExpandableDatas($url, $maxItems = -1){ if(empty($url)) { returnServerError('There is no $url for this RSS expander'); @@ -44,6 +107,19 @@ abstract class FeedExpander extends BridgeAbstract { $this->{'collect_' . $this->feedType . '_data'}($rssContent, $maxItems); } + /** + * Collect data from a RSS 1.0 compatible feed + * + * @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0 + * + * @param string $rssContent The RSS content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ protected function collect_RSS_1_0_data($rssContent, $maxItems){ $this->load_RSS_2_0_feed_data($rssContent->channel[0]); foreach($rssContent->item as $item) { @@ -56,6 +132,19 @@ abstract class FeedExpander extends BridgeAbstract { } } + /** + * Collect data from a RSS 2.0 compatible feed + * + * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification + * + * @param object $rssContent The RSS content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). 
+ * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ protected function collect_RSS_2_0_data($rssContent, $maxItems){ $rssContent = $rssContent->channel[0]; Debug::log('RSS content is ===========\n' @@ -73,6 +162,19 @@ abstract class FeedExpander extends BridgeAbstract { } } + /** + * Collect data from a Atom 1.0 compatible feed + * + * @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format + * + * @param object $content The Atom content + * @param int $maxItems Maximum number of items to collect from the feed + * (`-1`: no limit). + * @return void + * + * @todo Instead of passing $maxItems to all functions, just add all items + * and remove excessive items later. + */ protected function collect_ATOM_1_0_data($content, $maxItems){ $this->load_ATOM_feed_data($content); foreach($content->entry as $item) { @@ -85,16 +187,35 @@ abstract class FeedExpander extends BridgeAbstract { } } + /** + * Convert RSS 2.0 time to timestamp + * + * @param object $item A feed item + * @return int The timestamp + */ protected function RSS_2_0_time_to_timestamp($item){ return DateTime::createFromFormat('D, d M Y H:i:s e', $item->pubDate)->getTimestamp(); } - // TODO set title, link, description, language, and so on + /** + * Load RSS 2.0 feed data into RSS-Bridge + * + * @param object $rssContent The RSS content + * @return void + * + * @todo set title, link, description, language, and so on + */ protected function load_RSS_2_0_feed_data($rssContent){ $this->name = trim((string)$rssContent->title); $this->uri = trim((string)$rssContent->link); } + /** + * Load Atom feed data into RSS-Bridge + * + * @param object $content The Atom content + * @return void + */ protected function load_ATOM_feed_data($content){ $this->name = (string)$content->title; @@ -114,6 +235,16 @@ abstract class FeedExpander extends BridgeAbstract { } } + /** + * Parse the contents of a single Atom feed item into a 
RSS-Bridge item for + * further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ protected function parseATOMItem($feedItem){ // Some ATOM entries also contain RSS 2.0 fields $item = $this->parseRSS_2_0_Item($feedItem); @@ -139,6 +270,16 @@ abstract class FeedExpander extends BridgeAbstract { return $item; } + /** + * Parse the contents of a single RSS 0.91 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ protected function parseRSS_0_9_1_Item($feedItem){ $item = array(); if(isset($feedItem->link)) $item['uri'] = (string)$feedItem->link; @@ -150,6 +291,16 @@ abstract class FeedExpander extends BridgeAbstract { return $item; } + /** + * Parse the contents of a single RSS 1.0 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? + */ protected function parseRSS_1_0_Item($feedItem){ // 1.0 adds optional elements around the 0.91 standard $item = $this->parseRSS_0_9_1_Item($feedItem); @@ -164,6 +315,16 @@ abstract class FeedExpander extends BridgeAbstract { return $item; } + /** + * Parse the contents of a single RSS 2.0 feed item into a RSS-Bridge item + * for further transformation. + * + * @param object $feedItem A single feed item + * @return object The RSS-Bridge item + * + * @todo To reduce confusion, the RSS-Bridge item should maybe have a class + * of its own? 
+ */ protected function parseRSS_2_0_Item($feedItem){ // Primary data is compatible to 0.91 with some additional data $item = $this->parseRSS_0_9_1_Item($feedItem); @@ -211,9 +372,11 @@ abstract class FeedExpander extends BridgeAbstract { } /** - * Method should return, from a source RSS item given by lastRSS, one of our Items objects - * @param $item the input rss item - * @return a RSS-Bridge Item, with (hopefully) the whole content) + * Parse the contents of a single feed item, depending on the current feed + * type, into a RSS-Bridge item. + * + * @param object $item The current feed item + * @return object A RSS-Bridge item, with (hopefully) the whole content */ protected function parseItem($item){ switch($this->feedType) { @@ -230,14 +393,17 @@ abstract class FeedExpander extends BridgeAbstract { } } + /** {@inheritdoc} */ public function getURI(){ return !empty($this->uri) ? $this->uri : parent::getURI(); } + /** {@inheritdoc} */ public function getName(){ return !empty($this->name) ? $this->name : parent::getName(); } + /** {@inheritdoc} */ public function getIcon(){ return !empty($this->icon) ? $this->icon : parent::getIcon(); } diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index 8fb642c0..3315666d 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -1,41 +1,95 @@ <?php - +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. 
+ * + * @package Core + * @license https://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * An abstract class for format implementations + * + * This class implements {@see FormatInterface} + */ abstract class FormatAbstract implements FormatInterface { + + /** The default charset (UTF-8) */ const DEFAULT_CHARSET = 'UTF-8'; - protected - $contentType, - $charset, - $items, - $lastModified, - $extraInfos; + /** @var string $contentType The content type */ + protected $contentType; + + /** @var string $charset The charset */ + protected $charset; + /** @var array $items The items */ + protected $items; + + /** + * @var int $lastModified A timestamp to indicate the last modified time of + * the output data. + */ + protected $lastModified; + + /** @var array $extraInfos The extra infos */ + protected $extraInfos; + + /** + * {@inheritdoc} + * + * @param string $charset {@inheritdoc} + */ public function setCharset($charset){ $this->charset = $charset; return $this; } + /** {@inheritdoc} */ public function getCharset(){ $charset = $this->charset; return is_null($charset) ? static::DEFAULT_CHARSET : $charset; } + /** + * Set the content type + * + * @param string $contentType The content type + * @return self The format object + */ protected function setContentType($contentType){ $this->contentType = $contentType; return $this; } + /** + * Set the last modified time + * + * @param int $lastModified The last modified time + * @return void + */ public function setLastModified($lastModified){ $this->lastModified = $lastModified; } + /** + * Send header with the currently specified content type + * + * @return void + * @todo This should throw an error if no content type is set + */ protected function callContentType(){ header('Content-Type: ' . $this->contentType); } + /** {@inheritdoc} */ public function display(){ if ($this->lastModified) { header('Last-Modified: ' . gmdate('D, d M Y H:i:s ', $this->lastModified) . 
'GMT'); @@ -45,12 +99,18 @@ abstract class FormatAbstract implements FormatInterface { return $this; } + /** + * {@inheritdoc} + * + * @param array $items {@inheritdoc} + */ public function setItems(array $items){ $this->items = array_map(array($this, 'array_trim'), $items); return $this; } + /** {@inheritdoc} */ public function getItems(){ if(!is_array($this->items)) throw new \LogicException('Feed the ' . get_class($this) . ' with "setItems" method before !'); @@ -59,10 +119,10 @@ abstract class FormatAbstract implements FormatInterface { } /** - * Define common informations can be required by formats and set default value for unknown values - * @param array $extraInfos array with know informations (there isn't merge !!!) - * @return this - */ + * {@inheritdoc} + * + * @param array $extraInfos {@inheritdoc} + */ public function setExtraInfos(array $extraInfos = array()){ foreach(array('name', 'uri', 'icon') as $infoName) { if(!isset($extraInfos[$infoName])) { @@ -75,10 +135,7 @@ abstract class FormatAbstract implements FormatInterface { return $this; } - /** - * Return extra infos - * @return array See "setExtraInfos" detail method to know what extra are disponibles - */ + /** {@inheritdoc} */ public function getExtraInfos(){ if(is_null($this->extraInfos)) { // No extra info ? $this->setExtraInfos(); // Define with default value @@ -88,12 +145,17 @@ abstract class FormatAbstract implements FormatInterface { } /** - * Sanitized html while leaving it functionnal. - * The aim is to keep html as-is (with clickable hyperlinks) - * while reducing annoying and potentially dangerous things. - * Yes, I know sanitizing HTML 100% is an impossible task. - * Maybe we'll switch to http://htmlpurifier.org/ - * or http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php + * Sanitize HTML while leaving it functional. + * + * Keeps HTML as-is (with clickable hyperlinks) while reducing annoying and + * potentially dangerous things. 
+ * + * @param string $html The HTML content + * @return string The sanitized HTML content + * + * @todo This belongs into `html.php` + * @todo Maybe switch to http://htmlpurifier.org/ + * @todo Maybe switch to http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php */ protected function sanitizeHtml($html) { @@ -104,6 +166,17 @@ abstract class FormatAbstract implements FormatInterface { return $html; } + /** + * Trim each element of an array + * + * This function applies `trim()` to all elements in the array, if the element + * is a valid string. + * + * @param array $elements The array to trim + * @return array The trimmed array + * + * @todo This is a utility function that doesn't belong here, find a new home. + */ protected function array_trim($elements){ foreach($elements as $key => $value) { if(is_string($value)) diff --git a/lib/FormatInterface.php b/lib/FormatInterface.php index f99d214f..d59a4ef9 100644 --- a/lib/FormatInterface.php +++ b/lib/FormatInterface.php @@ -1,11 +1,84 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. 
+ * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * The format interface + * + * @todo Add missing function to the interface + * @todo Explain parameters and return values in more detail + * @todo Return self more often (to allow call chaining) + */ interface FormatInterface { + + /** + * Generate a string representation of the current data + * + * @return string The string representation + */ public function stringify(); + + /** + * Display the current data to the user + * + * @return self The format object + */ public function display(); + + /** + * Set items + * + * @param array $bridges The items + * @return self The format object + * + * @todo Rename parameter `$bridges` to `$items` + */ public function setItems(array $bridges); + + /** + * Return items + * + * @throws \LogicException if the items are not set + * @return array The items + */ public function getItems(); + + /** + * Set extra information + * + * @param array $infos Extra information + * @return self The format object + */ public function setExtraInfos(array $infos); + + /** + * Return extra information + * + * @return array Extra information + */ public function getExtraInfos(); + + /** + * Set charset + * + * @param string $charset The charset + * @return self The format object + */ public function setCharset($charset); + + /** + * Return current charset + * + * @return string The charset + */ public function getCharset(); } diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index c278e4d1..91fe7c92 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -1,10 +1,35 @@ <?php /** - * Implements a validator for bridge parameters + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. 
+ * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Validator for bridge parameters */ class ParameterValidator { + + /** + * Holds the list of invalid parameters + * + * @var array + */ private $invalid = array(); + /** + * Add item to list of invalid parameters + * + * @param string $name The name of the parameter + * @param string $reason The reason for that parameter being invalid + * @return void + */ private function addInvalidParameter($name, $reason){ $this->invalid[] = array( 'name' => $name, @@ -13,13 +38,23 @@ class ParameterValidator { } /** - * Returns an array of invalid parameters, where each element is an - * array of 'name' and 'reason'. + * Return list of invalid parameters. + * + * Each element is an array of 'name' and 'reason'. + * + * @return array List of invalid parameters */ public function getInvalidParameters() { return $this->invalid; } + /** + * Validate value for a text input + * + * @param string $value The value of a text input + * @param string|null $pattern (optional) A regex pattern + * @return string|null The filtered value or null if the value is invalid + */ private function validateTextValue($value, $pattern = null){ if(!is_null($pattern)) { $filteredValue = filter_var($value, @@ -38,6 +73,12 @@ class ParameterValidator { return $filteredValue; } + /** + * Validate value for a number input + * + * @param int $value The value of a number input + * @return int|null The filtered value or null if the value is invalid + */ private function validateNumberValue($value){ $filteredValue = filter_var($value, FILTER_VALIDATE_INT); @@ -47,10 +88,23 @@ class ParameterValidator { return $filteredValue; } + /** + * Validate value for a checkbox + * + * @param bool $value The value of a checkbox + * @return bool The filtered value + */ private function validateCheckboxValue($value){ return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); } + 
/** + * Validate value for a list + * + * @param string $value The value of a list + * @param array $expectedValues A list of expected values + * @return string|null The filtered value or null if the value is invalid + */ private function validateListValue($value, $expectedValues){ $filteredValue = filter_var($value); @@ -69,9 +123,11 @@ class ParameterValidator { } /** - * Checks if all required parameters are supplied by the user - * @param $data An array of parameters provided by the user - * @param $parameters An array of bridge parameters + * Check if all required parameters are satisfied + * + * @param array $data (ref) A list of input values + * @param array $parameters The bridge parameters + * @return bool True if all parameters are satisfied */ public function validateData(&$data, $parameters){ @@ -122,11 +178,11 @@ class ParameterValidator { } /** - * Returns the name of the context matching the provided inputs + * Get the name of the context matching the provided inputs * * @param array $data Associative array of user data * @param array $parameters Array of bridge parameters - * @return mixed Returns the context name or null if no match was found + * @return string|null Returns the context name or null if no match was found */ public function getQueriedContext($data, $parameters){ $queriedContexts = array(); diff --git a/lib/contents.php b/lib/contents.php index fbd93f1b..127638bc 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,4 +1,31 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Gets contents from the Internet. + * + * @param string $url The URL. 
+ * @param array $header (optional) A list of cURL headers. + * For more information follow the links below. + * * https://php.net/manual/en/function.curl-setopt.php + * * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html + * @param array $opts (optional) A list of cURL options as associative array in + * the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX` + * option and `$value` the corresponding value. + * + * For more information see http://php.net/manual/en/function.curl-setopt.php + * @return string The contents. + */ function getContents($url, $header = array(), $opts = array()){ Debug::log('Reading contents from "' . $url . '"'); @@ -74,6 +101,32 @@ EOD return substr($data, $headerSize); } +/** + * Gets contents from the Internet as simplehtmldom object. + * + * @param string $url The URL. + * @param array $header (optional) A list of cURL headers. + * For more information follow the links below. + * * https://php.net/manual/en/function.curl-setopt.php + * * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html + * @param array $opts (optional) A list of cURL options as associative array in + * the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX` + * option and `$value` the corresponding value. + * + * For more information see http://php.net/manual/en/function.curl-setopt.php + * @param bool $lowercase Force all selectors to lowercase. + * @param bool $forceTagsClosed Forcefully close tags in malformed HTML. + * + * _Remarks_: Forcefully closing tags is great for malformed HTML, but it can + * lead to parsing errors. + * @param string $target_charset Defines the target charset. + * @param bool $stripRN Replace all occurrences of `"\r"` and `"\n"` by `" "`. + * @param string $defaultBRText Specifies the replacement text for `<br>` tags + * when returning plaintext. + * @param string $defaultSpanText Specifies the replacement text for `<span />` + * tags when returning plaintext. 
+ * @return string Contents as simplehtmldom object. + */ function getSimpleHTMLDOM($url, $header = array(), $opts = array(), @@ -94,10 +147,34 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ } /** - * Maintain locally cached versions of pages to avoid multiple downloads. - * @param url url to cache - * @param duration duration of the cache file in seconds (default: 24h/86400s) - * @return content of the file as string + * Gets contents from the Internet as simplehtmldom object. Contents are cached + * and re-used for subsequent calls until the cache duration elapsed. + * + * _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds). + * + * @param string $url The URL. + * @param int $duration Cache duration in seconds. + * @param array $header (optional) A list of cURL headers. + * For more information follow the links below. + * * https://php.net/manual/en/function.curl-setopt.php + * * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html + * @param array $opts (optional) A list of cURL options as associative array in + * the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX` + * option and `$value` the corresponding value. + * + * For more information see http://php.net/manual/en/function.curl-setopt.php + * @param bool $lowercase Force all selectors to lowercase. + * @param bool $forceTagsClosed Forcefully close tags in malformed HTML. + * + * _Remarks_: Forcefully closing tags is great for malformed HTML, but it can + * lead to parsing errors. + * @param string $target_charset Defines the target charset. + * @param bool $stripRN Replace all occurrences of `"\r"` and `"\n"` by `" "`. + * @param string $defaultBRText Specifies the replacement text for `<br>` tags + * when returning plaintext. + * @param string $defaultSpanText Specifies the replacement text for `<span />` + * tags when returning plaintext. + * @return string Contents as simplehtmldom object. 
*/ function getSimpleHTMLDOMCached($url, $duration = 86400, @@ -142,9 +219,12 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ } /** - * Parses the provided response header into an associative array + * Parses the cURL response header into an associative array * * Based on https://stackoverflow.com/a/18682872 + * + * @param string $header The cURL response header. + * @return array An associative array of response headers. */ function parseResponseHeader($header) { @@ -177,10 +257,18 @@ function parseResponseHeader($header) { } /** - * Determine MIME type from URL/Path file extension - * Remark: Built-in functions mime_content_type or fileinfo requires fetching remote content - * Remark: A bridge can hint for a MIME type by appending #.ext to a URL, e.g. #.image + * Determines the MIME type from a URL/Path file extension. + * + * _Remarks_: + * + * * The built-in functions `mime_content_type` and `fileinfo` require fetching + * remote contents. + * * A caller can hint for a MIME type by appending `#.ext` to the URL (i.e. `#.image`). + * * Based on https://stackoverflow.com/a/1147952 + * + * @param string $url The URL or path to the file. + * @return string The MIME type of the file. */ function getMimeType($url) { static $mime = null; diff --git a/lib/error.php b/lib/error.php index abf3c2db..8546f6cd 100644 --- a/lib/error.php +++ b/lib/error.php @@ -1,12 +1,43 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Throws an exception when called. 
+ * + * @throws \HttpException when called + * @param string $message The error message + * @param int $code The HTTP error code + * @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP + * status codes + */ function returnError($message, $code){ throw new \HttpException($message, $code); } +/** + * Returns HTTP Error 400 (Bad Request) when called. + * + * @param string $message The error message + */ function returnClientError($message){ returnError($message, 400); } +/** + * Returns HTTP Error 500 (Internal Server Error) when called. + * + * @param string $message The error message + */ function returnServerError($message){ returnError($message, 500); } diff --git a/lib/html.php b/lib/html.php index f87991f1..a9eccef6 100644 --- a/lib/html.php +++ b/lib/html.php @@ -1,4 +1,34 @@ <?php +/** + * This file is part of RSS-Bridge, a PHP project capable of generating RSS and + * Atom feeds for websites that don't have one. + * + * For the full license information, please view the UNLICENSE file distributed + * with this source code. + * + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge + */ + +/** + * Removes unwanted tags from a given HTML text. + * + * @param string $textToSanitize The HTML text to sanitize. + * @param array $removedTags A list of tags to remove from the DOM. + * @param array $keptAttributes A list of attributes to keep on tags (other + * attributes are removed). + * @param array $keptText A list of tags where the innertext replaces the tag + * (i.e. `<p>Hello World!</p>` becomes `Hello World!`). + * @return object A simplehtmldom object of the remaining contents. + * + * @todo Check if this implementation is still necessary, because simplehtmldom + * already removes some of the tags (search for `remove_noise` in simple_html_dom.php). + * @todo Rename parameters to make more sense. 
`$textToSanitize` must be HTML, + * `$removedTags`, `$keptAttributes` and `$keptText` are past tense. + * @todo Clarify the meaning of `*[!b38fd2b1fe7f4747d6b1c1254ccd055e]`, which + * looks like an MD5 hash (does simplehtmldom not support `find('*')`?). + */ function sanitize($textToSanitize, $removedTags = array('script', 'iframe', 'input', 'form'), $keptAttributes = array('title', 'href', 'src'), @@ -21,6 +51,35 @@ $keptText = array()){ return $htmlContent; } +/** + * Replace background by image + * + * Replaces tags with styles of `background-image` by `<img />` tags. + * + * For example: + * + * ```HTML + * <html> + * <body style="background-image: url('bgimage.jpg');"> + * <h1>Hello world!</h1> + * </body> + * </html> + * ``` + * + * results in this output: + * + * ```HTML + * <html> + * <img style="display:block;" src="bgimage.jpg" /> + * </html> + * ``` + * + * @param string $htmlContent The HTML content + * @return string The HTML content with all occurrences replaced + * + * @todo Clarify the meaning of `*[!b38fd2b1fe7f4747d6b1c1254ccd055e]`, which + * looks like an MD5 hash (does simplehtmldom not support `find('*')`?). + */ function backgroundToImg($htmlContent) { $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; @@ -42,9 +101,17 @@ function backgroundToImg($htmlContent) { /** * Convert relative links in HTML into absolute links - * @param $content HTML content to fix. Supports HTML objects or string objects - * @param $server full URL to the page containing relative links - * @return content with fixed URLs, as HTML object or string depending on input type + * + * This function is based on `php-urljoin`. + * + * @link https://github.com/plaidfluff/php-urljoin php-urljoin + * + * @param string|object $content The HTML content. Supports HTML objects or string objects + * @param string $server Fully qualified URL to the page containing relative links + * @return object Content with fixed URLs. 
+ * + * @todo If the input type was a string, this function should return a string as + * well. This is currently done implicitly by how the simplehtmldom object works. */ function defaultLinkTo($content, $server){ $string_convert = false; @@ -70,10 +137,14 @@ function defaultLinkTo($content, $server){ /** * Extract the first part of a string matching the specified start and end delimiters - * @param $string input string, e.g. '<div>Post author: John Doe</div>' - * @param $start start delimiter, e.g. 'author: ' - * @param $end end delimiter, e.g. '<' - * @return extracted string, e.g. 'John Doe', or false if the delimiters were not found. + * + * @param string $string Input string, e.g. `<div>Post author: John Doe</div>` + * @param string $start Start delimiter, e.g. `author: ` + * @param string $end End delimiter, e.g. `<` + * @return string|bool Extracted string, e.g. `John Doe`, or false if the + * delimiters were not found. + * + * @todo This function can possibly be simplified to use a single `substr` command. */ function extractFromDelimiters($string, $start, $end) { if (strpos($string, $start) !== false) { @@ -85,10 +156,13 @@ function extractFromDelimiters($string, $start, $end) { /** * Remove one or more part(s) of a string using a start and end delmiters - * @param $string input string, e.g. 'foo<script>superscript()</script>bar' - * @param $start start delimiter, e.g. '<script' - * @param $end end delimiter, e.g. '</script>' - * @return cleaned string, e.g. 'foobar' + * + * @param string $string Input string, e.g. `foo<script>superscript()</script>bar` + * @param string $start Start delimiter, e.g. `<script` + * @param string $end End delimiter, e.g. `</script>` + * @return string Cleaned string, e.g. `foobar` + * + * @todo This function can possibly be simplified to use a single `substr` command. 
*/ function stripWithDelimiters($string, $start, $end) { while(strpos($string, $start) !== false) { @@ -101,10 +175,13 @@ function stripWithDelimiters($string, $start, $end) { /** * Remove HTML sections containing one or more sections using the same HTML tag - * @param $string input string, e.g. 'foo<div class="ads"><div>ads</div>ads</div>bar' - * @param $tag_name name of the HTML tag, e.g. 'div' - * @param $tag_start start of the HTML tag to remove, e.g. '<div class="ads">' - * @return cleaned string, e.g. 'foobar' + * + * @param string $string Input string, e.g. `foo<div class="ads"><div>ads</div>ads</div>bar` + * @param string $tag_name Name of the HTML tag, e.g. `div` + * @param string $tag_start Start of the HTML tag to remove, e.g. `<div class="ads">` + * @return string Cleaned String, e.g. `foobar` + * + * @todo This function needs more documentation to make it maintainable. */ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ $open_tag = '<' . $tag_name; @@ -131,9 +208,13 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ } /** - * Convert Markdown tags into HTML tags. Only a subset of the Markdown syntax is implemented. - * @param $string input string in Markdown format - * @return output string in HTML format + * Convert Markdown into HTML. Only a subset of the Markdown syntax is implemented. + * + * @link https://daringfireball.net/projects/markdown/ Markdown + * @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec + * + * @param string $string Input string in Markdown format + * @return string output string in HTML format */ function markdownToHtml($string) { |