1 files changed, 594 insertions, 568 deletions
diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php
index 0ca1587b..686addf4 100644
--- a/lib/XPathAbstract.php
+++ b/lib/XPathAbstract.php
@@ -15,572 +15,598 @@
  * This class extends {@see BridgeAbstract}, which means it incorporates and
  * extends all of its functionality.
  **/
-abstract class XPathAbstract extends BridgeAbstract {
-
-	/**
-	 * Source Web page URL (should provide either HTML or XML content)
-	 * You can specify any website URL which serves data suited for display in RSS feeds
-	 * (for example a news blog).
-	 *
-	 * Use {@see XPathAbstract::getSourceUrl()} to read this parameter
-	 */
-	const FEED_SOURCE_URL = '';
-
-	/**
-	 * XPath expression for extracting the feed title from the source page.
-	 * If this is left blank or does not provide any data {@see BridgeAbstract::getName()}
-	 * is used instead as the feed's title.
-	 *
-	 * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_FEED_TITLE = './/title';
-
-	/**
-	 * XPath expression for extracting the feed favicon URL from the source page.
-	 * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()}
-	 * is used instead as the feed's favicon URL.
-	 *
-	 * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
-
-	/**
-	 * XPath expression for extracting the feed items from the source page
-	 * Enter an XPath expression matching a list of dom nodes, each node containing one
-	 * feed article item in total (usually a surrounding <div> or <span> tag). This will
-	 * be the context nodes for all of the following expressions. This expression usually
-	 * starts with a single forward slash.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItem()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM = '';
-
-	/**
-	 * XPath expression for extracting an item title from the item context
-	 * This expression should match a node contained within each article item node
-	 * containing the article headline. It should start with a dot followed by two
-	 * forward slashes, referring to any descendant nodes of the article item node.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_TITLE = '';
-
-	/**
-	 * XPath expression for extracting an item's content from the item context
-	 * This expression should match a node contained within each article item node
-	 * containing the article content or description. It should start with a dot
-	 * followed by two forward slashes, referring to any descendant nodes of the
-	 * article item node.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_CONTENT = '';
-
-	/**
-	 * XPath expression for extracting an item link from the item context
-	 * This expression should match a node's attribute containing the article URL
-	 * (usually the href attribute of an <a> tag). It should start with a dot
-	 * followed by two forward slashes, referring to any descendant nodes of
-	 * the article item node. Attributes can be selected by prepending an @ char
-	 * before the attributes name.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_URI = '';
-
-	/**
-	 * XPath expression for extracting an item author from the item context
-	 * This expression should match a node contained within each article item
-	 * node containing the article author's name. It should start with a dot
-	 * followed by two forward slashes, referring to any descendant nodes of
-	 * the article item node.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_AUTHOR = '';
-
-	/**
-	 * XPath expression for extracting an item timestamp from the item context
-	 * This expression should match a node or node's attribute containing the
-	 * article timestamp or date (parsable by PHP's strtotime function). It
-	 * should start with a dot followed by two forward slashes, referring to
-	 * any descendant nodes of the article item node. Attributes can be
-	 * selected by prepending an @ char before the attributes name.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_TIMESTAMP = '';
-
-	/**
-	 * XPath expression for extracting item enclosures (media content like
-	 * images or movies) from the item context
-	 * This expression should match a node's attribute containing an article
-	 * image URL (usually the src attribute of an <img> tag or a style
-	 * attribute). It should start with a dot followed by two forward slashes,
-	 * referring to any descendant nodes of the article item node. Attributes
-	 * can be selected by prepending an @ char before the attributes name.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_ENCLOSURES = '';
-
-	/**
-	 * XPath expression for extracting an item category from the item context
-	 * This expression should match a node or node's attribute contained
-	 * within each article item node containing the article category. This
-	 * could be inside <div> or <span> tags or sometimes be hidden
-	 * in a data attribute. It should start with a dot followed by two
-	 * forward slashes, referring to any descendant nodes of the article
-	 * item node. Attributes can be selected by prepending an @ char
-	 * before the attributes name.
-	 *
-	 * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter
-	 */
-	const XPATH_EXPRESSION_ITEM_CATEGORIES = '';
-
-	/**
-	 * Fix encoding
-	 * Set this to true for fixing feed encoding by invoking PHP's utf8_decode
-	 * function on all extracted texts. Try this in case you see "broken" or
-	 * "weird" characters in your feed where you'd normally expect umlauts
-	 * or any other non-ascii characters.
-	 *
-	 * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter
-	 */
-	const SETTING_FIX_ENCODING = false;
-
-	/**
-	 * Internal storage for resulting feed name, automatically detected
-	 * @var string
-	 */
-	private $feedName;
-
-	/**
-	 * Internal storage for resulting feed name, automatically detected
-	 * @var string
-	 */
-	private $feedUri;
-
-	/**
-	 * Internal storage for resulting feed favicon, automatically detected
-	 * @var string
-	 */
-	private $feedIcon;
-
-	public function getName(){
-		return $this->feedName ?: parent::getName();
-	}
-
-	public function getURI() {
-		return $this->feedUri ?: parent::getURI();
-	}
-
-	public function getIcon() {
-		return $this->feedIcon ?: parent::getIcon();
-	}
-
-	/**
-	 * Source Web page URL (should provide either HTML or XML content)
-	 * @return string
-	 */
-	protected function getSourceUrl(){
-		return static::FEED_SOURCE_URL;
-	}
-
-	/**
-	 * XPath expression for extracting the feed title from the source page
-	 * @return string
-	 */
-	protected function getExpressionTitle(){
-		return static::XPATH_EXPRESSION_FEED_TITLE;
-	}
-
-	/**
-	 * XPath expression for extracting the feed favicon from the source page
-	 * @return string
-	 */
-	protected function getExpressionIcon(){
-		return static::XPATH_EXPRESSION_FEED_ICON;
-	}
-
-	/**
-	 * XPath expression for extracting the feed items from the source page
-	 * @return string
-	 */
-	protected function getExpressionItem(){
-		return static::XPATH_EXPRESSION_ITEM;
-	}
-
-	/**
-	 * XPath expression for extracting an item title from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemTitle(){
-		return static::XPATH_EXPRESSION_ITEM_TITLE;
-	}
-
-	/**
-	 * XPath expression for extracting an item's content from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemContent(){
-		return static::XPATH_EXPRESSION_ITEM_CONTENT;
-	}
-
-	/**
-	 * XPath expression for extracting an item link from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemUri(){
-		return static::XPATH_EXPRESSION_ITEM_URI;
-	}
-
-	/**
-	 * XPath expression for extracting an item author from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemAuthor(){
-		return static::XPATH_EXPRESSION_ITEM_AUTHOR;
-	}
-
-	/**
-	 * XPath expression for extracting an item timestamp from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemTimestamp(){
-		return static::XPATH_EXPRESSION_ITEM_TIMESTAMP;
-	}
-
-	/**
-	 * XPath expression for extracting item enclosures (media content like
-	 * images or movies) from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemEnclosures(){
-		return static::XPATH_EXPRESSION_ITEM_ENCLOSURES;
-	}
-
-	/**
-	 * XPath expression for extracting an item category from the item context
-	 * @return string
-	 */
-	protected function getExpressionItemCategories(){
-		return static::XPATH_EXPRESSION_ITEM_CATEGORIES;
-	}
-
-	/**
-	 * Fix encoding
-	 * @return string
-	 */
-	protected function getSettingFixEncoding(){
-		return static::SETTING_FIX_ENCODING;
-	}
-
-	/**
-	 * Internal helper method for quickly accessing all the user defined constants
-	 * in derived classes
-	 *
-	 * @param $name
-	 * @return bool|string
-	 */
-	private function getParam($name){
-		switch($name) {
-
-			case 'url':
-				return $this->getSourceUrl();
-			case 'feed_title':
-				return $this->getExpressionTitle();
-			case 'feed_icon':
-				return $this->getExpressionIcon();
-			case 'item':
-				return $this->getExpressionItem();
-			case 'title':
-				return $this->getExpressionItemTitle();
-			case 'content':
-				return $this->getExpressionItemContent();
-			case 'uri':
-				return $this->getExpressionItemUri();
-			case 'author':
-				return $this->getExpressionItemAuthor();
-			case 'timestamp':
-				return $this->getExpressionItemTimestamp();
-			case 'enclosures':
-				return $this->getExpressionItemEnclosures();
-			case 'categories':
-				return $this->getExpressionItemCategories();
-			case 'fix_encoding':
-				return $this->getSettingFixEncoding();
-		}
-	}
-
-	/**
-	 * Should provide the source website HTML content
-	 * can be easily overwritten for example if special headers or auth infos are required
-	 * @return string
-	 */
-	protected function provideWebsiteContent() {
-		return getContents($this->feedUri);
-	}
-
-	/**
-	 * Should provide the feeds title
-	 *
-	 * @param DOMXPath $xpath
-	 * @return string
-	 */
-	protected function provideFeedTitle(DOMXPath $xpath) {
-		$title = $xpath->query($this->getParam('feed_title'));
-		if(count($title) === 1) {
-			return $this->getItemValueOrNodeValue($title);
-		}
-	}
-
-	/**
-	 * Should provide the URL of the feed's favicon
-	 *
-	 * @param DOMXPath $xpath
-	 * @return string
-	 */
-	protected function provideFeedIcon(DOMXPath $xpath) {
-		$icon = $xpath->query($this->getParam('feed_icon'));
-		if(count($icon) === 1) {
-			return $this->cleanMediaUrl($this->getItemValueOrNodeValue($icon));
-		}
-	}
-
-	/**
-	 * Should provide the feed's items.
-	 *
-	 * @param DOMXPath $xpath
-	 * @return DOMNodeList
-	 */
-	protected function provideFeedItems(DOMXPath $xpath) {
-		return @$xpath->query($this->getParam('item'));
-	}
-
-	public function collectData() {
-
-		$this->feedUri = $this->getParam('url');
-
-		$webPageHtml = new DOMDocument();
-		libxml_use_internal_errors(true);
-		$webPageHtml->loadHTML($this->provideWebsiteContent());
-		libxml_clear_errors();
-		libxml_use_internal_errors(false);
-
-		$xpath = new DOMXPath($webPageHtml);
-
-		$this->feedName = $this->provideFeedTitle($xpath);
-		$this->feedIcon = $this->provideFeedIcon($xpath);
-
-		$entries = $this->provideFeedItems($xpath);
-		if($entries === false) {
-			return;
-		}
-
-		foreach ($entries as $entry) {
-			$item = new \FeedItem();
-			foreach(array('title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories') as $param) {
-
-				$expression = $this->getParam($param);
-				if('' === $expression) {
-					continue;
-				}
-
-				//can be a string or DOMNodeList, depending on the expression result
-				$typedResult = @$xpath->evaluate($expression, $entry);
-				if ($typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0)
-					|| (is_string($typedResult) && strlen(trim($typedResult)) === 0)) {
-					continue;
-				}
-
-				$item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult)));
-
-			}
-
-			$itemId = $this->generateItemId($item);
-			if(null !== $itemId) {
-				$item->setUid($itemId);
-			}
-
-			$this->items[] = $item;
-		}
-
-	}
-
-	/**
-	 * @param $param
-	 * @param $value
-	 * @return string|array
-	 */
-	protected function formatParamValue($param, $value)
-	{
-		$value = $this->fixEncoding($value);
-		switch ($param) {
-			case 'title':
-				return $this->formatItemTitle($value);
-			case 'content':
-				return $this->formatItemContent($value);
-			case 'uri':
-				return $this->formatItemUri($value);
-			case 'author':
-				return $this->formatItemAuthor($value);
-			case 'timestamp':
-				return $this->formatItemTimestamp($value);
-			case 'enclosures':
-				return $this->formatItemEnclosures($value);
-			case 'categories':
-				return $this->formatItemCategories($value);
-		}
-		return $value;
-	}
-
-	/**
-	 * Formats the title of a feed item. Takes extracted raw title and returns it formatted
-	 * as string.
-	 * Can be easily overwritten for in case the value needs to be transformed into something
-	 * else.
-	 * @param string $value
-	 * @return string
-	 */
-	protected function formatItemTitle($value) {
-		return $value;
-	}
-
-	/**
-	 * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix
-	 * timestamp as integer.
-	 * Can be easily overwritten for example if a special format has to be expected on the
-	 * source website.
-	 * @param string $value
-	 * @return string
-	 */
-	protected function formatItemContent($value) {
-		return $value;
-	}
-
-	/**
-	 * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted
-	 * as string.
-	 * Can be easily overwritten for in case the value needs to be transformed into something
-	 * else.
-	 * @param string $value
-	 * @return string
-	 */
-	protected function formatItemUri($value) {
-		if(strlen($value) === 0) {
-			return '';
-		}
-		if(strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
-			return $value;
-		}
-
-		return urljoin($this->feedUri, $value);
-	}
-
-	/**
-	 * Formats the author of a feed item. Takes extracted raw author and returns it formatted
-	 * as string.
-	 * Can be easily overwritten for in case the value needs to be transformed into something
-	 * else.
-	 * @param string $value
-	 * @return string
-	 */
-	protected function formatItemAuthor($value) {
-		return $value;
-	}
-
-	/**
-	 * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix
-	 * timestamp as integer.
-	 * Can be easily overwritten for example if a special format has to be expected on the
-	 * source website.
-	 * @param string $value
-	 * @return false|int
-	 */
-	protected function formatItemTimestamp($value) {
-		return strtotime($value);
-	}
-
-	/**
-	 * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them
-	 * formatted as array.
-	 * Can be easily overwritten for in case the values need to be transformed into something
-	 * else.
-	 * @param string $value
-	 * @return array
-	 */
-	protected function formatItemEnclosures($value) {
-		return array($this->cleanMediaUrl($value));
-	}
-
-	/**
-	 * Formats the categories of a feed item. Takes extracted raw categories and returns them
-	 * formatted as array.
-	 * Can be easily overwritten for in case the values need to be transformed into something
-	 * else.
-	 * @param string $value
-	 * @return array
-	 */
-	protected function formatItemCategories($value) {
-		return array($value);
-	}
-
-	/**
-	 * @param $mediaUrl
-	 * @return string|void
-	 */
-	protected function cleanMediaUrl($mediaUrl)
-	{
-		$pattern = '~(?:http(?:s)?:)?[\/a-zA-Z0-9\-=_,\.\%]+\.(?:jpg|gif|png|jpeg|ico|mp3|webp){1}~i';
-		$result = preg_match($pattern, $mediaUrl, $matches);
-		if(1 !== $result) {
-			return;
-		}
-		return urljoin($this->feedUri, $matches[0]);
-	}
-
-	/**
-	 * @param $typedResult
-	 * @return string
-	 */
-	protected function getItemValueOrNodeValue($typedResult)
-	{
-		if($typedResult instanceof DOMNodeList) {
-			$item = $typedResult->item(0);
-			if ($item instanceof DOMElement) {
-				return trim($item->nodeValue);
-			} elseif ($item instanceof DOMAttr) {
-				return trim($item->value);
-			} elseif ($item instanceof DOMText) {
-				return trim($item->wholeText);
-			}
-		} elseif(is_string($typedResult) && strlen($typedResult) > 0) {
-			return trim($typedResult);
-		}
-		returnServerError('Unknown type of XPath expression result.');
-	}
-
-	/**
-	 * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts.
-	 * Useful in case of "broken" or "weird" characters in the feed where you'd normally
-	 * expect umlauts.
-	 *
-	 * @param $input
-	 * @return string
-	 */
-	protected function fixEncoding($input)
-	{
-		return $this->getParam('fix_encoding') ? utf8_decode($input) : $input;
-	}
-
-	/**
-	 * Allows overriding default mechanism determining items Uid's
-	 *
-	 * @param FeedItem $item
-	 * @return string|null
-	 */
-	protected function generateItemId(\FeedItem $item) {
-		return null; //auto generation
-	}
+abstract class XPathAbstract extends BridgeAbstract
+{
+    /**
+     * Source Web page URL (should provide either HTML or XML content)
+     * You can specify any website URL which serves data suited for display in RSS feeds
+     * (for example a news blog).
+     *
+     * Use {@see XPathAbstract::getSourceUrl()} to read this parameter
+     */
+    const FEED_SOURCE_URL = '';
+
+    /**
+     * XPath expression for extracting the feed title from the source page.
+     * If this is left blank or does not provide any data {@see BridgeAbstract::getName()}
+     * is used instead as the feed's title.
+     *
+     * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter
+     */
+    const XPATH_EXPRESSION_FEED_TITLE = './/title';
+
+    /**
+     * XPath expression for extracting the feed favicon URL from the source page.
+     * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()}
+     * is used instead as the feed's favicon URL.
+     *
+     * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter
+     */
+    const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
+
+    /**
+     * XPath expression for extracting the feed items from the source page
+     * Enter an XPath expression matching a list of dom nodes, each node containing one
+     * feed article item in total (usually a surrounding <div> or <span> tag). This will
+     * be the context nodes for all of the following expressions. This expression usually
+     * starts with a single forward slash.
+     *
+     * Use {@see XPathAbstract::getExpressionItem()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM = '';
+
+    /**
+     * XPath expression for extracting an item title from the item context
+     * This expression should match a node contained within each article item node
+     * containing the article headline. It should start with a dot followed by two
+     * forward slashes, referring to any descendant nodes of the article item node.
+     *
+     * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_TITLE = '';
+
+    /**
+     * XPath expression for extracting an item's content from the item context
+     * This expression should match a node contained within each article item node
+     * containing the article content or description. It should start with a dot
+     * followed by two forward slashes, referring to any descendant nodes of the
+     * article item node.
+     *
+     * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_CONTENT = '';
+
+    /**
+     * XPath expression for extracting an item link from the item context
+     * This expression should match a node's attribute containing the article URL
+     * (usually the href attribute of an <a> tag). It should start with a dot
+     * followed by two forward slashes, referring to any descendant nodes of
+     * the article item node. Attributes can be selected by prepending an @ char
+     * before the attribute's name.
+     *
+     * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_URI = '';
+
+    /**
+     * XPath expression for extracting an item author from the item context
+     * This expression should match a node contained within each article item
+     * node containing the article author's name. It should start with a dot
+     * followed by two forward slashes, referring to any descendant nodes of
+     * the article item node.
+     *
+     * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_AUTHOR = '';
+
+    /**
+     * XPath expression for extracting an item timestamp from the item context
+     * This expression should match a node or node's attribute containing the
+     * article timestamp or date (parsable by PHP's strtotime function). It
+     * should start with a dot followed by two forward slashes, referring to
+     * any descendant nodes of the article item node. Attributes can be
+     * selected by prepending an @ char before the attribute's name.
+     *
+     * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_TIMESTAMP = '';
+
+    /**
+     * XPath expression for extracting item enclosures (media content like
+     * images or movies) from the item context
+     * This expression should match a node's attribute containing an article
+     * image URL (usually the src attribute of an <img> tag or a style
+     * attribute). It should start with a dot followed by two forward slashes,
+     * referring to any descendant nodes of the article item node. Attributes
+     * can be selected by prepending an @ char before the attribute's name.
+     *
+     * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_ENCLOSURES = '';
+
+    /**
+     * XPath expression for extracting an item category from the item context
+     * This expression should match a node or node's attribute contained
+     * within each article item node containing the article category. This
+     * could be inside <div> or <span> tags or sometimes be hidden
+     * in a data attribute. It should start with a dot followed by two
+     * forward slashes, referring to any descendant nodes of the article
+     * item node. Attributes can be selected by prepending an @ char
+     * before the attribute's name.
+     *
+     * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter
+     */
+    const XPATH_EXPRESSION_ITEM_CATEGORIES = '';
+
+    /**
+     * Fix encoding
+     * Set this to true for fixing feed encoding by invoking PHP's utf8_decode
+     * function on all extracted texts. Try this in case you see "broken" or
+     * "weird" characters in your feed where you'd normally expect umlauts
+     * or any other non-ascii characters.
+     *
+     * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter
+     */
+    const SETTING_FIX_ENCODING = false;
+
+    /**
+     * Internal storage for resulting feed name, automatically detected
+     * @var string
+     */
+    private $feedName;
+
+    /**
+     * Internal storage for the feed's URI (the source URL), set by collectData()
+     * @var string
+     */
+    private $feedUri;
+
+    /**
+     * Internal storage for resulting feed favicon, automatically detected
+     * @var string
+     */
+    private $feedIcon;
+
+    public function getName()
+    {
+        return $this->feedName ?: parent::getName(); // parent's name is used while $feedName is falsy
+    }
+
+    public function getURI()
+    {
+        return $this->feedUri ?: parent::getURI(); // parent's URI is used while $feedUri is falsy
+    }
+
+    public function getIcon()
+    {
+        return $this->feedIcon ?: parent::getIcon(); // parent's icon is used while $feedIcon is falsy
+    }
+
+    /**
+     * Returns the source web page URL (the page should provide either HTML or XML content).
+     * @return string value of FEED_SOURCE_URL of the called class (late static binding)
+     */
+    protected function getSourceUrl()
+    {
+        return static::FEED_SOURCE_URL;
+    }
+
+    /**
+     * Returns the XPath expression for extracting the feed title from the source page.
+     * @return string value of XPATH_EXPRESSION_FEED_TITLE of the called class (late static binding)
+     */
+    protected function getExpressionTitle()
+    {
+        return static::XPATH_EXPRESSION_FEED_TITLE;
+    }
+
+    /**
+     * Returns the XPath expression for extracting the feed favicon from the source page.
+     * @return string value of XPATH_EXPRESSION_FEED_ICON of the called class (late static binding)
+     */
+    protected function getExpressionIcon()
+    {
+        return static::XPATH_EXPRESSION_FEED_ICON;
+    }
+
+    /**
+     * Returns the XPath expression for extracting the feed items from the source page.
+     * @return string value of XPATH_EXPRESSION_ITEM of the called class (late static binding)
+     */
+    protected function getExpressionItem()
+    {
+        return static::XPATH_EXPRESSION_ITEM;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item title from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_TITLE of the called class (late static binding)
+     */
+    protected function getExpressionItemTitle()
+    {
+        return static::XPATH_EXPRESSION_ITEM_TITLE;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item's content from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_CONTENT of the called class (late static binding)
+     */
+    protected function getExpressionItemContent()
+    {
+        return static::XPATH_EXPRESSION_ITEM_CONTENT;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item link from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_URI of the called class (late static binding)
+     */
+    protected function getExpressionItemUri()
+    {
+        return static::XPATH_EXPRESSION_ITEM_URI;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item author from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_AUTHOR of the called class (late static binding)
+     */
+    protected function getExpressionItemAuthor()
+    {
+        return static::XPATH_EXPRESSION_ITEM_AUTHOR;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item timestamp from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_TIMESTAMP of the called class (late static binding)
+     */
+    protected function getExpressionItemTimestamp()
+    {
+        return static::XPATH_EXPRESSION_ITEM_TIMESTAMP;
+    }
+
+    /**
+     * Returns the XPath expression for extracting item enclosures (media content like
+     * images or movies) from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_ENCLOSURES of the called class (late static binding)
+     */
+    protected function getExpressionItemEnclosures()
+    {
+        return static::XPATH_EXPRESSION_ITEM_ENCLOSURES;
+    }
+
+    /**
+     * Returns the XPath expression for extracting an item category from the item context.
+     * @return string value of XPATH_EXPRESSION_ITEM_CATEGORIES of the called class (late static binding)
+     */
+    protected function getExpressionItemCategories()
+    {
+        return static::XPATH_EXPRESSION_ITEM_CATEGORIES;
+    }
+
+    /**
+     * Returns the "fix encoding" setting (whether utf8_decode should be applied to extracted texts).
+     * @return bool value of SETTING_FIX_ENCODING of the called class (late static binding)
+     */
+    protected function getSettingFixEncoding()
+    {
+        return static::SETTING_FIX_ENCODING;
+    }
+
+    /**
+     * Internal helper mapping a short parameter name to the matching getter, so that
+     * values overridden in derived classes are read through a single entry point.
+     *
+     * @param string $name one of the case labels below, e.g. 'url', 'item' or 'fix_encoding'
+     * @return bool|string|null the getter's value; null (implicitly) when $name matches no case
+     */
+    private function getParam($name)
+    {
+        switch ($name) {
+            case 'url':
+                return $this->getSourceUrl();
+            case 'feed_title':
+                return $this->getExpressionTitle();
+            case 'feed_icon':
+                return $this->getExpressionIcon();
+            case 'item':
+                return $this->getExpressionItem();
+            case 'title':
+                return $this->getExpressionItemTitle();
+            case 'content':
+                return $this->getExpressionItemContent();
+            case 'uri':
+                return $this->getExpressionItemUri();
+            case 'author':
+                return $this->getExpressionItemAuthor();
+            case 'timestamp':
+                return $this->getExpressionItemTimestamp();
+            case 'enclosures':
+                return $this->getExpressionItemEnclosures();
+            case 'categories':
+                return $this->getExpressionItemCategories();
+            case 'fix_encoding':
+                return $this->getSettingFixEncoding();
+        }
+    }
+
+    /**
+     * Should provide the source website HTML content; by default passes $this->feedUri to getContents().
+     * Can be overridden, for example if special headers or auth infos are required.
+     * @return string presumably the fetched page body - getContents() is defined elsewhere, confirm there
+     */
+    protected function provideWebsiteContent()
+    {
+        return getContents($this->feedUri);
+    }
+
+    /**
+     * Should provide the feed's title: the value of the single node matched by the feed title expression.
+     * Yields null when the expression is blank or invalid (query() returns false) or matches 0 or 2+ nodes.
+     * @param DOMXPath $xpath
+     * @return string|null
+     */
+    protected function provideFeedTitle(DOMXPath $xpath)
+    {
+        $title = @$xpath->query($this->getParam('feed_title')); // @: a blank expression is allowed but warns
+        if ($title instanceof DOMNodeList && $title->length === 1) {
+            return $this->getItemValueOrNodeValue($title);
+        }
+    }
+
+    /**
+     * Should provide the URL of the feed's favicon, taken from the single node matched by the icon expression.
+     * Yields null when the expression is blank or invalid (query() returns false) or matches 0 or 2+ nodes.
+     * @param DOMXPath $xpath
+     * @return string|null
+     */
+    protected function provideFeedIcon(DOMXPath $xpath)
+    {
+        $icon = @$xpath->query($this->getParam('feed_icon')); // @: a blank expression is allowed but warns
+        if ($icon instanceof DOMNodeList && $icon->length === 1) {
+            return $this->cleanMediaUrl($this->getItemValueOrNodeValue($icon));
+        }
+    }
+
+    /**
+     * Should provide the feed's items by running the item expression against the document.
+     * Warnings are suppressed with @; collectData() returns early when the result is false.
+     * @param DOMXPath $xpath
+     * @return DOMNodeList|false
+     */
+    protected function provideFeedItems(DOMXPath $xpath)
+    {
+        return @$xpath->query($this->getParam('item'));
+    }
+
+    /**
+     * Loads the source page, parses it as HTML and adds one FeedItem to $this->items per node
+     * matched by the item expression. Also records the feed URI, title and icon on the way.
+     */
+    public function collectData()
+    {
+        $this->feedUri = $this->getParam('url');
+
+        // Buffer libxml parse errors during loadHTML(), then restore the previous setting instead of forcing false.
+        $webPageHtml = new DOMDocument();
+        $previousErrorMode = libxml_use_internal_errors(true);
+        $webPageHtml->loadHTML($this->provideWebsiteContent());
+        libxml_clear_errors();
+        libxml_use_internal_errors($previousErrorMode);
+
+        $xpath = new DOMXPath($webPageHtml);
+        $this->feedName = $this->provideFeedTitle($xpath);
+        $this->feedIcon = $this->provideFeedIcon($xpath);
+
+        $entries = $this->provideFeedItems($xpath);
+        if ($entries === false) {
+            return;
+        }
+
+        foreach ($entries as $entry) {
+            $item = new \FeedItem();
+            foreach (['title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories'] as $param) {
+                $expression = $this->getParam($param);
+                if ('' === $expression) {
+                    continue;
+                }
+                // can be a string or DOMNodeList, depending on the expression result
+                $typedResult = @$xpath->evaluate($expression, $entry);
+                if (
+                    $typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0)
+                    || (is_string($typedResult) && strlen(trim($typedResult)) === 0)
+                ) {
+                    continue;
+                }
+                $item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult)));
+            }
+            $itemId = $this->generateItemId($item);
+            if (null !== $itemId) {
+                $item->setUid($itemId);
+            }
+            $this->items[] = $item;
+        }
+    }
+
+    /**
+     * @param string $param item field name, e.g. 'title' or 'uri'; selects the formatItem* method to apply
+     * @param string $value raw extracted text; its encoding is fixed first, then it is formatted
+     * @return string|array|int|false the formatted value, or the encoding-fixed $value for an unknown $param
+     */
+    protected function formatParamValue($param, $value)
+    {
+        $value = $this->fixEncoding($value);
+        switch ($param) {
+            case 'title':
+                return $this->formatItemTitle($value);
+            case 'content':
+                return $this->formatItemContent($value);
+            case 'uri':
+                return $this->formatItemUri($value);
+            case 'author':
+                return $this->formatItemAuthor($value);
+            case 'timestamp':
+                return $this->formatItemTimestamp($value);
+            case 'enclosures':
+                return $this->formatItemEnclosures($value);
+            case 'categories':
+                return $this->formatItemCategories($value);
+        }
+        return $value;
+    }
+
+    /**
+     * Formats the title of a feed item. Takes the extracted raw title and returns it
+     * formatted as a string. The default implementation returns the value unchanged.
+     * Can be overridden in case the value needs to be transformed into something else.
+     *
+     * @param string $value Raw title as extracted from the item
+     * @return string
+     */
+    protected function formatItemTitle($value)
+    {
+        return $value;
+    }
+
+    /**
+     * Formats the content of a feed item. Takes the extracted raw content and returns it
+     * formatted as a string. The default implementation returns the value unchanged.
+     * Can be overridden in case the value needs to be transformed into something else.
+     *
+     * @param string $value Raw content as extracted from the item
+     * @return string
+     */
+    protected function formatItemContent($value)
+    {
+        return $value;
+    }
+
+    /**
+     * Formats the URI of a feed item. An empty value yields an empty string, a value
+     * starting with "http://" or "https://" is returned as is, and anything else is
+     * joined with the feed URI via urljoin().
+     * Can be overridden in case the value needs to be transformed into something else.
+     *
+     * @param string $value Raw URI as extracted from the item
+     * @return string
+     */
+    protected function formatItemUri($value)
+    {
+        $isEmpty = strlen($value) === 0;
+        if ($isEmpty) {
+            return '';
+        }
+
+        // Values that already carry an http(s) scheme are passed through untouched.
+        foreach (['http://', 'https://'] as $schemePrefix) {
+            if (strpos($value, $schemePrefix) === 0) {
+                return $value;
+            }
+        }
+
+        $baseUri = $this->feedUri;
+        return urljoin($baseUri, $value);
+    }
+
+    /**
+     * Formats the author of a feed item. Takes the extracted raw author and returns it
+     * formatted as a string. The default implementation returns the value unchanged.
+     * Can be overridden in case the value needs to be transformed into something else.
+     *
+     * @param string $value Raw author as extracted from the item
+     * @return string
+     */
+    protected function formatItemAuthor($value)
+    {
+        return $value;
+    }
+
+    /**
+     * Formats the timestamp of a feed item. Takes the extracted raw timestamp and
+     * converts it to a unix timestamp using PHP's strtotime().
+     * Can be overridden, for example if a special date format has to be expected on
+     * the source website.
+     *
+     * @param string $value Raw date/time text as extracted from the item
+     * @return false|int Unix timestamp, or false when strtotime() cannot parse the value
+     */
+    protected function formatItemTimestamp($value)
+    {
+        return strtotime($value);
+    }
+
+    /**
+     * Formats the enclosures of a feed item. Takes the extracted raw enclosure value,
+     * reduces it to a media URL via {@see XPathAbstract::cleanMediaUrl()} and returns
+     * the result as an array.
+     * Can be overridden in case the values need to be transformed into something else.
+     *
+     * @param string $value Raw enclosure value as extracted from the item
+     * @return array Array holding the media URL, or an empty array when no media URL
+     *               could be recognised in the value
+     */
+    protected function formatItemEnclosures($value)
+    {
+        $mediaUrl = $this->cleanMediaUrl($value);
+
+        // cleanMediaUrl() yields null when the value contains no recognisable media
+        // URL; report "no enclosures" instead of an array holding a null entry.
+        if ($mediaUrl === null) {
+            return [];
+        }
+
+        return [$mediaUrl];
+    }
+
+    /**
+     * Formats the categories of a feed item. Takes the extracted raw category value and
+     * returns it wrapped in a single-element array.
+     * Can be overridden in case the values need to be transformed into something else
+     * (for example to split one value into several categories).
+     *
+     * @param string $value Raw category value as extracted from the item
+     * @return array
+     */
+    protected function formatItemCategories($value)
+    {
+        return [$value];
+    }
+
+    /**
+     * Extracts the first media file reference (jpg, gif, png, jpeg, ico, mp3 or webp,
+     * case-insensitive) from the given text and joins it with the feed URI via urljoin().
+     *
+     * @param string $mediaUrl Text containing a media URL
+     * @return string|null The joined media URL, or null when no media reference is found
+     */
+    protected function cleanMediaUrl($mediaUrl)
+    {
+        $matches = [];
+        $matchCount = preg_match(
+            '~(?:http(?:s)?:)?[\/a-zA-Z0-9\-=_,\.\%]+\.(?:jpg|gif|png|jpeg|ico|mp3|webp){1}~i',
+            $mediaUrl,
+            $matches
+        );
+
+        // preg_match() gives 0 for "no match" and false on error; only 1 is a hit.
+        if ($matchCount === 1) {
+            return urljoin($this->feedUri, $matches[0]);
+        }
+
+        return null;
+    }
+
+    /**
+     * Reduces the result of an XPath evaluation to a trimmed string.
+     *
+     * A non-empty string result is trimmed and returned. For a DOMNodeList only the
+     * first node is considered: the node value of an element, the value of an attribute
+     * or the whole text of a text node. Every other kind of result is reported through
+     * returnServerError().
+     *
+     * @param string|DOMNodeList $typedResult Result of DOMXPath::evaluate()
+     * @return string
+     */
+    protected function getItemValueOrNodeValue($typedResult)
+    {
+        if (is_string($typedResult)) {
+            if (strlen($typedResult) > 0) {
+                return trim($typedResult);
+            }
+        } elseif ($typedResult instanceof DOMNodeList) {
+            $firstNode = $typedResult->item(0);
+
+            if ($firstNode instanceof DOMElement) {
+                return trim($firstNode->nodeValue);
+            }
+            if ($firstNode instanceof DOMAttr) {
+                return trim($firstNode->value);
+            }
+            if ($firstNode instanceof DOMText) {
+                return trim($firstNode->wholeText);
+            }
+        }
+
+        // Nothing above matched: unsupported result type or node kind.
+        returnServerError('Unknown type of XPath expression result.');
+    }
+
+    /**
+     * Optionally fixes the encoding of an extracted text. When the 'fix_encoding'
+     * parameter is truthy the text is passed through PHP's utf8_decode function,
+     * otherwise it is returned untouched.
+     * Useful in case of "broken" or "weird" characters in the feed where you'd normally
+     * expect umlauts.
+     *
+     * @param string $input Extracted text
+     * @return string
+     */
+    protected function fixEncoding($input)
+    {
+        if ($this->getParam('fix_encoding')) {
+            return utf8_decode($input);
+        }
+
+        return $input;
+    }
+
+    /**
+     * Allows overriding the default mechanism for determining an item's uid.
+     *
+     * The default implementation returns null, in which case
+     * {@see XPathAbstract::collectData()} does not call setUid() on the item.
+     * Override this method and return a string to assign a custom uid.
+     *
+     * @param FeedItem $item The item whose fields have already been populated
+     * @return string|null Custom uid, or null to not set one explicitly
+     */
+    protected function generateItemId(\FeedItem $item)
+    {
+        return null; //auto generation
+    }
 }