aboutsummaryrefslogtreecommitdiff
path: root/bridges/XPathBridge.php
diff options
context:
space:
mode:
Diffstat (limited to 'bridges/XPathBridge.php')
-rw-r--r--bridges/XPathBridge.php401
1 files changed, 207 insertions, 194 deletions
diff --git a/bridges/XPathBridge.php b/bridges/XPathBridge.php
index 5aa280e0..98defddc 100644
--- a/bridges/XPathBridge.php
+++ b/bridges/XPathBridge.php
@@ -1,127 +1,128 @@
<?php
-class XPathBridge extends XPathAbstract {
- const NAME = 'XPathBridge';
- const URI = 'https://github.com/rss-bridge/rss-bridge';
- const DESCRIPTION
- = 'Parse any webpage using <a href="https://devhints.io/xpath" target="_blank">XPath expressions</a>';
- const MAINTAINER = 'Niehztog';
- const PARAMETERS = array(
- '' => array(
-
- 'url' => array(
- 'name' => 'Enter web page URL',
- 'title' => <<<"EOL"
+class XPathBridge extends XPathAbstract
+{
+ const NAME = 'XPathBridge';
+ const URI = 'https://github.com/rss-bridge/rss-bridge';
+ const DESCRIPTION
+ = 'Parse any webpage using <a href="https://devhints.io/xpath" target="_blank">XPath expressions</a>';
+ const MAINTAINER = 'Niehztog';
+ const PARAMETERS = [
+ '' => [
+
+ 'url' => [
+ 'name' => 'Enter web page URL',
+ 'title' => <<<"EOL"
You can specify any website URL which serves data suited for display in RSS feeds
(for example a news blog).
EOL
- , 'type' => 'text',
- 'exampleValue' => 'https://news.blizzard.com/en-en',
- 'defaultValue' => 'https://news.blizzard.com/en-en',
- 'required' => true
- ),
-
- 'item' => array(
- 'name' => 'Item selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => 'https://news.blizzard.com/en-en',
+ 'defaultValue' => 'https://news.blizzard.com/en-en',
+ 'required' => true
+ ],
+
+ 'item' => [
+ 'name' => 'Item selector',
+ 'title' => <<<"EOL"
Enter an XPath expression matching a list of dom nodes, each node containing one
feed article item in total (usually a surrounding &lt;div&gt; or &lt;span&gt; tag). This will
be the context nodes for all of the following expressions. This expression usually
starts with a single forward slash.
EOL
- , 'type' => 'text',
- 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
- 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
- 'required' => true
- ),
-
- 'title' => array(
- 'name' => 'Item title selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
+ 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
+ 'required' => true
+ ],
+
+ 'title' => [
+ 'name' => 'Item title selector',
+ 'title' => <<<"EOL"
This expression should match a node contained within each article item node
containing the article headline. It should start with a dot followed by two
forward slashes, referring to any descendant nodes of the article item node.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/div/div[2]/h2',
- 'defaultValue' => './/div/div[2]/h2',
- 'required' => true
- ),
-
- 'content' => array(
- 'name' => 'Item description selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/div/div[2]/h2',
+ 'defaultValue' => './/div/div[2]/h2',
+ 'required' => true
+ ],
+
+ 'content' => [
+ 'name' => 'Item description selector',
+ 'title' => <<<"EOL"
This expression should match a node contained within each article item node
containing the article content or description. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of the
article item node.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
- 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
- 'required' => false
- ),
-
- 'uri' => array(
- 'name' => 'Item URL selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
+ 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
+ 'required' => false
+ ],
+
+ 'uri' => [
+ 'name' => 'Item URL selector',
+ 'title' => <<<"EOL"
This expression should match a node's attribute containing the article URL
(usually the href attribute of an &lt;a&gt; tag). It should start with a dot
followed by two forward slashes, referring to any descendant nodes of
the article item node. Attributes can be selected by prepending an @ char
before the attributes name.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
- 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
- 'required' => false
- ),
-
- 'author' => array(
- 'name' => 'Item author selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
+ 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
+ 'required' => false
+ ],
+
+ 'author' => [
+ 'name' => 'Item author selector',
+ 'title' => <<<"EOL"
This expression should match a node contained within each article item
node containing the article author's name. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of
the article item node.
EOL
- , 'type' => 'text',
- 'required' => false
- ),
+ , 'type' => 'text',
+ 'required' => false
+ ],
- 'timestamp' => array(
- 'name' => 'Item date selector',
- 'title' => <<<"EOL"
+ 'timestamp' => [
+ 'name' => 'Item date selector',
+ 'title' => <<<"EOL"
This expression should match a node or node's attribute containing the
article timestamp or date (parsable by PHP's strtotime function). It
should start with a dot followed by two forward slashes, referring to
any descendant nodes of the article item node. Attributes can be
selected by prepending an @ char before the attributes name.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
- 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
- 'required' => false
- ),
-
- 'enclosures' => array(
- 'name' => 'Item image selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
+ 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
+ 'required' => false
+ ],
+
+ 'enclosures' => [
+ 'name' => 'Item image selector',
+ 'title' => <<<"EOL"
This expression should match a node's attribute containing an article
image URL (usually the src attribute of an &lt;img&gt; tag or a style
attribute). It should start with a dot followed by two forward slashes,
referring to any descendant nodes of the article item node. Attributes
can be selected by prepending an @ char before the attributes name.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style',
- 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style',
- 'required' => false
- ),
-
- 'categories' => array(
- 'name' => 'Item category selector',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style',
+ 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style',
+ 'required' => false
+ ],
+
+ 'categories' => [
+ 'name' => 'Item category selector',
+ 'title' => <<<"EOL"
This expression should match a node or node's attribute contained
within each article item node containing the article category. This
could be inside &lt;div&gt; or &lt;span&gt; tags or sometimes be hidden
@@ -130,122 +131,134 @@ forward slashes, referring to any descendant nodes of the article
item node. Attributes can be selected by prepending an @ char
before the attributes name.
EOL
- , 'type' => 'text',
- 'exampleValue' => './/div[@class="ArticleListItem-label"]',
- 'defaultValue' => './/div[@class="ArticleListItem-label"]',
- 'required' => false
- ),
-
- 'fix_encoding' => array(
- 'name' => 'Fix encoding',
- 'title' => <<<"EOL"
+ , 'type' => 'text',
+ 'exampleValue' => './/div[@class="ArticleListItem-label"]',
+ 'defaultValue' => './/div[@class="ArticleListItem-label"]',
+ 'required' => false
+ ],
+
+ 'fix_encoding' => [
+ 'name' => 'Fix encoding',
+ 'title' => <<<"EOL"
Check this to fix feed encoding by invoking PHP's utf8_decode
function on all extracted texts. Try this in case you see "broken" or
"weird" characters in your feed where you'd normally expect umlauts
or any other non-ascii characters.
EOL
- , 'type' => 'checkbox',
- 'required' => false
- ),
-
- )
- );
-
- /**
- * Source Web page URL (should provide either HTML or XML content)
- * @return string
- */
- protected function getSourceUrl(){
- return $this->encodeUri($this->getInput('url'));
- }
-
- /**
- * XPath expression for extracting the feed items from the source page
- * @return string
- */
- protected function getExpressionItem(){
- return urldecode($this->getInput('item'));
- }
-
- /**
- * XPath expression for extracting an item title from the item context
- * @return string
- */
- protected function getExpressionItemTitle(){
- return urldecode($this->getInput('title'));
- }
-
- /**
- * XPath expression for extracting an item's content from the item context
- * @return string
- */
- protected function getExpressionItemContent(){
- return urldecode($this->getInput('content'));
- }
-
- /**
- * XPath expression for extracting an item link from the item context
- * @return string
- */
- protected function getExpressionItemUri(){
- return urldecode($this->getInput('uri'));
- }
-
- /**
- * XPath expression for extracting an item author from the item context
- * @return string
- */
- protected function getExpressionItemAuthor(){
- return urldecode($this->getInput('author'));
- }
-
- /**
- * XPath expression for extracting an item timestamp from the item context
- * @return string
- */
- protected function getExpressionItemTimestamp(){
- return urldecode($this->getInput('timestamp'));
- }
-
- /**
- * XPath expression for extracting item enclosures (media content like
- * images or movies) from the item context
- * @return string
- */
- protected function getExpressionItemEnclosures(){
- return urldecode($this->getInput('enclosures'));
- }
-
- /**
- * XPath expression for extracting an item category from the item context
- * @return string
- */
- protected function getExpressionItemCategories(){
- return urldecode($this->getInput('categories'));
- }
-
- /**
- * Fix encoding
- * @return string
- */
- protected function getSettingFixEncoding(){
- return $this->getInput('fix_encoding');
- }
-
- /**
- * Fixes URL encoding issues in input URL's
- * @param $uri
- * @return string|string[]
- */
- private function encodeUri($uri)
- {
- if (strpos($uri, 'https%3A%2F%2F') === 0
- || strpos($uri, 'http%3A%2F%2F') === 0) {
- $uri = urldecode($uri);
- }
-
- $uri = str_replace('|', '%7C', $uri);
-
- return $uri;
- }
+ , 'type' => 'checkbox',
+ 'required' => false
+ ],
+
+ ]
+ ];
+
+ /**
+ * Source Web page URL (should provide either HTML or XML content)
+ * @return string
+ */
+ protected function getSourceUrl()
+ {
+ return $this->encodeUri($this->getInput('url'));
+ }
+
+ /**
+ * XPath expression for extracting the feed items from the source page
+ * @return string
+ */
+ protected function getExpressionItem()
+ {
+ return urldecode($this->getInput('item'));
+ }
+
+ /**
+ * XPath expression for extracting an item title from the item context
+ * @return string
+ */
+ protected function getExpressionItemTitle()
+ {
+ return urldecode($this->getInput('title'));
+ }
+
+ /**
+ * XPath expression for extracting an item's content from the item context
+ * @return string
+ */
+ protected function getExpressionItemContent()
+ {
+ return urldecode($this->getInput('content'));
+ }
+
+ /**
+ * XPath expression for extracting an item link from the item context
+ * @return string
+ */
+ protected function getExpressionItemUri()
+ {
+ return urldecode($this->getInput('uri'));
+ }
+
+ /**
+ * XPath expression for extracting an item author from the item context
+ * @return string
+ */
+ protected function getExpressionItemAuthor()
+ {
+ return urldecode($this->getInput('author'));
+ }
+
+ /**
+ * XPath expression for extracting an item timestamp from the item context
+ * @return string
+ */
+ protected function getExpressionItemTimestamp()
+ {
+ return urldecode($this->getInput('timestamp'));
+ }
+
+ /**
+ * XPath expression for extracting item enclosures (media content like
+ * images or movies) from the item context
+ * @return string
+ */
+ protected function getExpressionItemEnclosures()
+ {
+ return urldecode($this->getInput('enclosures'));
+ }
+
+ /**
+ * XPath expression for extracting an item category from the item context
+ * @return string
+ */
+ protected function getExpressionItemCategories()
+ {
+ return urldecode($this->getInput('categories'));
+ }
+
+ /**
+ * Fix encoding
+ * @return string
+ */
+ protected function getSettingFixEncoding()
+ {
+ return $this->getInput('fix_encoding');
+ }
+
+ /**
+ * Fixes URL encoding issues in input URL's
+ * @param $uri
+ * @return string|string[]
+ */
+ private function encodeUri($uri)
+ {
+ if (
+ strpos($uri, 'https%3A%2F%2F') === 0
+ || strpos($uri, 'http%3A%2F%2F') === 0
+ ) {
+ $uri = urldecode($uri);
+ }
+
+ $uri = str_replace('|', '%7C', $uri);
+
+ return $uri;
+ }
}