aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--actions/DetectAction.php28
-rw-r--r--actions/FindfeedAction.php6
-rw-r--r--bridges/BMDSystemhausBlogBridge.php11
-rw-r--r--bridges/FirefoxAddonsBridge.php40
-rw-r--r--lib/FeedExpander.php12
5 files changed, 56 insertions, 41 deletions
diff --git a/actions/DetectAction.php b/actions/DetectAction.php
index 99d299bb..0c61f1b6 100644
--- a/actions/DetectAction.php
+++ b/actions/DetectAction.php
@@ -4,14 +4,14 @@ class DetectAction implements ActionInterface
{
public function execute(Request $request)
{
- $targetURL = $request->get('url');
+ $url = $request->get('url');
$format = $request->get('format');
- if (!$targetURL) {
- throw new \Exception('You must specify a url!');
+ if (!$url) {
+ return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url']));
}
if (!$format) {
- throw new \Exception('You must specify a format!');
+ return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']));
}
$bridgeFactory = new BridgeFactory();
@@ -23,19 +23,23 @@ class DetectAction implements ActionInterface
$bridge = $bridgeFactory->create($bridgeClassName);
- $bridgeParams = $bridge->detectParameters($targetURL);
+ $bridgeParams = $bridge->detectParameters($url);
- if (is_null($bridgeParams)) {
+ if (!$bridgeParams) {
continue;
}
- $bridgeParams['bridge'] = $bridgeClassName;
- $bridgeParams['format'] = $format;
-
- $url = '?action=display&' . http_build_query($bridgeParams);
- return new Response('', 301, ['location' => $url]);
+ $query = [
+ 'action' => 'display',
+ 'bridge' => $bridgeClassName,
+ 'format' => $format,
+ ];
+ $query = array_merge($query, $bridgeParams);
+ return new Response('', 301, ['location' => '?' . http_build_query($query)]);
}
- throw new \Exception('No bridge found for given URL: ' . $targetURL);
+ return new Response(render(__DIR__ . '/../templates/error.html.php', [
+ 'message' => 'No bridge found for given URL: ' . $url,
+ ]));
}
}
diff --git a/actions/FindfeedAction.php b/actions/FindfeedAction.php
index cd0a0c74..94dc6b72 100644
--- a/actions/FindfeedAction.php
+++ b/actions/FindfeedAction.php
@@ -9,10 +9,10 @@ class FindfeedAction implements ActionInterface
{
public function execute(Request $request)
{
- $targetURL = $request->get('url');
+ $url = $request->get('url');
$format = $request->get('format');
- if (!$targetURL) {
+ if (!$url) {
return new Response('You must specify a url', 400);
}
if (!$format) {
@@ -29,7 +29,7 @@ class FindfeedAction implements ActionInterface
$bridge = $bridgeFactory->create($bridgeClassName);
- $bridgeParams = $bridge->detectParameters($targetURL);
+ $bridgeParams = $bridge->detectParameters($url);
if ($bridgeParams === null) {
continue;
diff --git a/bridges/BMDSystemhausBlogBridge.php b/bridges/BMDSystemhausBlogBridge.php
index 327f165d..50ed4639 100644
--- a/bridges/BMDSystemhausBlogBridge.php
+++ b/bridges/BMDSystemhausBlogBridge.php
@@ -102,12 +102,17 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
//-----------------------------------------------------
public function detectParameters($url)
{
- $parsed_url = parse_url($url);
- if ($parsed_url['host'] != 'www.bmd.com') {
+ try {
+ $parsedUrl = Url::fromString($url);
+ } catch (UrlException $e) {
return null;
}
- $path = explode('/', $parsed_url['path']);
+ if ($parsedUrl->getHost() != 'www.bmd.com') {
+ return null;
+ }
+
+ $path = explode('/', $parsedUrl->getPath());
if ($this->getURIbyCountry($path[1]) == '') {
return null;
diff --git a/bridges/FirefoxAddonsBridge.php b/bridges/FirefoxAddonsBridge.php
index c8fb10f8..fcf2ca02 100644
--- a/bridges/FirefoxAddonsBridge.php
+++ b/bridges/FirefoxAddonsBridge.php
@@ -19,23 +19,6 @@ class FirefoxAddonsBridge extends BridgeAbstract
const CACHE_TIMEOUT = 3600;
private $feedName = '';
- private $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
- private $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
- private $outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
-
- private $urlRegex = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
-
- public function detectParameters($url)
- {
- $params = [];
-
- if (preg_match($this->urlRegex, $url, $matches)) {
- $params['id'] = $matches[1];
- return $params;
- }
-
- return null;
- }
public function collectData()
{
@@ -52,7 +35,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
$item['uri'] = $this->getURI();
$item['author'] = $author;
- if (preg_match($this->releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
+ $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
+ if (preg_match($releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
$item['timestamp'] = $match[1];
$size = $match[2];
}
@@ -68,7 +52,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
$releaseNotes = $this->removeLinkRedirects($li->find('div.AddonVersionCard-releaseNotes', 0));
- if (preg_match($this->xpiFileRegex, $downloadlink, $match)) {
+ $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
+ if (preg_match($xpiFileRegex, $downloadlink, $match)) {
$xpiFilename = $match[0];
}
@@ -110,10 +95,25 @@ EOD;
*/
private function removeLinkRedirects($html)
{
+ $outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
foreach ($html->find('a') as $a) {
- $a->href = urldecode(preg_replace($this->outgoingRegex, '', $a->href));
+ $a->href = urldecode(preg_replace($outgoingRegex, '', $a->href));
}
return $html->innertext;
}
+
+ public function detectParameters($url)
+ {
+ $params = [];
+
+ // Example: https://addons.mozilla.org/en-US/firefox/addon/ublock-origin
+ $pattern = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
+ if (preg_match($pattern, $url, $matches)) {
+ $params['id'] = $matches[1];
+ return $params;
+ }
+
+ return null;
+ }
}
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index be3a8ca4..abe964e1 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -23,14 +23,20 @@ abstract class FeedExpander extends BridgeAbstract
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
// prepare/massage the xml to make it more acceptable
- $badStrings = [
+ $problematicStrings = [
' ',
'»',
'’',
];
- $xmlString = str_replace($badStrings, '', $xmlString);
+ $xmlString = str_replace($problematicStrings, '', $xmlString);
+
$feedParser = new FeedParser();
- $this->feed = $feedParser->parseFeed($xmlString);
+ try {
+ $this->feed = $feedParser->parseFeed($xmlString);
+ } catch (\Exception $e) {
+ throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e)));
+ }
+
$items = array_slice($this->feed['items'], 0, $maxItems);
// todo: extract parse logic out from FeedParser
foreach ($items as $item) {