aboutsummaryrefslogtreecommitdiff
path: root/bridges/ScalableCapitalBlogBridge.php
blob: d95431c6267a7aac01ee3469ab70a80679158e8e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
<?php

use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;

class ScalableCapitalBlogBridge extends WebDriverAbstract
{
    const NAME = 'Scalable Capital Blog';
    const URI = 'https://de.scalable.capital/blog';
    const DESCRIPTION = 'Alle Artikel';
    const MAINTAINER = 'hleskien';

    /**
     * Prefix prepended to the image and article link attributes read from the page.
     */
    private const BASE_URL = 'https://de.scalable.capital';

    /**
     * XPath matching the article item elements inside the "articles" container.
     */
    private const ITEMS_XPATH = '//div[contains(@class, "articles")]//div[@class="items"]//div[contains(@class, "item")]';

    /**
     * Adds accept language german to the Chrome Options.
     *
     * Starts from the options built by the parent class and appends the
     * `--accept-lang=de` argument.
     *
     * @return \Facebook\WebDriver\Chrome\ChromeOptions
     */
    protected function getBrowserOptions()
    {
        $chromeOptions = parent::getBrowserOptions();
        $chromeOptions->addArguments(['--accept-lang=de']);
        return $chromeOptions;
    }

    /**
     * Puts the content of the first page into the $items array.
     *
     * For every matched item element the image (as enclosure), title, link,
     * summary, publication date and author are read. The timestamp is only
     * set when the date text could be parsed. cleanUp() is always called
     * once the parent's collectData() has returned, even if reading the page
     * throws.
     *
     * @throws \Facebook\WebDriver\Exception\NoSuchElementException
     * @throws \Facebook\WebDriver\Exception\TimeoutException
     */
    public function collectData()
    {
        parent::collectData();

        try {
            // Wait until the 15th item is visible; the original author treats
            // this as "the last item is loaded".
            $this->getDriver()->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(
                WebDriverBy::xpath(self::ITEMS_XPATH . '[15]')
            ));
            $this->setIcon($this->getDriver()->findElement(WebDriverBy::xpath('//link[@rel="shortcut icon"]'))->getAttribute('href'));

            $items = $this->getDriver()->findElements(WebDriverBy::xpath(self::ITEMS_XPATH));
            foreach ($items as $item) {
                $feedItem = [];

                $feedItem['enclosures'] = [self::BASE_URL . $item->findElement(WebDriverBy::tagName('img'))->getAttribute('src')];

                // The first anchor inside the item supplies both title and link.
                $heading = $item->findElement(WebDriverBy::tagName('a'));
                $feedItem['title'] = $heading->getText();

                $feedItem['uri'] = self::BASE_URL . $heading->getAttribute('href');
                $feedItem['content'] = $item->findElement(WebDriverBy::xpath('.//div[@class="summary"]'))->getText();

                $date = $item->findElement(WebDriverBy::xpath('.//div[@class="published-date"]'))->getText();
                $timestamp = $this->formatItemTimestamp($date);
                if ($timestamp !== false) {
                    // Leave the timestamp unset instead of storing `false`
                    // when the date text could not be parsed.
                    $feedItem['timestamp'] = $timestamp;
                }

                $feedItem['author'] = $item->findElement(WebDriverBy::xpath('.//div[@class="author"]'))->getText();

                $this->items[] = $feedItem;
            }
        } finally {
            $this->cleanUp();
        }
    }

    /**
     * Converts the given date text into a timestamp.
     *
     * The text is parsed with an IntlDateFormatter for the 'de' locale using
     * the LONG date style and no time part.
     * NOTE(review): the original documentation described the input as
     * "dd.mm.yyyy" - confirm which format the page actually shows.
     *
     * @param string $value Date text as read from the page.
     * @return int|float|false Parsed timestamp, or false if parsing failed.
     */
    protected function formatItemTimestamp($value)
    {
        $formatter = new IntlDateFormatter('de', IntlDateFormatter::LONG, IntlDateFormatter::NONE);
        return $formatter->parse($value);
    }
}