aboutsummaryrefslogtreecommitdiff
path: root/bridges/InstituteForTheStudyOfWarBridge.php
blob: 3f54094efa5e32abd6032933a5856dfe0d8added (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
<?php

/**
 * Bridge for the publications listing of the Institute for the Study of War.
 *
 * Fetches the `/publications` page (optionally with user-supplied filter
 * parameters), then fetches every listed article to extract its body and any
 * attached PDF reports.
 */
class InstituteForTheStudyOfWarBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Institute for the Study of War';
    const URI = 'https://www.understandingwar.org';

    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Recent publications of the ISW.';

    const PARAMETERS = [
        '' => [
            'searchURL' => [
                'name' => 'Filter URL',
                'required' => false,
                'title' => 'Set a filter on https://www.understandingwar.org/publications and copy the URL parameters.'
            ],
        ]
    ];

    /**
     * Collects one feed item per row of the publications listing.
     *
     * When the listing container is absent (for example, a filter that matches
     * nothing) the feed is left empty instead of failing on a null node.
     */
    public function collectData()
    {
        // Accept the query string with or without its leading '?', and do not
        // append a dangling '?' when no filter was supplied.
        $filter = ltrim(trim((string) $this->getInput('searchURL')), '?');
        $url = self::URI . '/publications';
        if ($filter !== '') {
            $url .= '?' . $filter;
        }

        $html = getSimpleHTMLDOM($url);
        $entries = $html->find('.view-content', 0);
        if ($entries === null) {
            return;
        }

        foreach ($entries->find('.views-row') as $entry) {
            $item = $this->processEntry($entry);
            if ($item !== null) {
                $this->items[] = $item;
            }
        }
    }

    /**
     * Builds a feed item from one listing row and its article page.
     *
     * @param object $entry DOM node of a single `.views-row` element.
     *
     * @return array|null The feed item, or null when the row has no linked
     *                    heading to build an item from.
     */
    private function processEntry($entry)
    {
        $h2 = $entry->find('h2', 0);
        $link = $h2 === null ? null : $h2->find('a', 0);
        if ($link === null) {
            // Without a link there is neither a uid nor an article to fetch.
            return null;
        }
        $title = $h2->plaintext;
        $uri = $link->href;
        $article_url = $this->absoluteUrl($uri);

        // The span is split as "<date> - <author>". Limiting the split to two
        // parts keeps hyphenated author names intact.
        $date_string = '';
        $user = '';
        $date_span = $entry->find('span.datespan', 0);
        if ($date_span !== null) {
            $parts = explode('-', $date_span->innertext, 2);
            $date_string = trim($parts[0]);
            $user = isset($parts[1]) ? trim($parts[1]) : '';
        }

        // The leading '!' resets all unparsed fields (time of day) to zero;
        // without it they are taken from the current clock, so the timestamp
        // of the same item would differ between fetches.
        $date = DateTime::createFromFormat('!F d, Y', $date_string);

        $html = getSimpleHTMLDOMCached($article_url);
        $body = $html->find('[property=content:encoded]', 0);
        $content = $body === null ? '' : $body->innertext;

        $enclosures = [];
        $pdfs_list = $html->find('.field-name-field-pdf-report', 0);
        if ($pdfs_list !== null) {
            foreach ($pdfs_list->find('.field-item') as $pdf_item) {
                $a = $pdf_item->find('a', 0);
                if ($a !== null && $a->href) {
                    $enclosures[] = $this->absoluteUrl($a->href);
                }
            }
        }

        $item = [
            'uri' => $article_url,
            'title' => $title,
            // Kept as the raw href so existing item identities do not change.
            'uid' => $uri,
            'author' => $user,
            'content' => $content,
            'enclosures' => $enclosures
        ];
        // createFromFormat() returns false on unparsable input; omit the
        // timestamp in that case rather than calling a method on false.
        if ($date !== false) {
            $item['timestamp'] = $date->getTimestamp();
        }

        return $item;
    }

    /**
     * Resolves an href found on the site against the bridge's base URI.
     *
     * @param string $href Link target as found in the page.
     *
     * @return string The href unchanged when it already carries a host
     *                (absolute or protocol-relative), otherwise prefixed
     *                with self::URI.
     */
    private function absoluteUrl($href)
    {
        if (preg_match('#^([a-z][a-z0-9+.-]*:)?//#i', $href) === 1) {
            return $href;
        }

        return self::URI . '/' . ltrim($href, '/');
    }
}