aboutsummaryrefslogtreecommitdiff
path: root/bridges/BundestagParteispendenBridge.php
blob: 773f9129e72741cc75411739be85ac289dcd6886 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
<?php

/**
 * Bridge that turns the Bundestag's table of large party donations into feed items.
 *
 * The bridge first loads an AJAX filter list, follows the first link found in it
 * and then converts every five-column table row of the linked page into one item.
 */
class BundestagParteispendenBridge extends BridgeAbstract
{
    const MAINTAINER = 'mibe';
    const NAME = 'Deutscher Bundestag - Parteispenden';
    const URI = 'https://www.bundestag.de/parlament/praesidium/parteienfinanzierung/fundstellen50000';

    const CACHE_TIMEOUT = 86400; // 24h
    const DESCRIPTION = 'Returns the latest "soft money" donations to parties represented in the German Bundestag.';
    const CONTENT_TEMPLATE = <<<TMPL
<p><b>Partei:</b><br>%s</p>
<p><b>Spendenbetrag:</b><br>%s</p>
<p><b>Spender:</b><br>%s</p>
<p><b>Eingang der Spende:</b><br>%s</p>
TMPL;

    /**
     * Returns the URL of the icon shown for this bridge.
     *
     * @return string
     */
    public function getIcon()
    {
        return 'https://www.bundestag.de/static/appdata/includes/images/layout/favicon.ico';
    }

    /**
     * Fetches the donation table and appends one feed item per donation row to $this->items.
     *
     * Every lookup that comes back empty is reported through returnServerError().
     *
     * @return void
     */
    public function collectData()
    {
        $ajaxUri = <<<URI
https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002
URI;
        // Get the main page. Guard in case the helper yields a falsy value instead of a DOM.
        $html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT)
            or returnServerError('Could not request the list of donation pages.');

        // Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year.
        $firstAnchor = $html->find('a', 0)
            or returnServerError('Could not find the proper HTML element.');

        $url = $this->resolveUrl($firstAnchor->href);
        if ($url === '') {
            returnServerError('Could not find the link to the donations page.');
        }

        // Get the actual page with the soft money donations
        $html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT)
            or returnServerError('Could not request the donations page.');

        $rows = $html->find('table.table > tbody > tr')
            or returnServerError('Could not find the proper HTML elements.');

        foreach ($rows as $row) {
            $item = $this->generateItemFromRow($row);
            if (is_array($item)) {
                $item['uri'] = $url;
                $this->items[] = $item;
            }
        }
    }

    /**
     * Turns the href of the year link into a URL that can be requested.
     *
     * Absolute URLs are returned unchanged (apart from surrounding whitespace);
     * protocol-relative and root-relative references are completed with the
     * scheme and host used throughout this bridge.
     *
     * @param mixed $href Raw value of the anchor's href attribute
     * @return string The resolved URL, or an empty string when no href is available
     */
    private function resolveUrl($href)
    {
        $href = trim((string) $href);

        if (strpos($href, '//') === 0) {
            return 'https:' . $href;
        }

        if (strpos($href, '/') === 0) {
            return 'https://www.bundestag.de' . $href;
        }

        return $href;
    }

    /**
     * Converts one table row into a feed item.
     *
     * @param simple_html_dom_node $row Table row taken from the donations table
     * @return array|null Item with 'title', 'content' and 'uid', plus 'enclosures' and
     *                    'timestamp' when available; null when the row is not a donation row
     */
    private function generateItemFromRow(simple_html_dom_node $row)
    {
        // The row must have 5 columns. There are monthly header rows, which are ignored here.
        if (count($row->children) !== 5) {
            return null;
        }

        // The first four columns carry their value inside their first child element.
        // A row lacking one of them cannot be rendered, so it is skipped like a header row.
        $values = [];
        for ($column = 0; $column < 4; $column++) {
            $cell = $row->children[$column];
            if (!isset($cell->children[0])) {
                return null;
            }
            $values[] = $cell->children[0]->innertext;
        }

        $party  = $values[0];
        $amount = $values[1] . ' €';
        $donor  = $values[2];
        // Strip whitespace from date string.
        $date   = str_replace(' ', '', $values[3]);

        // The document link is optional. Searching from the cell itself also covers
        // cells without a child element, which previously caused a call on null.
        $dip = $row->children[4]->find('a.dipLink', 0);

        $content = sprintf(self::CONTENT_TEMPLATE, $party, $amount, $donor, $date);

        $item = [
            'title' => $party . ': ' . $amount,
            'content' => $content,
            'uid' => sha1($content),
        ];

        // Try to get the link to the official document
        if ($dip !== null && $dip !== false) {
            $item['enclosures'] = [$dip->href];
        }

        // Try to parse the date. The leading "!" resets all fields that are not part of
        // the format; without it the time of day would be taken from the current clock
        // and the timestamp of the same donation would differ on every run.
        $dateTime = DateTimeImmutable::createFromFormat('!d.m.Y', trim($date));
        if ($dateTime !== false) {
            $item['timestamp'] = $dateTime->getTimestamp();
        }

        return $item;
    }
}