aboutsummaryrefslogtreecommitdiff
path: root/bridges/LegifranceJOBridge.php
blob: cf8f9f7206ff38e44959bd82a49ebc7bd79197c5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<?php

/**
 * Bridge for the "Journal Officiel de la République Française" published on Légifrance.
 *
 * The fetched page is walked as a three-level heading hierarchy (h3 > h4 > h5). One feed
 * item is produced per deepest heading available on each branch, and the links found in
 * the element following that heading become the item content.
 */
class LegifranceJOBridge extends BridgeAbstract
{
    const MAINTAINER = 'Pierre Mazière';
    const NAME = 'Journal Officiel de la République Française';
    // This uri returns a snippet of js. Should probably be https://www.legifrance.gouv.fr/jorf/jo/
    const URI = 'https://www.legifrance.gouv.fr/affichJO.do';
    const DESCRIPTION = 'Returns the laws and decrees officially registered daily in France';

    const PARAMETERS = [];

    /** @var string|null Author copied into every item; never populated at the moment. */
    private $author;

    /** @var int|null Unix timestamp derived from the issue URI, or null when it cannot be derived. */
    private $timestamp;

    /** @var string|null URI of the issue, used as prefix of every item URI; never populated at the moment. */
    private $uri;

    /**
     * Fetches the page at self::URI and appends one item per heading leaf to $this->items.
     *
     * For each h3 section: if the element following it contains h4 subsections, those are
     * used instead of the section itself; likewise each h4 is replaced by the h5 origins
     * found in the element following it, when there are any.
     *
     * @return void
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(self::URI);

        // NOTE: extraction of the author (from 'h2.titleJO') and of the issue URI (from the
        // 'https…' part of 'h2.titleELI') is currently disabled, so $this->author and
        // $this->uri stay unset and no timestamp can be derived from the URI.
        $this->timestamp = $this->parseTimestampFromUri($this->uri);

        foreach ($html->find('h3') as $section) {
            $subsections = $this->findInNextSibling($section, 'h4');

            foreach ($subsections as $subsection) {
                $origins = $this->findInNextSibling($subsection, 'h5');

                foreach ($origins as $origin) {
                    $this->items[] = $this->extractItem($section, $subsection, $origin);
                }

                // No finer-grained headings: the subsection itself is the item.
                if (empty($origins)) {
                    $this->items[] = $this->extractItem($section, $subsection);
                }
            }

            // No subsections at all: the section itself is the item.
            if (empty($subsections)) {
                $this->items[] = $this->extractItem($section);
            }
        }
    }

    /**
     * Builds a feed item for the deepest heading supplied.
     *
     * @param object      $section    h3 node; its text is the base of the title.
     * @param object|null $subsection h4 node below the section, if any.
     * @param object|null $origin     h5 node below the subsection, if any.
     *
     * @return array Item with 'author', 'uri', 'title' and 'content' keys, plus 'timestamp'
     *               when one could be derived from the issue URI.
     */
    private function extractItem($section, $subsection = null, $origin = null)
    {
        $item = [];
        $item['author'] = $this->author;
        if (!is_null($this->timestamp)) {
            // Only expose a timestamp that was actually parsed; a bogus value is worse than none.
            $item['timestamp'] = $this->timestamp;
        }
        // The fragment is the item's position in the feed, which keeps item URIs distinct.
        $item['uri'] = $this->uri . '#' . count($this->items);
        $item['title'] = $section->plaintext;

        if (!is_null($origin)) {
            $item['title'] = '[ ' . $item['title'] . ' / ' . $subsection->plaintext . ' ] ' . $origin->plaintext;
            $data = $origin;
        } elseif (!is_null($subsection)) {
            $item['title'] = '[ ' . $item['title'] . ' ] ' . $subsection->plaintext;
            $data = $subsection;
        } else {
            $data = $section;
        }

        $item['content'] = '';
        foreach ($this->findInNextSibling($data, 'a') as $content) {
            $text = $content->plaintext;
            // NOTE: link targets are currently not extracted (the former lookup of the
            // 'resource' attribute on the anchor's next sibling is disabled), so every
            // link is emitted with an empty href.
            $href = '';

            $item['content'] .= '<p><a href="' . $href . '">' . $text . '</a></p>';
        }
        return $item;
    }

    /**
     * Runs a selector against the element that immediately follows $node.
     *
     * @param object $node     DOM node whose next sibling is searched.
     * @param string $selector Selector passed to find().
     *
     * @return array Matching nodes; empty when $node has no next sibling.
     */
    private function findInNextSibling($node, $selector)
    {
        $sibling = $node->nextSibling();
        if (is_null($sibling)) {
            // A trailing heading has nothing after it; treat that as "no matches".
            return [];
        }
        return $sibling->find($selector);
    }

    /**
     * Derives a Unix timestamp from the part of an issue URI that follows 'eli/jo/'.
     *
     * The last five characters of the URI are dropped before parsing
     * (presumably a trailing suffix such as '/html' — TODO confirm against a live ELI link).
     *
     * @param mixed $uri Issue URI, or null when unknown.
     *
     * @return int|null Parsed timestamp, or null when the URI is missing, does not contain
     *                  the marker, or the extracted text is not a date strtotime() understands.
     */
    private function parseTimestampFromUri($uri)
    {
        $marker = 'eli/jo/';

        if (!is_string($uri)) {
            return null;
        }

        $markerPos = strpos($uri, $marker);
        if ($markerPos === false) {
            // Without this check, false would be coerced to offset 0 and garbage parsed.
            return null;
        }

        $parsed = strtotime(substr($uri, $markerPos + strlen($marker), -5));

        return $parsed === false ? null : $parsed;
    }

    /**
     * @return string URL of the icon advertised for this bridge.
     */
    public function getIcon()
    {
        return 'https://www.legifrance.gouv.fr/img/favicon.ico';
    }
}