aboutsummaryrefslogtreecommitdiff
path: root/bridges/TldrTechBridge.php
blob: 222cd49ef2cd2a34f93c17c3af859f0c455722fc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
<?php

declare(strict_types=1);

/**
 * Bridge that turns TLDR newsletter issues into feed items.
 *
 * The newest issue is resolved through the site's `api/latest/<topic>`
 * endpoint (by reading the Location header of the response); further issues
 * are taken from the links on the topic's archive page.
 */
class TldrTechBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'TLDR Tech Newsletter Bridge';
    const URI = 'https://tldr.tech/';
    const DESCRIPTION = 'Return newsletter articles from TLDR Tech';

    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Maximum number of articles to return',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
            'topic' => [
                'name' => 'Topic',
                'type' => 'list',
                'values' => [
                    'Tech' => 'tech',
                    'Web Dev' => 'webdev',
                    'AI' => 'ai',
                    'Information Security' => 'infosec',
                    'Product Management' => 'product',
                    'DevOps' => 'devops',
                    'Crypto' => 'crypto',
                    'Design' => 'design',
                    'Marketing' => 'marketing',
                    'Founders' => 'founders',
                ],
                'defaultValue' => 'tech'
            ]
        ]
    ];

    /**
     * Collect up to `limit` issues of the selected topic into $this->items.
     *
     * The latest issue is added first, followed by the archive entries in
     * page order. An issue is only added once, even when it is reachable
     * through both the latest-issue endpoint and the archive page.
     *
     * @throws \Exception when the archive page does not contain the expected
     *                    entries container
     */
    public function collectData()
    {
        $topic = $this->getInput('topic');
        $limit = (int) $this->getInput('limit');
        if ($limit < 1) {
            // Nothing was requested, so avoid any network traffic.
            return;
        }

        $response = getContents(self::URI . 'api/latest/' . $topic, [], [], true);
        $location = $response->getHeader('Location');
        // Without a usable Location header the latest issue cannot be
        // resolved; the archive page below is still processed.
        if (is_string($location) && $location !== '') {
            $this->extractItem(Url::fromString($location));
        }

        $archivesHtml = getSimpleHTMLDOM(self::URI . $topic . '/archives');
        $entriesRoot = $archivesHtml->find('div.content-center.mt-5', 0);
        if (!$entriesRoot) {
            throw new \Exception('Could not find archive entries');
        }

        foreach ($entriesRoot->children() as $child) {
            // Check the limit before fetching, so that the latest issue
            // added above counts towards it.
            if (count($this->items) >= $limit) {
                break;
            }
            if ($child->tag !== 'a') {
                continue;
            }
            $href = $child->href;
            if (!is_string($href) || $href === '') {
                continue;
            }
            $this->extractItem(Url::fromString(self::URI . ltrim($href, '/')));
        }
    }

    /**
     * Fetch one issue page and append it to $this->items.
     *
     * The call is a no-op when an item with the same URI has already been
     * collected, or when fetching the page raises an HttpException.
     *
     * @param Url $url address of the issue; its last path segment is parsed
     *                 with strtotime() to obtain the item timestamp
     */
    private function extractItem(Url $url): void
    {
        $uri = (string) $url;
        if (in_array($uri, array_column($this->items, 'uri'), true)) {
            return;
        }

        // Drop a trailing slash so the last segment is never empty.
        $segments = explode('/', rtrim($url->getPath(), '/'));
        $slug = (string) end($segments);

        try {
            [$content, $title] = $this->extractContent($url);
        } catch (HttpException $e) {
            // archive occasionally returns broken URLs
            return;
        }

        $item = [
            'uri'     => $uri,
            // Fall back to the URL slug when the page has no heading.
            'title'   => $title !== '' ? $title : $slug,
            'content' => $content,
        ];

        // Only set a timestamp when the slug could actually be parsed.
        $timestamp = strtotime($slug);
        if ($timestamp !== false) {
            $item['timestamp'] = $timestamp;
        }

        $this->items[] = $item;
    }

    /**
     * Download an issue page and reduce it to feed-friendly HTML.
     *
     * Removes the subscribe form and the privacy-link block, replaces the
     * section headers with styled <h3> elements, renders bold links as
     * <a><b>…</b></a> and adds spacing between article blocks.
     *
     * @param Url|string $url address of the issue page
     *
     * @return array{0: string, 1: string} inner HTML of the content
     *         container and the text of its first <h2> ('' when absent)
     *
     * @throws \Exception when the content container is missing
     */
    private function extractContent($url)
    {
        $html = getSimpleHTMLDOMCached((string) $url);
        $content = $html->find('div.content-center.mt-5', 0);
        if (!$content) {
            throw new \Exception('Could not find content');
        }

        $subscribeForm = $content->find('div.mt-5 > div > form', 0);
        if ($subscribeForm) {
            $content->removeChild($subscribeForm->parent->parent);
        }

        $privacyLink = $content->find("a[href='/privacy']", 0);
        if ($privacyLink) {
            $content->removeChild($privacyLink->parent->parent);
        }

        // Replace each section header wrapper's children with a single <h3>.
        foreach ($content->find('h6.text-center.font-bold') as $header) {
            $headerRoot = $header->parent;
            $heading = $html->createElement('h3', $headerRoot->plaintext);
            $heading->style = 'margin-top: 1.2em; margin-bottom: 0.5em;';
            foreach ($headerRoot->children() as $child) {
                $headerRoot->removeChild($child);
            }
            $headerRoot->appendChild($heading);
        }

        // The bold styling comes from a CSS class; swap it for a <b> element.
        foreach ($content->find('a.font-bold') as $a) {
            $a->removeAttribute('class');
            $bold = $html->createElement('b', $a->plaintext);
            $a->removeChild($a->firstChild());
            $a->appendChild($bold);
        }

        // Add vertical spacing between the article blocks of each section.
        foreach ($content->children() as $child) {
            if ($child->tag !== 'div') {
                continue;
            }
            foreach ($child->children() as $grandchild) {
                if ($grandchild->tag === 'div') {
                    $grandchild->style = 'margin-bottom: 12px;';
                }
            }
        }

        $title = $content->find('h2', 0);
        $titleText = $title ? (string) $title->plaintext : '';

        return [$content->innertext, $titleText];
    }
}