aboutsummaryrefslogtreecommitdiff
path: root/bridges/HarvardHealthBlogBridge.php
blob: bb6a5ede419a8e47c2b3c5730cf8e110fbf96889 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
<?php

/**
 * Bridge that turns the Harvard Health blog listing page into feed items.
 *
 * The listing page (self::URI) is scraped for article links; each linked
 * article page is then fetched to build the item content.
 */
class HarvardHealthBlogBridge extends BridgeAbstract
{
    const NAME = 'Harvard Health Blog';
    const URI = 'https://www.health.harvard.edu/blog';
    const DESCRIPTION = 'Retrieve articles from health.harvard.edu';
    const MAINTAINER = 'tillcash';
    // Upper bound on the number of items collected per run.
    const MAX_ARTICLES = 10;
    const PARAMETERS = [
        [
            // When unchecked, the first paragraph of each article is removed
            // from the content (see constructContent()).
            'image' => [
                'name' => 'Article Image',
                'type' => 'checkbox',
                'defaultValue' => 'checked',
            ],
        ],
    ];

    /**
     * Scrape the listing page and append up to self::MAX_ARTICLES entries to
     * $this->items.
     *
     * Listing entries without a title link are skipped. The 'timestamp' key is
     * only set when the entry carries a <time> element with a datetime
     * attribute, so a missing date no longer aborts the whole feed.
     */
    public function collectData()
    {
        $dom = getSimpleHTMLDOM(self::URI);
        $count = 0;

        foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) {
            if ($count >= self::MAX_ARTICLES) {
                break;
            }

            $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0);
            if (!$data) {
                continue;
            }

            $url = $data->href;

            $item = [
                'content' => $this->constructContent($url),
                'title'   => $data->plaintext,
                'uid'     => $url,
                'uri'     => $url,
            ];

            // find() yields null when there is no <time> element; guard it
            // instead of dereferencing blindly.
            $time = $element->find('time', 0);
            if ($time && $time->datetime) {
                $item['timestamp'] = $time->datetime;
            }

            $this->items[] = $item;

            $count++;
        }
    }

    /**
     * Fetch an article page (through the cached loader) and return the inner
     * HTML of its content container.
     *
     * @param string $url Article URL taken from the listing page.
     *
     * @return string Inner HTML of the article container, or the literal
     *                'Content Not Found' when the container is missing.
     */
    private function constructContent($url)
    {
        $dom = getSimpleHTMLDOMCached($url);

        $article = $dom->find('div[class*="content-repository-content"]', 0);
        if (!$article) {
            return 'Content Not Found';
        }

        // remove article image: the first <p> of the container is dropped
        // when the 'image' option is unchecked. The lookup can come back
        // empty, so only remove when a paragraph was actually found.
        if (!$this->getInput('image')) {
            $image = $article->find('p', 0);
            if ($image) {
                $image->remove();
            }
        }

        // remove ads
        foreach ($article->find('.inline-ad') as $ad) {
            $ad->outertext = '';
        }

        return $article->innertext;
    }
}