aboutsummaryrefslogtreecommitdiff
path: root/bridges/UsenixBridge.php
blob: 4f785a0e8630f5723d6199e39d75004b8524696b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
<?php
declare(strict_types=1);

/**
 * Bridge exposing USENIX ";login:" online articles as feed items.
 *
 * The listing page is scraped for article teasers; each linked article page
 * is then fetched to obtain its full content and tags.
 */
final class UsenixBridge extends BridgeAbstract
{
	const NAME = 'USENIX';
	const URI = 'https://www.usenix.org/publications';
	const DESCRIPTION = 'Digital publications from USENIX (usenix.org)';
	const MAINTAINER = 'dvikan';
	// A single context that takes no parameters.
	const PARAMETERS = [
		'USENIX ;login:' => [
		],
	];

	/**
	 * Entry point: fills $this->items for the queried context.
	 *
	 * Any context other than 'USENIX ;login:' is reported through
	 * returnClientError().
	 */
	public function collectData()
	{
		if ($this->queriedContext === 'USENIX ;login:') {
			$this->collectLoginOnlineItems();
			return;
		}
		returnClientError('Illegal Context');
	}

	/**
	 * Scrapes the ";login: online" listing page and appends one feed item per
	 * article teaser to $this->items.
	 *
	 * Listing nodes that lack a title or a link are skipped: without a link
	 * there is no article page to fetch, so they cannot become feed items.
	 */
	private function collectLoginOnlineItems(): void
	{
		$url = 'https://www.usenix.org/publications/loginonline';
		$dom = getSimpleHTMLDOMCached($url);

		foreach ($dom->find('div.view-content > div') as $entry) {
			$title = $entry->find('.views-field-title > span', 0);
			$link = $entry->find('.views-field-nothing-1 > span > a', 0);
			if ($title === null || $link === null) {
				// Not an article teaser (find() with an index yields null on no match).
				continue;
			}

			// The listing uses site-relative hrefs, so prefix the host.
			$uri = sprintf('https://www.usenix.org%s', $link->href);

			$feedItem = [
				'title' => $title->innertext,
				'uri' => $uri,
			];

			$author = $entry->find('.views-field-pseudo-author-list > span.field-content', 0);
			if ($author !== null) {
				// Keep only the text before the first comma; fall back to the
				// whole text when there is no comma (strstr() returns false).
				$feedItem['author'] = strstr($author->plaintext, ',', true) ?: $author->plaintext;
			}

			// Date text looks like "June 2, 2022".
			$createdAt = $entry->find('div.views-field-field-lv2-publication-date > div > span', 0);
			if ($createdAt !== null) {
				$feedItem['timestamp'] = $createdAt->innertext;
			}

			$this->items[] = array_merge($feedItem, $this->getItemContent($uri));
		}
	}

	/**
	 * Fetches an article page and extracts its body HTML and tags.
	 *
	 * @param string $uri Absolute URL of the article page.
	 * @return array Array with keys 'content' (HTML string, empty when the
	 *               article body container is not found) and 'categories'
	 *               (list of tag strings).
	 */
	private function getItemContent(string $uri) : array
	{
		$html = getSimpleHTMLDOMCached($uri);

		$body = $html->find('.paragraphs-items-full', 0);
		$content = $body !== null ? $body->innertext : '';

		// Append the first <fieldset> of the page, when there is one.
		$extra = $html->find('fieldset', 0);
		if ($extra !== null) {
			$content .= $extra->innertext;
		}

		$tags = [];
		foreach ($html->find('.field-name-field-lv2-tags div.field-item') as $tag) {
			$tags[] = $tag->plaintext;
		}

		return [
			'content' => $content,
			'categories' => $tags,
		];
	}
}