blob: 96319c97514e3c501800b2cee023f5ecfc6c9fd6 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
<?php
/**
 * Bridge for the Twitter Engineering blog.
 *
 * Reads the blog's RSS feed and then loads each article page to replace the
 * item's content, timestamp and categories with data scraped from the page.
 */
class TwitterEngineeringBridge extends FeedExpander
{
    const MAINTAINER = 'corenting';
    const NAME = 'Twitter Engineering Blog';
    const URI = 'https://blog.twitter.com/engineering/';
    const DESCRIPTION = 'Returns the newest articles.';
    const CACHE_TIMEOUT = 21600; // 6h

    /**
     * Hands the blog's RSS feed URL to the feed-expansion machinery of the
     * parent class.
     */
    public function collectData()
    {
        $url = 'https://blog.twitter.com/engineering/en_us/blog.rss';
        $this->collectExpandableDatas($url);
    }

    /**
     * Enriches one feed item with data scraped from its article page.
     *
     * Every scraping step is optional: when the page (or one of the expected
     * elements) is missing, the corresponding value of the incoming item is
     * left as it was instead of aborting with a fatal error.
     *
     * @param array $item Feed item; 'uri' is read, and 'content', 'timestamp'
     *                    and 'categories' may be overwritten.
     * @return array The (possibly enriched) item.
     */
    protected function parseItem(array $item)
    {
        $dom = getSimpleHTMLDOMCached($item['uri']);
        if (!$dom) {
            $item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
            return $item;
        }
        $dom = defaultLinkTo($dom, $this->getURI());

        $article_body = $dom->find('div.column.column-6', 0);
        // The selector may not match (e.g. after a site layout change); in
        // that case keep the content the item already carries.
        if ($article_body) {
            // Remove elements that are not part of article content
            $unwanted_selector = 'div.bl02-blog-post-text-masthead, div.tweet-error-text, div.bl13-tweet-template';
            foreach ($article_body->find($unwanted_selector) as $found) {
                $found->outertext = '';
            }

            // Set src for images, but only when a data-src value is actually
            // present so that images with a regular src are not blanked out.
            foreach ($article_body->find('img') as $found) {
                $data_src = $found->getAttribute('data-src');
                if (is_string($data_src) && $data_src !== '') {
                    $found->setAttribute('src', $data_src);
                }
            }

            $item['content'] = $article_body;
        }

        $date_element = $dom->find('span.b02-blog-post-no-masthead__date', 0);
        if ($date_element) {
            $timestamp = strtotime($date_element->innertext);
            // strtotime() returns false for text it cannot parse; do not
            // overwrite the item's timestamp with that.
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }
        }

        $item['categories'] = self::getCategoriesFromTags($dom);

        return $item;
    }

    /**
     * Collects the trimmed text of every tag list entry found on the page.
     *
     * @param mixed $article_html Parsed article page; must provide find().
     * @return array List of category names (empty when no tags are found).
     */
    private static function getCategoriesFromTags($article_html)
    {
        $categories = [];
        foreach ($article_html->find('.post__tags > ul > li') as $tag) {
            $categories[] = trim($tag->plaintext);
        }
        return $categories;
    }

    /**
     * Returns a fixed bridge name.
     */
    public function getName()
    {
        // Otherwise the original feed's title, "English (US)", would be used
        return 'Twitter Engineering Blog';
    }
}
|