aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Dag <me@dvikan.no> 2025-01-04 19:00:26 +0100
committerGravatar GitHub <noreply@github.com> 2025-01-04 19:00:26 +0100
commit48cb7d71ed982e6b4145a825eb7c5579b9023999 (patch)
tree88f7a24e72f187d7826058ba4e20a84fa2be4c71
parentf9e9c8101e770126c151d98356de689279f64a9a (diff)
downloadrss-bridge-48cb7d71ed982e6b4145a825eb7c5579b9023999.tar.gz
rss-bridge-48cb7d71ed982e6b4145a825eb7c5579b9023999.tar.zst
rss-bridge-48cb7d71ed982e6b4145a825eb7c5579b9023999.zip
feat(telegram): add pagination fetching of messages (#4394)
* feat(telegram): add pagination fetching of messages * docs
-rw-r--r--bridges/TelegramBridge.php88
-rw-r--r--config.default.ini.php5
-rw-r--r--docs/10_Bridge_Specific/Telegram.md12
3 files changed, 74 insertions, 31 deletions
diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php
index 81c5aeb9..1f82c606 100644
--- a/bridges/TelegramBridge.php
+++ b/bridges/TelegramBridge.php
@@ -15,6 +15,14 @@ class TelegramBridge extends BridgeAbstract
]
]
];
+
+ // Bridge-specific options, configured under the [TelegramBridge] section of the ini config.
+ // 'max_pages' is optional and defaults to 1; collectData() reads it via getOption('max_pages')
+ // to decide how many channel pages to fetch.
+ const CONFIGURATION = [
+ 'max_pages' => [
+ 'required' => false,
+ 'defaultValue' => 1,
+ ],
+ ];
+
const TEST_DETECT_PARAMETERS = [
'https://t.me/s/rssbridge' => ['username' => 'rssbridge'],
'https://t.me/rssbridge' => ['username' => 'rssbridge'],
@@ -26,7 +34,7 @@ class TelegramBridge extends BridgeAbstract
'https://rssbridge.t.me/' => ['username' => 'rssbridge'],
];
- const CACHE_TIMEOUT = 60 * 15; // 15 mins
+ const CACHE_TIMEOUT = 60 * 60; // 1h
private $feedName = '';
private $enclosures = [];
@@ -36,33 +44,56 @@ class TelegramBridge extends BridgeAbstract
+ /**
+ * Fetches up to `max_pages` pages of the public channel preview (https://t.me/s/<username>)
+ * and fills $this->items with one entry per message, newest first.
+ *
+ * @throws \Exception when a page has neither a channel title nor any messages
+ */
public function collectData()
{
- $html = getSimpleHTMLDOM($this->getURI());
-
- $channelTitle = $html->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
- $channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
- $this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';
- $posts = $html->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
- if (!$channelTitle && !$posts) {
- throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
- }
- foreach ($posts as $messageDiv) {
- $this->itemTitle = '';
- $this->enclosures = [];
- $item = [];
-
- $item['uri'] = $messageDiv->find('a.tgme_widget_message_date', 0)->href;
- $item['content'] = $this->processContent($messageDiv);
- $item['title'] = $this->itemTitle;
- $item['timestamp'] = $messageDiv->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
- $item['enclosures'] = $this->enclosures;
-
- $messageOwner = $messageDiv->find('a.tgme_widget_message_owner_name', 0);
- if ($messageOwner) {
- $item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
+ $pages = 0;
+ $url = 'https://t.me/s/' . $this->normalizeUsername();
+
+ // Clamp to the documented range (min=1, max=100). Without the cast, a non-numeric value
+ // would be compared as a string and could run the loop all the way to the upper bound;
+ // a value below 1 would silently produce an empty feed.
+ $max_pages = max(1, min(100, (int) $this->getOption('max_pages')));
+
+ // One HTTP request per iteration; $max_pages is already capped at 100 above
+ while ($pages < $max_pages) {
+ $pages++;
+
+ $dom = getSimpleHTMLDOM($url);
+
+ $channelTitle = $dom->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
+ $channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
+ $this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';
+
+ $messages = $dom->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
+ if (!$channelTitle && !$messages) {
+ throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
}
- $this->items[] = $item;
+ // Walk the page back to front and prepend each item, so $this->items keeps the page's
+ // own order with every later-fetched page placed in front of the earlier ones.
+ foreach (array_reverse($messages) as $message) {
+ $this->itemTitle = '';
+ $this->enclosures = [];
+
+ $item = [];
+
+ $item['uri'] = $message->find('a.tgme_widget_message_date', 0)->href;
+ $item['content'] = $this->processContent($message);
+ $item['title'] = $this->itemTitle;
+ $item['timestamp'] = $message->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
+ $item['enclosures'] = $this->enclosures;
+
+ $messageOwner = $message->find('a.tgme_widget_message_owner_name', 0);
+ if ($messageOwner) {
+ $item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
+ }
+
+ array_unshift($this->items, $item);
+ }
+
+ // The "load more" link carries a `before` query parameter pointing at the next (older) page
+ $more = $dom->find('> div.tgme_widget_message_centered.js-messages_more_wrap a', 0);
+ if ($more && str_contains($more->href, 'before')) {
+ // The href may be site-relative with a leading slash; strip it to avoid "https://t.me//s/..."
+ $url = 'https://t.me/' . ltrim($more->href, '/');
+ } else {
+ break;
+ }
}
+
+ $this->logger->info(sprintf('Fetched %s messages from %s pages (%s)', count($this->items), $pages, $url));
+
$this->items = array_reverse($this->items);
}
@@ -369,12 +400,7 @@ EOD;
+ /**
+ * Returns the 'username' input with any leading '@' characters removed.
+ */
private function normalizeUsername()
{
- // todo: can be replaced with ltrim($username, '@');
- $username = $this->getInput('username');
- if (substr($username, 0, 1) === '@') {
- return substr($username, 1);
- }
- return $username;
+ // Cast first: passing null to ltrim() is deprecated since PHP 8.1.
+ // NOTE(review): presumably getInput() yields null when 'username' is unset - confirm.
+ return ltrim((string) $this->getInput('username'), '@');
}
public function detectParameters($url)
diff --git a/config.default.ini.php b/config.default.ini.php
index c23372d9..1045d6c3 100644
--- a/config.default.ini.php
+++ b/config.default.ini.php
@@ -155,6 +155,11 @@ port = 11211
; --- Bridge specific configuration ------
+[TelegramBridge]
+
+; Max pages to fetch (each page holds up to 20 messages and costs one HTTP request), min=1 max=100
+max_pages = 1
+
[DiscogsBridge]
; Sets the personal access token for interactions with Discogs. When
diff --git a/docs/10_Bridge_Specific/Telegram.md b/docs/10_Bridge_Specific/Telegram.md
new file mode 100644
index 00000000..528de788
--- /dev/null
+++ b/docs/10_Bridge_Specific/Telegram.md
@@ -0,0 +1,12 @@
+# TelegramBridge
+
+By default, the bridge fetches a single page with up to 20 messages.
+
+To fetch more messages, increase the `max_pages` config option (values above 100 are capped at 100):
+
+```ini
+[TelegramBridge]
+
+; Fetch a maximum of 3 pages (up to 3 HTTP requests)
+max_pages = 3
+```