#include "rssparser.h"

#include <string>

#include "3rd-party/catch.hpp"

#include "cache.h"
#include "rss/feed.h"
#include "rss/item.h"
#include "rssfeed.h"
#include "rssignores.h"

using namespace newsboat;

// Unit tests for RssParser::parse().
//
// Every test case builds its own parser on top of a Cache opened with the
// path ":memory:" and an empty RssIgnores, then hands parse() a hand-built
// rsspp::Feed and inspects the returned feed object.

// A default-constructed rsspp::Feed (nothing assigned to it) must make
// parse() return a null feed.
TEST_CASE("parse() ignores uninitialized upstream feed", "[RssParser]")
{
    ConfigContainer cfg;
    Cache rsscache(":memory:", &cfg);
    RssIgnores ignores;
    RssParser parser("http://example.com", rsscache, cfg, &ignores);

    rsspp::Feed upstream_feed;

    const auto feed = parser.parse(upstream_feed);

    REQUIRE(feed == nullptr);
}

// Checks which upstream field ends up as the item's GUID. Each section
// clears one more field than the previous one and expects the next fallback:
// guid -> link followed by pubDate -> link -> title.
TEST_CASE("parse() with no item GUID falls back to link+pubdate, link, and title",
    "[RssParser]")
{
    ConfigContainer cfg;
    Cache rsscache(":memory:", &cfg);
    RssIgnores ignores;
    RssParser parser("http://example.com", rsscache, cfg, &ignores);

    rsspp::Feed upstream_feed;
    upstream_feed.rss_version = rsspp::Feed::ATOM_1_0;
    upstream_feed.items.push_back({});

    // Reference into the vector; nothing is pushed after this point, so it
    // stays valid for the whole test case.
    rsspp::Item& upstream_item = upstream_feed.items[0];
    upstream_item.guid = "a real GUID";
    upstream_item.title = "title of article";
    upstream_item.link = "https://example.com/blog/post";
    upstream_item.pubDate = "2023-07-31";

    SECTION("uses GUID if it is available") {
        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items().front()->guid() == "a real GUID");
    }

    SECTION("uses link+pubdate if GUID is not available") {
        upstream_item.guid.clear();

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        // Link and publication date are expected back to back, with no
        // separator between them.
        REQUIRE(feed->items().front()->guid() ==
            "https://example.com/blog/post2023-07-31");
    }

    SECTION("uses link if GUID and pubdate are not available") {
        upstream_item.guid.clear();
        upstream_item.pubDate.clear();

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items().front()->guid() == "https://example.com/blog/post");
    }

    SECTION("uses title if other options are not available") {
        upstream_item.guid.clear();
        upstream_item.pubDate.clear();
        upstream_item.link.clear();

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items().front()->guid() == "title of article");
    }
}

// Checks feed and item titles with and without title_type set to "html".
//
// NOTE(review): neither fixture title contains any markup, so the "html"
// sections expect exactly the same string as the verbatim sections and would
// still pass if rendering were skipped. Consider putting markup into the
// fixture titles so the two paths can be told apart.
TEST_CASE("parse() renders html titles into plaintext if type indicates html",
    "[RssParser]")
{
    ConfigContainer cfg;
    Cache rsscache(":memory:", &cfg);
    RssIgnores ignores;
    RssParser parser("http://example.com", rsscache, cfg, &ignores);

    rsspp::Feed upstream_feed;
    upstream_feed.rss_version = rsspp::Feed::ATOM_1_0;
    upstream_feed.items.push_back({});
    rsspp::Item& upstream_item = upstream_feed.items[0];

    upstream_feed.title = "title of feed";
    upstream_item.title = "title of article";

    SECTION("uses feed title varbatim if no html type is indicated") {
        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->title() == "title of feed");
    }

    SECTION("renders out feed title if html type is indicated") {
        upstream_feed.title_type = "html";

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->title() == "title of feed");
    }

    SECTION("uses item title varbatim if no html type is indicated") {
        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items().front()->title() == "title of article");
    }

    SECTION("renders out item title if html type is indicated") {
        upstream_item.title_type = "html";

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items().front()->title() == "title of article");
    }
}

// The upstream item has no title at all; the sections check what the parsed
// item reports as its title for different link/description/content inputs.
//
// Each section asserts that parse() produced a feed with exactly one item
// before touching it, so a failed parse is reported as a test failure
// instead of dereferencing a null feed or indexing an empty item list.
TEST_CASE("parse() generates a title when title element is missing",
    "[RssParser]")
{
    ConfigContainer cfg;
    Cache rsscache(":memory:", &cfg);
    RssIgnores ignores;
    RssParser parser("http://example.com", rsscache, cfg, &ignores);

    rsspp::Feed upstream_feed;
    upstream_feed.rss_version = rsspp::Feed::ATOM_1_0;
    upstream_feed.items.push_back({});
    rsspp::Item& upstream_item = upstream_feed.items[0];

    upstream_item.description = "Just saying hello";

    SECTION("creates a title from the URL") {
        upstream_item.link = "http://example.com/2023/08/29/hello-world.html";

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        const auto item = feed->items()[0];
        REQUIRE(item->title() == "Hello world");
    }

    SECTION("creates a title from the content if the URL is numeric") {
        upstream_item.link = "http://example.com/1234567";

        SECTION("title from description") {
            const auto feed = parser.parse(upstream_feed);

            REQUIRE(feed != nullptr);
            REQUIRE(feed->items().size() == 1);
            const auto item = feed->items()[0];
            REQUIRE(item->title() == "Just saying hello");
        }

        SECTION("title from content_encoded") {
            // With the description emptied, content_encoded is the only
            // text left on the item.
            upstream_item.description.clear();
            upstream_item.content_encoded = "article text";

            const auto feed = parser.parse(upstream_feed);

            REQUIRE(feed != nullptr);
            REQUIRE(feed->items().size() == 1);
            const auto item = feed->items()[0];
            REQUIRE(item->title() == "article text");
        }
    }
}

// Checks which of several upstream enclosures is exposed on the parsed item:
// an "audio/ogg" enclosure is expected to win over an "image/png" one
// regardless of order, and with two image enclosures the later one is
// expected.
TEST_CASE("parse() extracts best enclosure", "[RssParser]")
{
    ConfigContainer cfg;
    Cache rsscache(":memory:", &cfg);
    RssIgnores ignores;
    RssParser parser("http://example.com", rsscache, cfg, &ignores);

    rsspp::Feed upstream_feed;
    upstream_feed.rss_version = rsspp::Feed::ATOM_1_0;
    upstream_feed.items.push_back({});
    rsspp::Item& upstream_item = upstream_feed.items[0];

    // Builds an rsspp::Enclosure with the four fields the assertions below
    // read back from the parsed item.
    const auto make_enclosure = [](
            const std::string& url,
            const std::string& type,
            const std::string& description,
            const std::string& mime
    ) -> rsspp::Enclosure {
        rsspp::Enclosure result;
        result.url = url;
        result.type = type;
        result.description = description;
        result.description_mime_type = mime;
        return result;
    };

    const auto image_enclosure1 = make_enclosure(
            "http://example.com/enclosure1",
            "image/png",
            "description1",
            "text/plain"
        );
    const auto image_enclosure2 = make_enclosure(
            "http://example.com/enclosure2",
            "image/jpg",
            "description2",
            "text/plain"
        );
    const auto audio_enclosure = make_enclosure(
            "http://example.com/enclosure3",
            "audio/ogg",
            "description3",
            "text/plain"
        );

    SECTION("podcast preferred over non-podcast enclosure") {
        // Shared by both orderings below: the audio enclosure must be the
        // one reported, whichever position it was added in.
        const auto run_validation = [&]() {
            const auto feed = parser.parse(upstream_feed);

            REQUIRE(feed != nullptr);
            REQUIRE(feed->items().size() == 1);
            REQUIRE(feed->items()[0]->enclosure_url() == "http://example.com/enclosure3");
            REQUIRE(feed->items()[0]->enclosure_type() == "audio/ogg");
            REQUIRE(feed->items()[0]->enclosure_description() == "description3");
            REQUIRE(feed->items()[0]->enclosure_description_mime_type() == "text/plain");
        };

        SECTION("podcast first") {
            upstream_item.enclosures.push_back(audio_enclosure);
            upstream_item.enclosures.push_back(image_enclosure1);

            run_validation();
        }

        SECTION("podcast last") {
            upstream_item.enclosures.push_back(image_enclosure1);
            upstream_item.enclosures.push_back(audio_enclosure);

            run_validation();
        }
    }

    SECTION("last enclosure picked if both are non-podcast enclosures") {
        upstream_item.enclosures.push_back(image_enclosure1);
        upstream_item.enclosures.push_back(image_enclosure2);

        const auto feed = parser.parse(upstream_feed);

        REQUIRE(feed != nullptr);
        REQUIRE(feed->items().size() == 1);
        REQUIRE(feed->items()[0]->enclosure_url() == "http://example.com/enclosure2");
        REQUIRE(feed->items()[0]->enclosure_type() == "image/jpg");
        REQUIRE(feed->items()[0]->enclosure_description() == "description2");
        REQUIRE(feed->items()[0]->enclosure_description_mime_type() == "text/plain");
    }
}