diff options
author | 2024-09-19 21:24:16 +0300 | |
---|---|---|
committer | 2024-09-19 21:24:16 +0300 | |
commit | c74882f54eeca1d788c0eaf1d1adbf159ecaebdb (patch) | |
tree | f39fadb0ee4b73ce6d7b55b6610070300ef86343 | |
parent | f137cee95088c8c586bff1d13c63889689597bc3 (diff) | |
parent | 5e7b1ec1cdab0e3732468283fecffd7ae4260b79 (diff) | |
download | newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.tar.gz newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.tar.zst newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.zip |
Merge pull request #2834 from mikhailyumanov/master
Handling author name ending with ')'
-rw-r--r-- | rss/rss09xparser.cpp | 8 | ||||
-rw-r--r-- | test/data/rss_091_with_bracket_author.xml | 32 | ||||
-rw-r--r-- | test/data/rss_092_with_bracket_author.xml | 29 | ||||
-rw-r--r-- | test/data/rss_094_with_bracket_author.xml | 29 | ||||
-rw-r--r-- | test/rsspp_parser.cpp | 54 |
5 files changed, 150 insertions, 2 deletions
diff --git a/rss/rss09xparser.cpp b/rss/rss09xparser.cpp index 197e64b3..2de3209c 100644 --- a/rss/rss09xparser.cpp +++ b/rss/rss09xparser.cpp @@ -106,8 +106,12 @@ Item Rss09xParser::parse_item(xmlNode* itemNode) start > 0 && authorfield[start] != '('; start--) { } - it.author = authorfield.substr( - start + 1, end - start); + if (start == 0) { + it.author_email = authorfield; + it.author = authorfield; + } else { + it.author = authorfield.substr(start + 1, end - start); + } } else { it.author_email = authorfield; it.author = authorfield; diff --git a/test/data/rss_091_with_bracket_author.xml b/test/data/rss_091_with_bracket_author.xml new file mode 100644 index 00000000..2707bcb2 --- /dev/null +++ b/test/data/rss_091_with_bracket_author.xml @@ -0,0 +1,32 @@ +<?xml version="1.0" ?> +<!DOCTYPE rss + SYSTEM 'http://my.netscape.com/publish/formats/rss-0.91.dtd'> +<rss version="0.91"> + <channel> + <title>A Channel with Authors With Names Containing Brackets</title> + <link>http://example.com/</link> + <description>an example feed</description> + <language>en</language> + + <item> + <title>This one has an author name ending with a closing bracket</title> + <link>http://example.com/test_1.html</link> + <author>Author name)</author> + <description>Non-empty description.</description> + </item> + + <item> + <title>This one has an author name with an email in brackets</title> + <link>http://example.com/test_2.html</link> + <author>email@example.com (Author)</author> + <description>This is empty description (no).</description> + </item> + + <item> + <title>This one has an author name with a non-email next in brackets</title> + <link>http://example.com/test_3.html</link> + <author>Author (name)</author> + <description>This is empty description (yes (no)).</description> + </item> + </channel> +</rss> diff --git a/test/data/rss_092_with_bracket_author.xml b/test/data/rss_092_with_bracket_author.xml new file mode 100644 index 00000000..cb1e4fa4 --- /dev/null +++ b/test/data/rss_092_with_bracket_author.xml @@ -0,0 +1,29 @@ +<rss version="0.92" xml:base="http://example.com/feed/rss_testing.html"> + <channel> + <title>A Channel with Authors With Names Containing Brackets</title> + <link>http://example.com/</link> + <description>an example feed</description> + <language>en</language> + + <item> + <title>This one has an author name ending with a closing bracket</title> + <link>http://example.com/test_1.html</link> + <author>Author name)</author> + <description>Non-empty description.</description> + </item> + + <item> + <title>This one has an author name with an email in brackets</title> + <link>http://example.com/test_2.html</link> + <author>email@example.com (Author)</author> + <description>This is empty description (no).</description> + </item> + + <item> + <title>This one has an author name with a non-email next in brackets</title> + <link>http://example.com/test_3.html</link> + <author>Author (name)</author> + <description>This is empty description (yes (no)).</description> + </item> + </channel> +</rss> diff --git a/test/data/rss_094_with_bracket_author.xml b/test/data/rss_094_with_bracket_author.xml new file mode 100644 index 00000000..7ce4df11 --- /dev/null +++ b/test/data/rss_094_with_bracket_author.xml @@ -0,0 +1,29 @@ +<rss version="0.94" xml:base="http://example.com/feed/rss_testing.html"> + <channel> + <title>A Channel with Authors With Names Containing Brackets</title> + <link>http://example.com/</link> + <description>an example feed</description> + <language>en</language> + + <item> + <title>This one has an author name ending with a closing bracket</title> + <link>http://example.com/test_1.html</link> + <author>Author name)</author> + <description>Non-empty description.</description> + </item> + + <item> + <title>This one has an author name with an email in brackets</title> + <link>http://example.com/test_2.html</link> + <author>email@example.com (Author)</author> + <description>This is empty description (no).</description> + </item> + + <item> + <title>This one has an author name with a non-email next in brackets</title> + <link>http://example.com/test_3.html</link> + <author>Author (name)</author> + <description>This is empty description (yes (no)).</description> + </item> + </channel> +</rss> diff --git a/test/rsspp_parser.cpp b/test/rsspp_parser.cpp index 4abbe8fe..7840654e 100644 --- a/test/rsspp_parser.cpp +++ b/test/rsspp_parser.cpp @@ -103,6 +103,60 @@ TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " } } +TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " + "an author tag which ends with a bracket", + "[rsspp::Parser][issue2834]") +{ + rsspp::Parser p; + rsspp::Feed f; + + const auto check = [&]() { + REQUIRE(f.title == "A Channel with Authors With Names Containing Brackets"); + REQUIRE(f.description == "an example feed"); + REQUIRE(f.link == "http://example.com/"); + REQUIRE(f.language == "en"); + + REQUIRE(f.items.size() == 3u); + + REQUIRE(f.items[0].title == "This one has an author name ending with a closing bracket"); + REQUIRE(f.items[0].link == "http://example.com/test_1.html"); + REQUIRE(f.items[0].description == "Non-empty description."); + REQUIRE(f.items[0].author == "Author name)"); + REQUIRE(f.items[0].guid == ""); + + REQUIRE(f.items[1].title == "This one has an author name with an email in brackets"); + REQUIRE(f.items[1].link == "http://example.com/test_2.html"); + REQUIRE(f.items[1].description == "This is empty description (no)."); + REQUIRE(f.items[1].author == "Author"); + REQUIRE(f.items[1].guid == ""); + + REQUIRE(f.items[2].title == + "This one has an author name with a non-email next in brackets"); + REQUIRE(f.items[2].link == "http://example.com/test_3.html"); + REQUIRE(f.items[2].description == "This is empty description (yes (no))."); + REQUIRE(f.items[2].author == "name"); + REQUIRE(f.items[2].guid == ""); + }; + + SECTION("RSS 0.91") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_091_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_91); + check(); + } + + SECTION("RSS 0.92") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_092_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_92); + check(); + } + + SECTION("RSS 0.94") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_094_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_94); + check(); + } +} + TEST_CASE("Extracts data from RSS 0.92", "[rsspp::Parser]") { rsspp::Parser p; |