summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Batischev <eual.jp@gmail.com> 2024-09-19 21:24:16 +0300
committer GitHub <noreply@github.com> 2024-09-19 21:24:16 +0300
commit c74882f54eeca1d788c0eaf1d1adbf159ecaebdb (patch)
tree f39fadb0ee4b73ce6d7b55b6610070300ef86343
parent f137cee95088c8c586bff1d13c63889689597bc3 (diff)
parent 5e7b1ec1cdab0e3732468283fecffd7ae4260b79 (diff)
download newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.tar.gz
newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.tar.zst
newsboat-c74882f54eeca1d788c0eaf1d1adbf159ecaebdb.zip
Merge pull request #2834 from mikhailyumanov/master
Handling author name ending with ')'
-rw-r--r-- rss/rss09xparser.cpp 8
-rw-r--r-- test/data/rss_091_with_bracket_author.xml 32
-rw-r--r-- test/data/rss_092_with_bracket_author.xml 29
-rw-r--r-- test/data/rss_094_with_bracket_author.xml 29
-rw-r--r-- test/rsspp_parser.cpp 54
5 files changed, 150 insertions, 2 deletions
diff --git a/rss/rss09xparser.cpp b/rss/rss09xparser.cpp
index 197e64b3..2de3209c 100644
--- a/rss/rss09xparser.cpp
+++ b/rss/rss09xparser.cpp
@@ -106,8 +106,12 @@ Item Rss09xParser::parse_item(xmlNode* itemNode)
start > 0 && authorfield[start] != '(';
start--) {
}
- it.author = authorfield.substr(
- start + 1, end - start);
+ if (start == 0) {
+ it.author_email = authorfield;
+ it.author = authorfield;
+ } else {
+ it.author = authorfield.substr(start + 1, end - start);
+ }
} else {
it.author_email = authorfield;
it.author = authorfield;
diff --git a/test/data/rss_091_with_bracket_author.xml b/test/data/rss_091_with_bracket_author.xml
new file mode 100644
index 00000000..2707bcb2
--- /dev/null
+++ b/test/data/rss_091_with_bracket_author.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" ?>
+<!DOCTYPE rss
+ SYSTEM 'http://my.netscape.com/publish/formats/rss-0.91.dtd'>
+<rss version="0.91">
+ <channel>
+ <title>A Channel with Authors With Names Containing Brackets</title>
+ <link>http://example.com/</link>
+ <description>an example feed</description>
+ <language>en</language>
+
+ <item>
+ <title>This one has an author name ending with a closing bracket</title>
+ <link>http://example.com/test_1.html</link>
+ <author>Author name)</author>
+ <description>Non-empty description.</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with an email in brackets</title>
+ <link>http://example.com/test_2.html</link>
+ <author>email@example.com (Author)</author>
+ <description>This is empty description (no).</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with a non-email next in brackets</title>
+ <link>http://example.com/test_3.html</link>
+ <author>Author (name)</author>
+ <description>This is empty description (yes (no)).</description>
+ </item>
+ </channel>
+</rss>
diff --git a/test/data/rss_092_with_bracket_author.xml b/test/data/rss_092_with_bracket_author.xml
new file mode 100644
index 00000000..cb1e4fa4
--- /dev/null
+++ b/test/data/rss_092_with_bracket_author.xml
@@ -0,0 +1,29 @@
+<rss version="0.92" xml:base="http://example.com/feed/rss_testing.html">
+ <channel>
+ <title>A Channel with Authors With Names Containing Brackets</title>
+ <link>http://example.com/</link>
+ <description>an example feed</description>
+ <language>en</language>
+
+ <item>
+ <title>This one has an author name ending with a closing bracket</title>
+ <link>http://example.com/test_1.html</link>
+ <author>Author name)</author>
+ <description>Non-empty description.</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with an email in brackets</title>
+ <link>http://example.com/test_2.html</link>
+ <author>email@example.com (Author)</author>
+ <description>This is empty description (no).</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with a non-email next in brackets</title>
+ <link>http://example.com/test_3.html</link>
+ <author>Author (name)</author>
+ <description>This is empty description (yes (no)).</description>
+ </item>
+ </channel>
+</rss>
diff --git a/test/data/rss_094_with_bracket_author.xml b/test/data/rss_094_with_bracket_author.xml
new file mode 100644
index 00000000..7ce4df11
--- /dev/null
+++ b/test/data/rss_094_with_bracket_author.xml
@@ -0,0 +1,29 @@
+<rss version="0.94" xml:base="http://example.com/feed/rss_testing.html">
+ <channel>
+ <title>A Channel with Authors With Names Containing Brackets</title>
+ <link>http://example.com/</link>
+ <description>an example feed</description>
+ <language>en</language>
+
+ <item>
+ <title>This one has an author name ending with a closing bracket</title>
+ <link>http://example.com/test_1.html</link>
+ <author>Author name)</author>
+ <description>Non-empty description.</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with an email in brackets</title>
+ <link>http://example.com/test_2.html</link>
+ <author>email@example.com (Author)</author>
+ <description>This is empty description (no).</description>
+ </item>
+
+ <item>
+ <title>This one has an author name with a non-email next in brackets</title>
+ <link>http://example.com/test_3.html</link>
+ <author>Author (name)</author>
+ <description>This is empty description (yes (no)).</description>
+ </item>
+ </channel>
+</rss>
diff --git a/test/rsspp_parser.cpp b/test/rsspp_parser.cpp
index 4abbe8fe..7840654e 100644
--- a/test/rsspp_parser.cpp
+++ b/test/rsspp_parser.cpp
@@ -103,6 +103,60 @@ TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains "
}
}
+TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains "
+ "an author tag which ends with a bracket",
+ "[rsspp::Parser][issue2834]") // Parses three bracket-author fixtures (RSS 0.91, 0.92, 0.94) and runs the same assertions on each.
+{
+ rsspp::Parser p;
+ rsspp::Feed f;
+
+ const auto check = [&]() { // Shared assertions; captures `f` by reference, so it inspects whatever the enclosing SECTION just parsed.
+ REQUIRE(f.title == "A Channel with Authors With Names Containing Brackets");
+ REQUIRE(f.description == "an example feed");
+ REQUIRE(f.link == "http://example.com/");
+ REQUIRE(f.language == "en");
+
+ REQUIRE(f.items.size() == 3u);
+
+ REQUIRE(f.items[0].title == "This one has an author name ending with a closing bracket");
+ REQUIRE(f.items[0].link == "http://example.com/test_1.html");
+ REQUIRE(f.items[0].description == "Non-empty description.");
+ REQUIRE(f.items[0].author == "Author name)"); // Fixture has ')' with no matching '(': the field must come through verbatim.
+ REQUIRE(f.items[0].guid == "");
+
+ REQUIRE(f.items[1].title == "This one has an author name with an email in brackets");
+ REQUIRE(f.items[1].link == "http://example.com/test_2.html");
+ REQUIRE(f.items[1].description == "This is empty description (no).");
+ REQUIRE(f.items[1].author == "Author"); // Fixture is "email@example.com (Author)": only the bracketed part is the author.
+ REQUIRE(f.items[1].guid == "");
+
+ REQUIRE(f.items[2].title ==
+ "This one has an author name with a non-email next in brackets");
+ REQUIRE(f.items[2].link == "http://example.com/test_3.html");
+ REQUIRE(f.items[2].description == "This is empty description (yes (no)).");
+ REQUIRE(f.items[2].author == "name"); // Fixture is "Author (name)": the bracketed part wins even though the prefix is not an email.
+ REQUIRE(f.items[2].guid == "");
+ };
+
+ SECTION("RSS 0.91") {
+ REQUIRE_NOTHROW(f = p.parse_file("data/rss_091_with_bracket_author.xml")); // Parsing must not throw on the unbalanced bracket.
+ REQUIRE(f.rss_version == rsspp::Feed::RSS_0_91);
+ check();
+ }
+
+ SECTION("RSS 0.92") {
+ REQUIRE_NOTHROW(f = p.parse_file("data/rss_092_with_bracket_author.xml")); // Same items as the 0.91 fixture, declared as version 0.92.
+ REQUIRE(f.rss_version == rsspp::Feed::RSS_0_92);
+ check();
+ }
+
+ SECTION("RSS 0.94") {
+ REQUIRE_NOTHROW(f = p.parse_file("data/rss_094_with_bracket_author.xml")); // Same items as the 0.91 fixture, declared as version 0.94.
+ REQUIRE(f.rss_version == rsspp::Feed::RSS_0_94);
+ check();
+ }
+}
+
TEST_CASE("Extracts data from RSS 0.92", "[rsspp::Parser]")
{
rsspp::Parser p;