From 3908f2df862895dab4d6bf5b226558412f01ce56 Mon Sep 17 00:00:00 2001 From: "Mikhail Iumanov (tufsampla)" Date: Sat, 31 Aug 2024 04:52:33 +0400 Subject: fix authorfield handling in Rss09xParser::parse_item --- rss/rss09xparser.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rss/rss09xparser.cpp b/rss/rss09xparser.cpp index 197e64b3..952bc3ac 100644 --- a/rss/rss09xparser.cpp +++ b/rss/rss09xparser.cpp @@ -106,8 +106,12 @@ Item Rss09xParser::parse_item(xmlNode* itemNode) start > 0 && authorfield[start] != '('; start--) { } - it.author = authorfield.substr( - start + 1, end - start); + if (start <= 0) { + it.author_email = authorfield; + it.author = authorfield; + } else { + it.author = authorfield.substr(start + 1, end - start); + } } else { it.author_email = authorfield; it.author = authorfield; -- cgit v1.2.3 From d7fba3fada1baf37c38b26f9d724c91b1fd67065 Mon Sep 17 00:00:00 2001 From: Mikhail Yumanov <31183195+mikhailyumanov@users.noreply.github.com> Date: Mon, 2 Sep 2024 01:14:19 +0400 Subject: Update rss/rss09xparser.cpp Co-authored-by: Alexander Batischev --- rss/rss09xparser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rss/rss09xparser.cpp b/rss/rss09xparser.cpp index 952bc3ac..2de3209c 100644 --- a/rss/rss09xparser.cpp +++ b/rss/rss09xparser.cpp @@ -106,7 +106,7 @@ Item Rss09xParser::parse_item(xmlNode* itemNode) start > 0 && authorfield[start] != '('; start--) { } - if (start <= 0) { + if (start == 0) { it.author_email = authorfield; it.author = authorfield; } else { -- cgit v1.2.3 From a230a422022942a746c22863ee3c5cb21e441634 Mon Sep 17 00:00:00 2001 From: "Mikhail Iumanov (tufsampla)" Date: Mon, 2 Sep 2024 01:15:59 +0400 Subject: Add tests on author name ending with a closing bracket --- test/data/rss_091_with_bracket_author.xml | 32 +++++++++++++++++++ test/data/rss_092_with_bracket_author.xml | 29 +++++++++++++++++ test/data/rss_094_with_bracket_author.xml | 29 +++++++++++++++++ test/rsspp_parser.cpp | 53 +++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+) create mode 100644 test/data/rss_091_with_bracket_author.xml create mode 100644 test/data/rss_092_with_bracket_author.xml create mode 100644 test/data/rss_094_with_bracket_author.xml diff --git a/test/data/rss_091_with_bracket_author.xml b/test/data/rss_091_with_bracket_author.xml new file mode 100644 index 00000000..ba74a9cf --- /dev/null +++ b/test/data/rss_091_with_bracket_author.xml @@ -0,0 +1,32 @@ + + + + + A Channel with Authors With Names Containing Brackets + http://example.com/ + an example feed + en + + + This one has an author name ending with a closing bracket + http://example.com/test_1.html + Author name) + Non-empty description. + + + + This one has an author name with an email in brackets + http://example.com/test_2.html + Author (email@example.com) + This is empty description (no). + + + + This one has an author name with a non-email next in brackets + http://example.com/test_3.html + Author (name) + This is empty description (yes (no)). + + + diff --git a/test/data/rss_092_with_bracket_author.xml b/test/data/rss_092_with_bracket_author.xml new file mode 100644 index 00000000..b50f7a60 --- /dev/null +++ b/test/data/rss_092_with_bracket_author.xml @@ -0,0 +1,29 @@ + + + A Channel with Authors With Names Containing Brackets + http://example.com/ + an example feed + en + + + This one has an author name ending with a closing bracket + http://example.com/test_1.html + Author name) + Non-empty description. + + + + This one has an author name with an email in brackets + http://example.com/test_2.html + Author (email@example.com) + This is empty description (no). + + + + This one has an author name with a non-email next in brackets + http://example.com/test_3.html + Author (name) + This is empty description (yes (no)). + + + diff --git a/test/data/rss_094_with_bracket_author.xml b/test/data/rss_094_with_bracket_author.xml new file mode 100644 index 00000000..8e9da0ab --- /dev/null +++ b/test/data/rss_094_with_bracket_author.xml @@ -0,0 +1,29 @@ + + + A Channel with Authors With Names Containing Brackets + http://example.com/ + an example feed + en + + + This one has an author name ending with a closing bracket + http://example.com/test_1.html + Author name) + Non-empty description. + + + + This one has an author name with an email in brackets + http://example.com/test_2.html + Author (email@example.com) + This is empty description (no). + + + + This one has an author name with a non-email next in brackets + http://example.com/test_3.html + Author (name) + This is empty description (yes (no)). + + + diff --git a/test/rsspp_parser.cpp b/test/rsspp_parser.cpp index 4abbe8fe..65b18b11 100644 --- a/test/rsspp_parser.cpp +++ b/test/rsspp_parser.cpp @@ -103,6 +103,59 @@ TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " } } +TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " + "an author tag which ends with a bracket", + "[rsspp::Parser][issue2834]") +{ + rsspp::Parser p; + rsspp::Feed f; + + const auto check = [&]() { + REQUIRE(f.title == "A Channel with Authors With Names Containing Brackets"); + REQUIRE(f.description == "an example feed"); + REQUIRE(f.link == "http://example.com/"); + REQUIRE(f.language == "en"); + + REQUIRE(f.items.size() == 3u); + + REQUIRE(f.items[0].title == "This one has an author name ending with a closing bracket"); + REQUIRE(f.items[0].link == "http://example.com/test_1.html"); + REQUIRE(f.items[0].description == "Non-empty description."); + REQUIRE(f.items[0].author == "Author name)"); + REQUIRE(f.items[0].guid == ""); + + REQUIRE(f.items[1].title == "This one has an author name with an email in brackets"); + REQUIRE(f.items[1].link == "http://example.com/test_2.html"); + REQUIRE(f.items[1].description == "This is empty description (no)."); + REQUIRE(f.items[1].author == "email@example.com"); + REQUIRE(f.items[1].guid == ""); + + REQUIRE(f.items[2].title == "This one has an author name with a non-email next in brackets"); + REQUIRE(f.items[2].link == "http://example.com/test_3.html"); + REQUIRE(f.items[2].description == "This is empty description (yes (no))."); + REQUIRE(f.items[2].author == "name"); + REQUIRE(f.items[2].guid == ""); + }; + + SECTION("RSS 0.91") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_091_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_91); + check(); + } + + SECTION("RSS 0.92") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_092_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_92); + check(); + } + + SECTION("RSS 0.94") { + REQUIRE_NOTHROW(f = p.parse_file("data/rss_094_with_bracket_author.xml")); + REQUIRE(f.rss_version == rsspp::Feed::RSS_0_94); + check(); + } +} + TEST_CASE("Extracts data from RSS 0.92", "[rsspp::Parser]") { rsspp::Parser p; -- cgit v1.2.3 From 69cd94150808ed09a598b8bab9842dda914d9944 Mon Sep 17 00:00:00 2001 From: "Mikhail Iumanov (tufsampla)" Date: Mon, 2 Sep 2024 01:39:32 +0400 Subject: fix codestyle --- test/rsspp_parser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/rsspp_parser.cpp b/test/rsspp_parser.cpp index 65b18b11..a2f5b961 100644 --- a/test/rsspp_parser.cpp +++ b/test/rsspp_parser.cpp @@ -130,7 +130,8 @@ TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " REQUIRE(f.items[1].author == "email@example.com"); REQUIRE(f.items[1].guid == ""); - REQUIRE(f.items[2].title == "This one has an author name with a non-email next in brackets"); + REQUIRE(f.items[2].title == + "This one has an author name with a non-email next in brackets"); REQUIRE(f.items[2].link == "http://example.com/test_3.html"); REQUIRE(f.items[2].description == "This is empty description (yes (no))."); REQUIRE(f.items[2].author == "name"); -- cgit v1.2.3 From 5e7b1ec1cdab0e3732468283fecffd7ae4260b79 Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Thu, 19 Sep 2024 20:47:06 +0300 Subject: Fix order of email and name in RSS 0.9x test data --- test/data/rss_091_with_bracket_author.xml | 2 +- test/data/rss_092_with_bracket_author.xml | 2 +- test/data/rss_094_with_bracket_author.xml | 2 +- test/rsspp_parser.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/data/rss_091_with_bracket_author.xml b/test/data/rss_091_with_bracket_author.xml index ba74a9cf..2707bcb2 100644 --- a/test/data/rss_091_with_bracket_author.xml +++ b/test/data/rss_091_with_bracket_author.xml @@ -18,7 +18,7 @@ This one has an author name with an email in brackets http://example.com/test_2.html - Author (email@example.com) + email@example.com (Author) This is empty description (no). diff --git a/test/data/rss_092_with_bracket_author.xml b/test/data/rss_092_with_bracket_author.xml index b50f7a60..cb1e4fa4 100644 --- a/test/data/rss_092_with_bracket_author.xml +++ b/test/data/rss_092_with_bracket_author.xml @@ -15,7 +15,7 @@ This one has an author name with an email in brackets http://example.com/test_2.html - Author (email@example.com) + email@example.com (Author) This is empty description (no). diff --git a/test/data/rss_094_with_bracket_author.xml b/test/data/rss_094_with_bracket_author.xml index 8e9da0ab..7ce4df11 100644 --- a/test/data/rss_094_with_bracket_author.xml +++ b/test/data/rss_094_with_bracket_author.xml @@ -15,7 +15,7 @@ This one has an author name with an email in brackets http://example.com/test_2.html - Author (email@example.com) + email@example.com (Author) This is empty description (no). diff --git a/test/rsspp_parser.cpp b/test/rsspp_parser.cpp index a2f5b961..7840654e 100644 --- a/test/rsspp_parser.cpp +++ b/test/rsspp_parser.cpp @@ -127,7 +127,7 @@ TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains " REQUIRE(f.items[1].title == "This one has an author name with an email in brackets"); REQUIRE(f.items[1].link == "http://example.com/test_2.html"); REQUIRE(f.items[1].description == "This is empty description (no)."); - REQUIRE(f.items[1].author == "email@example.com"); + REQUIRE(f.items[1].author == "Author"); REQUIRE(f.items[1].guid == ""); REQUIRE(f.items[2].title == -- cgit v1.2.3