summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander Batischev <eual.jp@gmail.com> 2021-03-21 22:12:58 +0300
committer: GitHub <noreply@github.com> 2021-03-21 22:12:58 +0300
commit 09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d (patch)
tree f2d926cf46f88341dbe2de86467d97eb03c5d2cb
parent 48ae63d47f425eb542e981df3e06cbf608df1123 (diff)
parent 793113f4e3ac1eb848df46b1e73fca486bc1df10 (diff)
download: newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.tar.gz
newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.tar.zst
newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.zip
Merge pull request #1539 from rnestler/remove-dead-code
Remove C++ utils::convert_text since it isn't used anywhere
-rw-r--r-- include/utils.h | 3
-rw-r--r-- rust/libnewsboat-ffi/src/utils.rs | 1
-rw-r--r-- src/utils.cpp | 12
-rw-r--r-- test/utils.cpp | 169
4 files changed, 0 insertions, 185 deletions
diff --git a/include/utils.h b/include/utils.h
index b6b06275..ceea31f2 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -48,9 +48,6 @@ std::string consolidate_whitespace(const std::string& str);
std::string translit(const std::string& tocode,
const std::string& fromcode);
-std::string convert_text(const std::string& text,
- const std::string& tocode,
- const std::string& fromcode);
/// Converts input string from UTF-8 to the locale's encoding (as detected by
/// nl_langinfo(CODESET)).
diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs
index c0dceef5..b1577d0d 100644
--- a/rust/libnewsboat-ffi/src/utils.rs
+++ b/rust/libnewsboat-ffi/src/utils.rs
@@ -84,7 +84,6 @@ mod bridged {
fn run_program(argv: &Vec<String>, input: &str) -> String;
fn translit(tocode: &str, fromcode: &str) -> String;
- fn convert_text(text: &[u8], tocode: &str, fromcode: &str) -> Vec<u8>;
fn utf8_to_locale(text: &str) -> Vec<u8>;
fn locale_to_utf8(text: &[u8]) -> String;
}
diff --git a/src/utils.cpp b/src/utils.cpp
index 3408c2bf..68149633 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -261,18 +261,6 @@ std::string utils::translit(const std::string& tocode, const std::string& fromco
return std::string(utils::bridged::translit(tocode, fromcode));
}
-std::string utils::convert_text(const std::string& text,
- const std::string& tocode,
- const std::string& fromcode)
-{
- const auto text_slice =
- rust::Slice<unsigned char>(
- reinterpret_cast<const unsigned char*>(text.c_str()),
- text.length());
- const auto result = utils::bridged::convert_text(text_slice, tocode, fromcode);
- return std::string(reinterpret_cast<const char*>(result.data()), result.size());
-}
-
std::string utils::utf8_to_locale(const std::string& text)
{
const auto result = utils::bridged::utf8_to_locale(text);
diff --git a/test/utils.cpp b/test/utils.cpp
index 5721ef2a..fad18a38 100644
--- a/test/utils.cpp
+++ b/test/utils.cpp
@@ -1722,175 +1722,6 @@ TEST_CASE("mkdir_parents() doesn't care if the path ends in a slash or not",
}
}
-TEST_CASE("convert_text() returns input string if `fromcode` and `tocode` are the same",
- "[utils]")
-{
- const std::vector<std::string> inputs {
- "\x81\x13\xa0", // \x81 is not valid UTF-8
- "\x01", // incomplete UTF-16
- "\x01\x1f\x80\x9b", // those bytes are not defined in ISO-8859-1
- "\x7f\x1e\x03", // these bytes are not defined in KOI8-R
- };
-
- const std::vector<std::string> codes {
- "utf-8",
- "utf-16",
- "iso-8859-1",
- "koi8-r",
- };
-
- SECTION("Codes are literally the same") {
- for (const auto& code : codes) {
- for (const auto& input : inputs) {
- INFO("code: " << code);
- INFO("input: " << input);
- REQUIRE(utils::convert_text(input, code, code) == input);
- }
- }
- }
-
- const auto to_uppercase = [](std::string input) -> std::string {
- std::transform(input.begin(), input.end(), input.begin(), ::toupper);
- return input;
- };
-
- SECTION("From-code is an uppercase version of to-code") {
- for (const auto& code : codes) {
- for (const auto& input : inputs) {
- const auto fromcode = to_uppercase(code);
- const auto& tocode = code;
- INFO("from-code: " << fromcode);
- INFO("to-code: " << tocode);
- INFO("input: " << input);
- REQUIRE(utils::convert_text(input, tocode, fromcode) == input);
- }
- }
- }
-
- SECTION("To-code is an uppercase version of from-code") {
- for (const auto& code : codes) {
- for (const auto& input : inputs) {
- const auto& fromcode = code;
- const auto tocode = to_uppercase(code);
- INFO("from-code: " << fromcode);
- INFO("to-code: " << tocode);
- INFO("input: " << input);
- REQUIRE(utils::convert_text(input, tocode, fromcode) == input);
- }
- }
- }
-}
-
-TEST_CASE("convert_text() replaces incomplete multi-byte sequences with a question mark",
- "[utils]")
-{
- SECTION("From UTF-8 to UTF-16LE") {
- // "ой", "oops" in Russian, but the last byte is missing
- const std::string input("\xd0\xbe\xd0");
- const std::string expected("\x3e\x04\x3f\x00", 4);
- REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected);
- }
-
- SECTION("From UTF-16LE to UTF-8") {
- SECTION("Input contains zero bytes") {
- // "hi", but the last byte is missing
- const std::string input("\x68\x00\x69", 3);
- const std::string expected("h?");
- REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected);
- }
-
- SECTION("Input doesn't contain zero bytes") {
- // "эй", "hey" in Russian, but the last byte is missing
- const std::string input("\x4d\x04\x39", 3);
- const std::string expected("\xd1\x8d?");
- REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected);
- }
- }
-}
-
-TEST_CASE("convert_text() replaces invalid multi-byte sequences with "
- "a question mark (as best as it can)",
- "[utils]")
-{
- SECTION("From UTF-8 to UTF-16LE") {
- // "日本", "Japan", but the third byte of the first character (0xa5) is
- // missing, making the whole first character an illegal sequence.
- const std::string input("\xe6\x97\xe6\x9c\xac");
- const std::string expected("\x3f\x00\x3f\x00\x2c\x67", 6);
- REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected);
- }
-
- SECTION("From UTF-16LE to UTF-8") {
- // The first two bytes here are part of a surrogate pair, i.e. they
- // imply that the next two bytes encode additional info. However, the
- // next two bytes are an ordinary character. This breaks the decoding
- // process, so some things get turned into a question mark while others
- // are decoded incorrectly.
- const std::string input("\x01\xd8\xd7\x03");
- const std::string expected("?\xed\x9f\x98?");
- REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected);
- }
-}
-
-TEST_CASE("convert_text() converts text between encodings", "[utils]")
-{
- SECTION("From UTF-8 to UTF-16LE") {
- // "Тестирую", "Testing" in Russian.
- const std::string input("\xd0\xa2\xd0\xb5\xd1\x81\xd1\x82\xd0\xb8\xd1"
- "\x80\xd1\x83\xd1\x8e");
- const std::string expected("\x22\x04\x35\x04\x41\x04\x42\x04"
- "\x38\x04\x40\x04\x43\x04\x4e\x04");
- REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected);
- }
-
- SECTION("From UTF-8 to KOI8-R") {
- // "Проверка", "Check" in Russian.
- const std::string input("\xd0\x9f\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1"
- "\x80\xd0\xba\xd0\xb0");
- const std::string expected("\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1");
- REQUIRE(utils::convert_text(input, "KOI8-R", "UTF-8") == expected);
- }
-
- SECTION("From UTF-8 to ISO-8859-1 (transliterating if need be)") {
- // "вау °±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃ": a mix of Cyrillic (unsupported by
- // ISO-8859-1) and ISO-8859-1 characters.
- const std::string input("\xd0\xb2\xd0\xb0\xd1\x83\x20\xc2\xb0\xc2\xb1"
- "\xc2\xb2\xc2\xb3\xc2\xb4\xc2\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9"
- "\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2\xbe\xc2\xbf\xc3\x80\xc3\x81"
- "\xc3\x82\xc3\x83");
-
- const auto result = utils::convert_text(input, "ISO-8859-1", "UTF-8");
- // We can't spell out an expected result because different platforms
- // might follow different transliteration rules.
- REQUIRE(result != "");
- REQUIRE(result != input);
- }
-
- SECTION("From UTF-16LE to UTF-8") {
- // "Успех", "Success" in Russian.
- const std::string input("\xff\xfe\x23\x04\x41\x04\x3f\x04\x35\x04\x45\x04");
- const std::string expected("\xef\xbb\xbf\xd0\xa3\xd1\x81\xd0\xbf\xd0\xb5"
- "\xd1\x85");
- REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected);
- }
-
- SECTION("From KOI8-R to UTF-8") {
- // "История", "History" in Russian.
- const std::string input("\xe9\xd3\xd4\xcf\xd2\xc9\xd1");
- const std::string expected("\xd0\x98\xd1\x81\xd1\x82\xd0\xbe\xd1\x80"
- "\xd0\xb8\xd1\x8f");
- REQUIRE(utils::convert_text(input, "UTF-8", "KOI8-R") == expected);
- }
-
- SECTION("From ISO-8859-1 to UTF-8") {
- // "ÄÅÆÇÈÉÊËÌÍÎÏ": some umlauts and Latin letters.
- const std::string input("\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf");
- const std::string expected("\xc3\x84\xc3\x85\xc3\x86\xc3\x87\xc3\x88\xc3"
- "\x89\xc3\x8a\xc3\x8b\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f");
- REQUIRE(utils::convert_text(input, "UTF-8", "ISO-8859-1") == expected);
- }
-}
-
TEST_CASE("utf8_to_locale() converts text from UTF-8 to the encoding specified "
"by locale in LC_CTYPE class",
"[utils]")