diff options
author | 2021-03-21 22:12:58 +0300 | |
---|---|---|
committer | 2021-03-21 22:12:58 +0300 | |
commit | 09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d (patch) | |
tree | f2d926cf46f88341dbe2de86467d97eb03c5d2cb | |
parent | 48ae63d47f425eb542e981df3e06cbf608df1123 (diff) | |
parent | 793113f4e3ac1eb848df46b1e73fca486bc1df10 (diff) | |
download | newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.tar.gz newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.tar.zst newsboat-09d1e8f81c05840b6ad47bf9b9e8f44c233d1b7d.zip |
Merge pull request #1539 from rnestler/remove-dead-code
Remove C++ utils::convert_text since it isn't used anywhere
-rw-r--r-- | include/utils.h | 3 | ||||
-rw-r--r-- | rust/libnewsboat-ffi/src/utils.rs | 1 | ||||
-rw-r--r-- | src/utils.cpp | 12 | ||||
-rw-r--r-- | test/utils.cpp | 169 |
4 files changed, 0 insertions, 185 deletions
diff --git a/include/utils.h b/include/utils.h index b6b06275..ceea31f2 100644 --- a/include/utils.h +++ b/include/utils.h @@ -48,9 +48,6 @@ std::string consolidate_whitespace(const std::string& str); std::string translit(const std::string& tocode, const std::string& fromcode); -std::string convert_text(const std::string& text, - const std::string& tocode, - const std::string& fromcode); /// Converts input string from UTF-8 to the locale's encoding (as detected by /// nl_langinfo(CODESET)). diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs index c0dceef5..b1577d0d 100644 --- a/rust/libnewsboat-ffi/src/utils.rs +++ b/rust/libnewsboat-ffi/src/utils.rs @@ -84,7 +84,6 @@ mod bridged { fn run_program(argv: &Vec<String>, input: &str) -> String; fn translit(tocode: &str, fromcode: &str) -> String; - fn convert_text(text: &[u8], tocode: &str, fromcode: &str) -> Vec<u8>; fn utf8_to_locale(text: &str) -> Vec<u8>; fn locale_to_utf8(text: &[u8]) -> String; } diff --git a/src/utils.cpp b/src/utils.cpp index 3408c2bf..68149633 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -261,18 +261,6 @@ std::string utils::translit(const std::string& tocode, const std::string& fromco return std::string(utils::bridged::translit(tocode, fromcode)); } -std::string utils::convert_text(const std::string& text, - const std::string& tocode, - const std::string& fromcode) -{ - const auto text_slice = - rust::Slice<unsigned char>( - reinterpret_cast<const unsigned char*>(text.c_str()), - text.length()); - const auto result = utils::bridged::convert_text(text_slice, tocode, fromcode); - return std::string(reinterpret_cast<const char*>(result.data()), result.size()); -} - std::string utils::utf8_to_locale(const std::string& text) { const auto result = utils::bridged::utf8_to_locale(text); diff --git a/test/utils.cpp b/test/utils.cpp index 5721ef2a..fad18a38 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -1722,175 +1722,6 @@ TEST_CASE("mkdir_parents() doesn't care if the path ends in a slash or not", } } -TEST_CASE("convert_text() returns input string if `fromcode` and `tocode` are the same", - "[utils]") -{ - const std::vector<std::string> inputs { - "\x81\x13\xa0", // \x81 is not valid UTF-8 - "\x01", // incomplete UTF-16 - "\x01\x1f\x80\x9b", // those bytes are not defined in ISO-8859-1 - "\x7f\x1e\x03", // these bytes are not defined in KOI8-R - }; - - const std::vector<std::string> codes { - "utf-8", - "utf-16", - "iso-8859-1", - "koi8-r", - }; - - SECTION("Codes are literally the same") { - for (const auto& code : codes) { - for (const auto& input : inputs) { - INFO("code: " << code); - INFO("input: " << input); - REQUIRE(utils::convert_text(input, code, code) == input); - } - } - } - - const auto to_uppercase = [](std::string input) -> std::string { - std::transform(input.begin(), input.end(), input.begin(), ::toupper); - return input; - }; - - SECTION("From-code is an uppercase version of to-code") { - for (const auto& code : codes) { - for (const auto& input : inputs) { - const auto fromcode = to_uppercase(code); - const auto& tocode = code; - INFO("from-code: " << fromcode); - INFO("to-code: " << tocode); - INFO("input: " << input); - REQUIRE(utils::convert_text(input, tocode, fromcode) == input); - } - } - } - - SECTION("To-code is an uppercase version of from-code") { - for (const auto& code : codes) { - for (const auto& input : inputs) { - const auto& fromcode = code; - const auto tocode = to_uppercase(code); - INFO("from-code: " << fromcode); - INFO("to-code: " << tocode); - INFO("input: " << input); - REQUIRE(utils::convert_text(input, tocode, fromcode) == input); - } - } - } -} - -TEST_CASE("convert_text() replaces incomplete multi-byte sequences with a question mark", - "[utils]") -{ - SECTION("From UTF-8 to UTF-16LE") { - // "ой", "oops" in Russian, but the last byte is missing - const std::string input("\xd0\xbe\xd0"); - const std::string expected("\x3e\x04\x3f\x00", 4); - REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected); - } - - SECTION("From UTF-16LE to UTF-8") { - SECTION("Input contains zero bytes") { - // "hi", but the last byte is missing - const std::string input("\x68\x00\x69", 3); - const std::string expected("h?"); - REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected); - } - - SECTION("Input doesn't contain zero bytes") { - // "эй", "hey" in Russian, but the last byte is missing - const std::string input("\x4d\x04\x39", 3); - const std::string expected("\xd1\x8d?"); - REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected); - } - } -} - -TEST_CASE("convert_text() replaces invalid multi-byte sequences with " - "a question mark (as best as it can)", - "[utils]") -{ - SECTION("From UTF-8 to UTF-16LE") { - // "日本", "Japan", but the third byte of the first character (0xa5) is - // missing, making the whole first character an illegal sequence. - const std::string input("\xe6\x97\xe6\x9c\xac"); - const std::string expected("\x3f\x00\x3f\x00\x2c\x67", 6); - REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected); - } - - SECTION("From UTF-16LE to UTF-8") { - // The first two bytes here are part of a surrogate pair, i.e. they - // imply that the next two bytes encode additional info. However, the - // next two bytes are an ordinary character. This breaks the decoding - // process, so some things get turned into a question mark while others - // are decoded incorrectly. - const std::string input("\x01\xd8\xd7\x03"); - const std::string expected("?\xed\x9f\x98?"); - REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected); - } -} - -TEST_CASE("convert_text() converts text between encodings", "[utils]") -{ - SECTION("From UTF-8 to UTF-16LE") { - // "Тестирую", "Testing" in Russian. - const std::string input("\xd0\xa2\xd0\xb5\xd1\x81\xd1\x82\xd0\xb8\xd1" - "\x80\xd1\x83\xd1\x8e"); - const std::string expected("\x22\x04\x35\x04\x41\x04\x42\x04" - "\x38\x04\x40\x04\x43\x04\x4e\x04"); - REQUIRE(utils::convert_text(input, "UTF-16LE", "UTF-8") == expected); - } - - SECTION("From UTF-8 to KOI8-R") { - // "Проверка", "Check" in Russian. - const std::string input("\xd0\x9f\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1" - "\x80\xd0\xba\xd0\xb0"); - const std::string expected("\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1"); - REQUIRE(utils::convert_text(input, "KOI8-R", "UTF-8") == expected); - } - - SECTION("From UTF-8 to ISO-8859-1 (transliterating if need be)") { - // "вау °±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃ": a mix of Cyrillic (unsupported by - // ISO-8859-1) and ISO-8859-1 characters. - const std::string input("\xd0\xb2\xd0\xb0\xd1\x83\x20\xc2\xb0\xc2\xb1" - "\xc2\xb2\xc2\xb3\xc2\xb4\xc2\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9" - "\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2\xbe\xc2\xbf\xc3\x80\xc3\x81" - "\xc3\x82\xc3\x83"); - - const auto result = utils::convert_text(input, "ISO-8859-1", "UTF-8"); - // We can't spell out an expected result because different platforms - // might follow different transliteration rules. - REQUIRE(result != ""); - REQUIRE(result != input); - } - - SECTION("From UTF-16LE to UTF-8") { - // "Успех", "Success" in Russian. - const std::string input("\xff\xfe\x23\x04\x41\x04\x3f\x04\x35\x04\x45\x04"); - const std::string expected("\xef\xbb\xbf\xd0\xa3\xd1\x81\xd0\xbf\xd0\xb5" - "\xd1\x85"); - REQUIRE(utils::convert_text(input, "UTF-8", "UTF-16LE") == expected); - } - - SECTION("From KOI8-R to UTF-8") { - // "История", "History" in Russian. - const std::string input("\xe9\xd3\xd4\xcf\xd2\xc9\xd1"); - const std::string expected("\xd0\x98\xd1\x81\xd1\x82\xd0\xbe\xd1\x80" - "\xd0\xb8\xd1\x8f"); - REQUIRE(utils::convert_text(input, "UTF-8", "KOI8-R") == expected); - } - - SECTION("From ISO-8859-1 to UTF-8") { - // "ÄÅÆÇÈÉÊËÌÍÎÏ": some umlauts and Latin letters. - const std::string input("\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"); - const std::string expected("\xc3\x84\xc3\x85\xc3\x86\xc3\x87\xc3\x88\xc3" - "\x89\xc3\x8a\xc3\x8b\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f"); - REQUIRE(utils::convert_text(input, "UTF-8", "ISO-8859-1") == expected); - } -} - TEST_CASE("utf8_to_locale() converts text from UTF-8 to the encoding specified " "by locale in LC_CTYPE class", "[utils]") |