diff options
author | 2022-12-25 13:28:25 +0300 | |
---|---|---|
committer | 2022-12-25 13:34:50 +0300 | |
commit | fa363edd2d61e598aff0de5848ae955a5b35d3c0 (patch) | |
tree | 7ed0db759be7b24f650c47ef4e83b6977b36685c /src/utils.cpp | |
parent | 521ed1fa9028797cd927e21e7e4dcc617f11a9eb (diff) | |
download | newsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.tar.gz newsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.tar.zst newsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.zip |
Do not convert feeds based on Content-Type
This reverts PRs #2214 and #2243, containing the following commits:
203bbf80206180740f24137ce0f35fcb64f2e56e
Make utils::convert_text() available to C++
5dfdc5d64d79a2c080956acf599d202b4aa2988b
Convert feed data to utf-8 if charset specifies non-"utf-8" encoding
c7fd68105d35b4c3c7e03c4013768272b6c3d015
retrieve_url(): Convert data to utf-8 if non-utf8 charset is
specified
7eb721fdfab39d64a455ec8c6d90681864e057c7
Avoid ODR violation by defining HeaderValues in anonymous namespaces
7ba7470af1635e736f2e688fb46303e3569656f0
Get rid of raw new/delete usage in parser.cpp's handle_headers
1c3e1618f780655375b465e6a1d7ef030cb4b460
Reset HTTP headers when detecting a new response
64ad6de02d7f3fd8239772c70e964895360c7524
Update rss/parser.cpp
4b0108a1a501ac69ca969c4790990c35148fd7b0
Update rss/parser.cpp
These commits led to problems with feeds that specify the same encoding
both in the Content-Type and the XML tag; these were converted to UTF-8,
but then the XML parser tried to convert them to UTF-8 again. We don't
have a good fix for this yet. The problem is tracked in #1436.
Diffstat (limited to 'src/utils.cpp')
-rw-r--r-- | src/utils.cpp | 66 |
1 files changed, 1 insertions, 65 deletions
diff --git a/src/utils.cpp b/src/utils.cpp index 683cc396..8e499376 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -21,7 +21,6 @@ #include <regex> #include <sstream> #include <stfl.h> -#include <string> #include <sys/param.h> #include <sys/types.h> #include <sys/utsname.h> @@ -31,7 +30,6 @@ #include "config.h" #include "curlhandle.h" #include "htmlrenderer.h" -#include "libnewsboat-ffi/src/utils.rs.h" #include "logger.h" #include "strprintf.h" @@ -188,19 +186,6 @@ std::string utils::locale_to_utf8(const std::string& text) return std::string(utils::bridged::locale_to_utf8(text_slice)); } -std::string utils::convert_text(const std::string& text, const std::string& tocode, - const std::string& fromcode) -{ - const auto text_slice = - rust::Slice<const unsigned char>( - reinterpret_cast<const unsigned char*>(text.c_str()), - text.length()); - - const auto result = utils::bridged::convert_text(text_slice, tocode, fromcode); - - return std::string(reinterpret_cast<const char*>(result.data()), result.size()); -} - std::string utils::get_command_output(const std::string& cmd) { return std::string(utils::bridged::get_command_output(cmd)); @@ -268,50 +253,6 @@ std::string utils::retrieve_url(const std::string& url, return retrieve_url(url, handle, cfgcont, authinfo, body, method); } -namespace { - -struct HeaderValues { - std::string charset; - - HeaderValues() - { - reset(); - } - - void reset() - { - charset = "utf-8"; - } -}; - -} - -static size_t handle_headers(void* ptr, size_t size, size_t nmemb, void* data) -{ - const auto header = std::string(reinterpret_cast<const char*>(ptr), size * nmemb); - HeaderValues* values = static_cast<HeaderValues*>(data); - - if (header.find("HTTP/") == 0) { - // Reset headers if a new response is detected (there might be multiple responses per request in case of a redirect) - values->reset(); - } else if (header.find("Content-Type:") == 0) { - const std::string key = "charset="; - const auto charset_index = header.find(key); - if 
(charset_index != std::string::npos) { - auto charset = header.substr(charset_index + key.size()); - utils::trim(charset); - if (charset.size() >= 2 && charset[0] == '"' && charset[charset.size() - 1] == '"') { - charset = charset.substr(1, charset.size() - 2); - } - if (charset.size() > 0) { - values->charset = charset; - } - } - } - - return size * nmemb; -} - std::string utils::retrieve_url(const std::string& url, CurlHandle& easyhandle, ConfigContainer* cfgcont, @@ -326,10 +267,6 @@ std::string utils::retrieve_url(const std::string& url, curl_easy_setopt(easyhandle.ptr(), CURLOPT_WRITEFUNCTION, my_write_data); curl_easy_setopt(easyhandle.ptr(), CURLOPT_WRITEDATA, &buf); - HeaderValues hdrs; - curl_easy_setopt(easyhandle.ptr(), CURLOPT_HEADERDATA, &hdrs); - curl_easy_setopt(easyhandle.ptr(), CURLOPT_HEADERFUNCTION, handle_headers); - switch (method) { case HTTPMethod::GET: break; @@ -389,8 +326,7 @@ std::string utils::retrieve_url(const std::string& url, // See the clobbering note above. curl_easy_setopt(easyhandle.ptr(), CURLOPT_ERRORBUFFER, NULL); - LOG(Level::DEBUG, "Parser::parse_url: converting data from %s to utf-8", hdrs.charset); - return utils::convert_text(buf, "utf-8", hdrs.charset); + return buf; } std::string utils::run_program(const char* argv[], const std::string& input) |