summaryrefslogtreecommitdiff
path: root/src/utils.cpp
diff options
context:
space:
mode:
authorGravatar Alexander Batischev <eual.jp@gmail.com> 2022-12-25 13:28:25 +0300
committerGravatar Alexander Batischev <eual.jp@gmail.com> 2022-12-25 13:34:50 +0300
commitfa363edd2d61e598aff0de5848ae955a5b35d3c0 (patch)
tree7ed0db759be7b24f650c47ef4e83b6977b36685c /src/utils.cpp
parent521ed1fa9028797cd927e21e7e4dcc617f11a9eb (diff)
downloadnewsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.tar.gz
newsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.tar.zst
newsboat-fa363edd2d61e598aff0de5848ae955a5b35d3c0.zip
Do not convert feeds based on Content-Type
This reverts PRs #2214 and #2243, containing the following commits:

- 203bbf80206180740f24137ce0f35fcb64f2e56e Make utils::convert_text() available to C++
- 5dfdc5d64d79a2c080956acf599d202b4aa2988b Convert feed data to utf-8 if charset specifies non-"utf-8" encoding
- c7fd68105d35b4c3c7e03c4013768272b6c3d015 retrieve_url(): Convert data to utf-8 if non-utf8 charset is specified
- 7eb721fdfab39d64a455ec8c6d90681864e057c7 Avoid ODR violation by defining HeaderValues in anonymous namespaces
- 7ba7470af1635e736f2e688fb46303e3569656f0 Get rid of raw new/delete useage in parser.cpp's handle_headers
- 1c3e1618f780655375b465e6a1d7ef030cb4b460 Reset HTTP headers when detecting a new response
- 64ad6de02d7f3fd8239772c70e964895360c7524 Update rss/parser.cpp
- 4b0108a1a501ac69ca969c4790990c35148fd7b0 Update rss/parser.cpp

These commits led to problems with feeds that specify the same encoding both in the Content-Type and the XML tag; these were converted to UTF-8, but then the XML parser tried to convert them to UTF-8 again. We don't have a good fix for this yet. The problem is tracked in #1436.
Diffstat (limited to 'src/utils.cpp')
-rw-r--r-- src/utils.cpp 66
1 files changed, 1 insertions, 65 deletions
diff --git a/src/utils.cpp b/src/utils.cpp
index 683cc396..8e499376 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -21,7 +21,6 @@
#include <regex>
#include <sstream>
#include <stfl.h>
-#include <string>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h>
@@ -31,7 +30,6 @@
#include "config.h"
#include "curlhandle.h"
#include "htmlrenderer.h"
-#include "libnewsboat-ffi/src/utils.rs.h"
#include "logger.h"
#include "strprintf.h"
@@ -188,19 +186,6 @@ std::string utils::locale_to_utf8(const std::string& text)
return std::string(utils::bridged::locale_to_utf8(text_slice));
}
-std::string utils::convert_text(const std::string& text, const std::string& tocode,
- const std::string& fromcode)
-{
- const auto text_slice =
- rust::Slice<const unsigned char>(
- reinterpret_cast<const unsigned char*>(text.c_str()),
- text.length());
-
- const auto result = utils::bridged::convert_text(text_slice, tocode, fromcode);
-
- return std::string(reinterpret_cast<const char*>(result.data()), result.size());
-}
-
std::string utils::get_command_output(const std::string& cmd)
{
return std::string(utils::bridged::get_command_output(cmd));
@@ -268,50 +253,6 @@ std::string utils::retrieve_url(const std::string& url,
return retrieve_url(url, handle, cfgcont, authinfo, body, method);
}
-namespace {
-
-struct HeaderValues {
- std::string charset;
-
- HeaderValues()
- {
- reset();
- }
-
- void reset()
- {
- charset = "utf-8";
- }
-};
-
-}
-
-static size_t handle_headers(void* ptr, size_t size, size_t nmemb, void* data)
-{
- const auto header = std::string(reinterpret_cast<const char*>(ptr), size * nmemb);
- HeaderValues* values = static_cast<HeaderValues*>(data);
-
- if (header.find("HTTP/") == 0) {
- // Reset headers if a new response is detected (there might be multiple responses per request in case of a redirect)
- values->reset();
- } else if (header.find("Content-Type:") == 0) {
- const std::string key = "charset=";
- const auto charset_index = header.find(key);
- if (charset_index != std::string::npos) {
- auto charset = header.substr(charset_index + key.size());
- utils::trim(charset);
- if (charset.size() >= 2 && charset[0] == '"' && charset[charset.size() - 1] == '"') {
- charset = charset.substr(1, charset.size() - 2);
- }
- if (charset.size() > 0) {
- values->charset = charset;
- }
- }
- }
-
- return size * nmemb;
-}
-
std::string utils::retrieve_url(const std::string& url,
CurlHandle& easyhandle,
ConfigContainer* cfgcont,
@@ -326,10 +267,6 @@ std::string utils::retrieve_url(const std::string& url,
curl_easy_setopt(easyhandle.ptr(), CURLOPT_WRITEFUNCTION, my_write_data);
curl_easy_setopt(easyhandle.ptr(), CURLOPT_WRITEDATA, &buf);
- HeaderValues hdrs;
- curl_easy_setopt(easyhandle.ptr(), CURLOPT_HEADERDATA, &hdrs);
- curl_easy_setopt(easyhandle.ptr(), CURLOPT_HEADERFUNCTION, handle_headers);
-
switch (method) {
case HTTPMethod::GET:
break;
@@ -389,8 +326,7 @@ std::string utils::retrieve_url(const std::string& url,
// See the clobbering note above.
curl_easy_setopt(easyhandle.ptr(), CURLOPT_ERRORBUFFER, NULL);
- LOG(Level::DEBUG, "Parser::parse_url: converting data from %s to utf-8", hdrs.charset);
- return utils::convert_text(buf, "utf-8", hdrs.charset);
+ return buf;
}
std::string utils::run_program(const char* argv[], const std::string& input)