#include "htmlrenderer.h"

// NOTE(review): the header names of the next seven #include directives are
// missing in this copy of the file (the <...> parts look stripped) -- restore
// them from version control before building.
#include
#include
#include
#include
#include
#include
#include

#include "config.h"
#include "logger.h"
#include "strprintf.h"
#include "tagsouppullparser.h"
#include "utils.h"

namespace newsboat {

/// Builds the renderer and fills the tag-name lookup table.
///
/// @param raw stored in `raw_`.
///
/// Keys of `tags` are lower-case element names; extract_tag() lower-cases
/// the parsed name before looking it up here.
HtmlRenderer::HtmlRenderer(bool raw)
	: raw_(raw)
{
	tags["a"] = HtmlTag::A;
	tags["embed"] = HtmlTag::EMBED;
	tags["iframe"] = HtmlTag::IFRAME;
	tags["br"] = HtmlTag::BR;
	tags["pre"] = HtmlTag::PRE;
	tags["ituneshack"] = HtmlTag::ITUNESHACK;
	tags["img"] = HtmlTag::IMG;
	tags["blockquote"] = HtmlTag::BLOCKQUOTE;
	// <aside> maps to the same tag value as <blockquote>.
	tags["aside"] = HtmlTag::BLOCKQUOTE;
	tags["p"] = HtmlTag::P;
	tags["div"] = HtmlTag::DIV;
	tags["h1"] = HtmlTag::H1;
	tags["h2"] = HtmlTag::H2;
	tags["h3"] = HtmlTag::H3;
	tags["h4"] = HtmlTag::H4;
	tags["h5"] = HtmlTag::H5;
	tags["h6"] = HtmlTag::H6;
	tags["ol"] = HtmlTag::OL;
	tags["ul"] = HtmlTag::UL;
	tags["li"] = HtmlTag::LI;
	tags["dt"] = HtmlTag::DT;
	tags["dd"] = HtmlTag::DD;
	tags["dl"] = HtmlTag::DL;
	tags["sup"] = HtmlTag::SUP;
	tags["sub"] = HtmlTag::SUB;
	tags["hr"] = HtmlTag::HR;
	// <b> and <strong> share one tag value.
	tags["b"] = HtmlTag::STRONG;
	tags["strong"] = HtmlTag::STRONG;
	tags["u"] = HtmlTag::UNDERLINE;
	tags["q"] = HtmlTag::QUOTATION;
	tags["script"] = HtmlTag::SCRIPT;
	tags["style"] = HtmlTag::STYLE;
	tags["table"] = HtmlTag::TABLE;
	tags["th"] = HtmlTag::TH;
	tags["tr"] = HtmlTag::TR;
	tags["td"] = HtmlTag::TD;
	tags["video"] = HtmlTag::VIDEO;
	tags["audio"] = HtmlTag::AUDIO;
	tags["source"] = HtmlTag::SOURCE;
}

/// Convenience overload: wraps `source` in a string stream and forwards all
/// arguments to the std::istream overload of render().
///
/// NOTE(review): the element type of `lines` is missing in this copy of the
/// file (`std::vector>`); the template arguments look stripped -- restore
/// them from the declaration in htmlrenderer.h.
void HtmlRenderer::render(const std::string& source, std::vector>& lines,
	Links& links, const std::string& url)
{
	std::istringstream input(source);
	render(input, lines, links, url);
}

/// Maps the text currently reported by `parser` (used as a tag name) to its
/// HtmlTag value.
///
/// The name is lower-cased before the lookup, so matching against the keys
/// registered in the constructor is case-insensitive.
///
/// @param parser parser whose get_text() yields the tag name.
/// @return the registered HtmlTag, or a value-initialized HtmlTag when the
///         name is not in `tags`.
HtmlTag HtmlRenderer::extract_tag(TagSoupPullParser& parser)
{
	std::string tagname = parser.get_text();
	// tolower() has undefined behaviour for negative arguments other than
	// EOF, so each byte is passed as unsigned char (matters for non-ASCII
	// bytes in tag names).
	std::transform(tagname.begin(), tagname.end(), tagname.begin(),
		[](unsigned char c) {
			return static_cast<char>(::tolower(c));
		});

	// Look up without operator[]: that would insert a new entry for every
	// unknown tag name and let untrusted HTML grow the table without bound.
	const auto it = tags.find(tagname);
	if (it == tags.end()) {
		// Same value operator[] would have produced for a missing key.
		return HtmlTag{};
	}
	return it->second;
}

void HtmlRenderer::render(std::istream& input, std::vector>& lines,
	Links& links, const std::string& url)
{
	unsigned int image_count = 0;
	unsigned int video_count = 0;
	unsigned int audio_count =
0; unsigned int source_count = 0; unsigned int iframe_count = 0; std::string curline; int indent_level = 0; std::vector list_elements_stack; bool inside_pre = false; bool pre_just_started = false; size_t pre_consecutive_nl = 0; bool itunes_hack = false; bool inside_script = false; size_t inside_style = 0; bool inside_video = false; bool inside_audio = false; std::vector ol_counts; std::vector ol_types; int link_num = -1; std::vector tables; /* * to render the HTML, we use a self-developed "XML" pull parser. * * A pull parser works like this: * - we feed it with an XML stream * - we then gather an iterator * - we then can iterate over all continuous elements, such as start * tag, close tag, text element, ... */ TagSoupPullParser xpp(input); for (TagSoupPullParser::Event e = xpp.next(); e != TagSoupPullParser::Event::END_DOCUMENT; e = xpp.next()) { if (inside_script) { //