#include "htmlrenderer.h"
#include
#include
#include
#include
#include
#include
#include
#include "config.h"
#include "logger.h"
#include "strprintf.h"
#include "tagsouppullparser.h"
#include "utils.h"
namespace newsboat {
/*
 * Constructs the renderer: stores `raw` in `raw_` and fills `tags`, the
 * lookup table that translates an HTML element name into its HtmlTag.
 */
HtmlRenderer::HtmlRenderer(bool raw)
	: raw_(raw)
{
	// One row per recognised element name. Some names share a tag:
	// "aside" maps to BLOCKQUOTE and "b" maps to STRONG.
	struct KnownTag {
		const char* name;
		HtmlTag tag;
	};

	static const KnownTag known_tags[] = {
		{"a", HtmlTag::A},
		{"embed", HtmlTag::EMBED},
		{"iframe", HtmlTag::IFRAME},
		{"br", HtmlTag::BR},
		{"pre", HtmlTag::PRE},
		{"ituneshack", HtmlTag::ITUNESHACK},
		{"img", HtmlTag::IMG},
		{"blockquote", HtmlTag::BLOCKQUOTE},
		{"aside", HtmlTag::BLOCKQUOTE},
		{"p", HtmlTag::P},
		{"div", HtmlTag::DIV},
		{"h1", HtmlTag::H1},
		{"h2", HtmlTag::H2},
		{"h3", HtmlTag::H3},
		{"h4", HtmlTag::H4},
		{"h5", HtmlTag::H5},
		{"h6", HtmlTag::H6},
		{"ol", HtmlTag::OL},
		{"ul", HtmlTag::UL},
		{"li", HtmlTag::LI},
		{"dt", HtmlTag::DT},
		{"dd", HtmlTag::DD},
		{"dl", HtmlTag::DL},
		{"sup", HtmlTag::SUP},
		{"sub", HtmlTag::SUB},
		{"hr", HtmlTag::HR},
		{"b", HtmlTag::STRONG},
		{"strong", HtmlTag::STRONG},
		{"u", HtmlTag::UNDERLINE},
		{"q", HtmlTag::QUOTATION},
		{"script", HtmlTag::SCRIPT},
		{"style", HtmlTag::STYLE},
		{"table", HtmlTag::TABLE},
		{"th", HtmlTag::TH},
		{"tr", HtmlTag::TR},
		{"td", HtmlTag::TD},
		{"video", HtmlTag::VIDEO},
		{"audio", HtmlTag::AUDIO},
		{"source", HtmlTag::SOURCE},
	};

	for (const KnownTag& entry : known_tags) {
		tags[entry.name] = entry.tag;
	}
}
void HtmlRenderer::render(const std::string& source,
std::vector>& lines,
Links& links,
const std::string& url)
{
std::istringstream input(source);
render(input, lines, links, url);
}
/*
 * Translates the parser's current text, treated as an element name, into
 * the matching HtmlTag.
 *
 * The name is lower-cased before the lookup, so matching ignores case.
 * A name that is not present in `tags` yields a value-initialized HtmlTag
 * (the same value the previous `tags[tagname]` lookup produced), but the
 * lookup no longer adds an entry to `tags` for every unknown name.
 */
HtmlTag HtmlRenderer::extract_tag(TagSoupPullParser& parser)
{
	std::string tagname = parser.get_text();

	// tolower() is only defined for EOF and values representable as
	// unsigned char. Where plain char is signed, a non-ASCII byte would be
	// passed as a negative int, so route every byte through unsigned char.
	std::transform(tagname.begin(),
		tagname.end(),
		tagname.begin(),
		[](unsigned char c) {
			return static_cast<char>(::tolower(c));
		});

	// find() instead of operator[]: the latter inserts a default entry for
	// each miss, which would let arbitrary input grow the table.
	const auto found = tags.find(tagname);
	if (found == tags.end()) {
		return HtmlTag{};
	}
	return found->second;
}
void HtmlRenderer::render(std::istream& input,
std::vector>& lines,
Links& links,
const std::string& url)
{
unsigned int image_count = 0;
unsigned int video_count = 0;
unsigned int audio_count = 0;
unsigned int source_count = 0;
unsigned int iframe_count = 0;
std::string curline;
int indent_level = 0;
std::vector list_elements_stack;
bool inside_pre = false;
bool pre_just_started = false;
size_t pre_consecutive_nl = 0;
bool itunes_hack = false;
bool inside_script = false;
size_t inside_style = 0;
bool inside_video = false;
bool inside_audio = false;
std::vector ol_counts;
std::vector ol_types;
int link_num = -1;
std::vector tables;
/*
* To render the HTML, we use a self-developed "XML" pull parser.
*
* A pull parser works like this:
* - we feed it an XML stream
* - we then pull events from it one at a time (here via next())
* - this lets us iterate over all consecutive elements, such as start
* tag, close tag, text element, ...
*/
TagSoupPullParser xpp(input);
for (TagSoupPullParser::Event e = xpp.next();
e != TagSoupPullParser::Event::END_DOCUMENT;
e = xpp.next()) {
if (inside_script) {
//