#include
#include
#include
#include
#include
#include
using namespace newsbeuter;
/// Constructs a renderer that wraps rendered text at the given width.
///
/// @param width column limit stored in member `w`; render() starts a new
///              output line once the current line plus the next word would
///              reach this limit.
htmlrenderer::htmlrenderer(unsigned int width)
    : w(width)
{
}
void htmlrenderer::render(const std::string& source, std::vector& lines) {
std::istringstream input(source);
render(input, lines);
}
void htmlrenderer::render(std::istream& input, std::vector& lines) {
std::vector links;
unsigned int link_count = 0;
std::string curline;
int indent_level = 0;
bool inside_list = false, inside_li = false, is_ol = false;
unsigned int ol_count = 1;
xmlpullparser xpp;
xpp.setInput(input);
for (xmlpullparser::event e = xpp.next(); e != xmlpullparser::END_DOCUMENT; e = xpp.next()) {
switch (e) {
case xmlpullparser::START_TAG:
if (xpp.getText() == "a") {
std::string link;
try {
link = xpp.getAttributeValue("href");
} catch (const std::invalid_argument& ) {
link = "";
}
if (link.length() > 0) {
links.push_back(link);
std::ostringstream ref;
ref << "[" << link_count << "]";
link_count++;
curline.append(ref.str());
}
} else if (xpp.getText() == "br") {
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "img") {
std::string imgurl = xpp.getAttributeValue("src");
if (imgurl.length() > 0) {
links.push_back(imgurl);
std::ostringstream ref;
ref << "[" << link_count << "]";
link_count++;
curline.append(ref.str());
}
} else if (xpp.getText() == "blockquote") {
++indent_level;
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "pre") {
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "p") {
if (curline.length() > 0)
lines.push_back(curline);
if (lines.size() > 0 && lines[lines.size()-1].length() > static_cast(indent_level*2))
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "ol") {
inside_list = true;
is_ol = true;
ol_count = 1;
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "ul") {
inside_list = true;
is_ol = false;
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "li") {
if (inside_li) {
indent_level-=2;
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
}
inside_li = true;
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
indent_level+=2;
if (is_ol) {
std::ostringstream num;
num << ol_count;
if (ol_count < 10)
curline.append(" ");
curline.append(num.str());
curline.append(". ");
++ol_count;
} else {
curline.append(" * ");
}
}
break;
case xmlpullparser::END_TAG:
if (xpp.getText() == "blockquote") {
--indent_level;
if (indent_level < 0)
indent_level = 0;
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "pre") {
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "ol" || xpp.getText() == "ul") {
inside_list = false;
if (inside_li) {
indent_level-=2;
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
}
if (curline.length() > 0)
lines.push_back(curline);
lines.push_back(std::string(""));
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "li") {
indent_level-=2;
inside_li = false;
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
} else if (xpp.getText() == "p") {
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
}
break;
case xmlpullparser::TEXT:
{
std::vector words = utils::tokenize(xpp.getText());
unsigned int i=0;
for (std::vector::iterator it=words.begin();it!=words.end();++it,++i) {
if ((curline.length() + it->length()) >= w) {
if (curline.length() > 0)
lines.push_back(curline);
prepare_newline(curline, indent_level);
}
curline.append(*it);
if (i < words.size()-1)
curline.append(" ");
}
}
break;
default:
/* do nothing */
break;
}
}
if (curline.length() > 0)
lines.push_back(curline);
if (links.size() > 0) {
lines.push_back(std::string(""));
lines.push_back(std::string("Links: "));
for (unsigned int i=0;i