#include #include #include #include #include #include
using namespace newsbeuter;

/**
 * Construct a renderer.
 *
 * @param width  stored in the member `w`; the stream overload of render()
 *               compares the pending line length against it when deciding
 *               where to break text.
 */
htmlrenderer::htmlrenderer(unsigned int width) : w(width) { }

/**
 * Convenience overload: render markup held in a string.
 *
 * Wraps `source` in a std::istringstream and forwards to the stream
 * overload, which appends the rendered output to `lines`.
 *
 * @param source  the markup to render
 * @param lines   output container; rendered lines are appended to it
 */
void htmlrenderer::render(const std::string& source, std::vector& lines) {
    std::istringstream input(source);
    render(input, lines);
}

/**
 * Render markup read from a stream into text lines.
 *
 * Pulls events from an xmlpullparser until END_DOCUMENT and builds up the
 * current output line in `curline`, pushing it onto `lines` whenever a tag
 * forces a line break.
 *
 * @param input  stream the parser reads from
 * @param lines  output container; rendered lines are appended to it
 */
void htmlrenderer::render(std::istream& input, std::vector& lines) {
    std::vector links;              // link targets, in order of appearance
    unsigned int link_count = 0;    // index written into the next "[N]" marker
    std::string curline;            // line currently being assembled
    int indent_level = 0;           // passed to prepare_newline() for each new line
    bool inside_list = false, inside_li = false, is_ol = false;
    unsigned int ol_count = 1;      // number given to the next <li> of an <ol>
    xmlpullparser xpp;
    xpp.setInput(input);
    for (xmlpullparser::event e = xpp.next(); e != xmlpullparser::END_DOCUMENT; e = xpp.next()) {
        switch (e) {
        case xmlpullparser::START_TAG:
            if (xpp.getText() == "a") {
                std::string link;
                // A lookup failure (std::invalid_argument) is treated the
                // same as an empty href: no link is recorded.
                try {
                    link = xpp.getAttributeValue("href");
                } catch (const std::invalid_argument& ) {
                    link = "";
                }
                if (link.length() > 0) {
                    // Remember the target and leave a "[N]" marker inline;
                    // N is the value of link_count before the increment.
                    links.push_back(link);
                    std::ostringstream ref;
                    ref << "[" << link_count << "]";
                    link_count++;
                    curline.append(ref.str());
                }
            } else if (xpp.getText() == "br") {
                // Flush the pending line (if non-empty) and start a new one.
                if (curline.length() > 0)
                    lines.push_back(curline);
                prepare_newline(curline, indent_level);
            } else if (xpp.getText() == "img") {
                // NOTE(review): unlike the href lookup above, this call is not
                // wrapped in try/catch. If getAttributeValue() throws
                // std::invalid_argument for a missing attribute (as the "a"
                // branch presumes), an <img> without src would propagate the
                // exception out of render() - confirm and guard if so.
                std::string imgurl = xpp.getAttributeValue("src");
                if (imgurl.length() > 0) {
                    // Image sources share the link list and numbering.
                    links.push_back(imgurl);
                    std::ostringstream ref;
                    ref << "[" << link_count << "]";
                    link_count++;
                    curline.append(ref.str());
                }
            } else if (xpp.getText() == "blockquote") {
                // Deeper indentation, then flush, blank line, fresh line.
                ++indent_level;
                if (curline.length() > 0)
                    lines.push_back(curline);
                lines.push_back(std::string(""));
                prepare_newline(curline, indent_level);
            } else if (xpp.getText() == "pre") {
                if (curline.length() > 0)
                    lines.push_back(curline);
                lines.push_back(std::string(""));
                prepare_newline(curline, indent_level);
            } else if (xpp.getText() == "p") {
                if (curline.length() > 0)
                    lines.push_back(curline);
                // Add a separating blank line only when the last emitted line
                // is longer than indent_level*2 characters - presumably
                // meaning it holds more than indentation; verify against
                // prepare_newline().
                if (lines.size() > 0 && lines[lines.size()-1].length() > static_cast(indent_level*2))
                    lines.push_back(std::string(""));
                prepare_newline(curline, indent_level);
            } else if (xpp.getText() == "ol") {
inside_list = true; is_ol = true; ol_count = 1; if (curline.length() > 0) lines.push_back(curline); lines.push_back(std::string("")); prepare_newline(curline, indent_level); } else if (xpp.getText() == "ul") { inside_list = true; is_ol = false; if (curline.length() > 0) lines.push_back(curline); lines.push_back(std::string("")); prepare_newline(curline, indent_level); } else if (xpp.getText() == "li") { if (inside_li) { indent_level-=2; if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); } inside_li = true; if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); indent_level+=2; if (is_ol) { std::ostringstream num; num << ol_count; if (ol_count < 10) curline.append(" "); curline.append(num.str()); curline.append(". "); ++ol_count; } else { curline.append(" * "); } } break; case xmlpullparser::END_TAG: if (xpp.getText() == "blockquote") { --indent_level; if (indent_level < 0) indent_level = 0; if (curline.length() > 0) lines.push_back(curline); lines.push_back(std::string("")); prepare_newline(curline, indent_level); } else if (xpp.getText() == "pre") { if (curline.length() > 0) lines.push_back(curline); lines.push_back(std::string("")); prepare_newline(curline, indent_level); } else if (xpp.getText() == "ol" || xpp.getText() == "ul") { inside_list = false; if (inside_li) { indent_level-=2; if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); } if (curline.length() > 0) lines.push_back(curline); lines.push_back(std::string("")); prepare_newline(curline, indent_level); } else if (xpp.getText() == "li") { indent_level-=2; inside_li = false; if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); } else if (xpp.getText() == "p") { if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); } break; case xmlpullparser::TEXT: { std::vector words = utils::tokenize(xpp.getText()); unsigned int 
i=0; for (std::vector::iterator it=words.begin();it!=words.end();++it,++i) { if ((curline.length() + it->length()) >= w) { if (curline.length() > 0) lines.push_back(curline); prepare_newline(curline, indent_level); } curline.append(*it); if (i < words.size()-1) curline.append(" "); } } break; default: /* do nothing */ break; } } if (curline.length() > 0) lines.push_back(curline); if (links.size() > 0) { lines.push_back(std::string("")); lines.push_back(std::string("Links: ")); for (unsigned int i=0;i