summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Krennmair <ak@synflood.at> 2009-10-22 23:54:55 +0200
committer Andreas Krennmair <ak@synflood.at> 2009-10-22 23:55:40 +0200
commit 877f67a93ffacfb0cc7256f7b255e0bb54362216 (patch)
tree 10ef59d53cd8baee65bb9c55c9d4d12c90a73cc5
parent 58abaa50b1c1556a6e31084a82df5bbdbd68db8b (diff)
download newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.gz
newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.zst
newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.zip
tag soup pull parser: added support for spaces in attributes.
-rw-r--r-- src/tagsouppullparser.cpp 51
-rw-r--r-- test/test.cpp 11
2 files changed, 49 insertions, 13 deletions
diff --git a/src/tagsouppullparser.cpp b/src/tagsouppullparser.cpp
index b5c42133..3d9e5148 100644
--- a/src/tagsouppullparser.cpp
+++ b/src/tagsouppullparser.cpp
@@ -317,20 +317,47 @@ void tagsouppullparser::remove_trailing_whitespace(std::string& s) {
}
void tagsouppullparser::parse_tag(const std::string& tagstr) {
- std::vector<std::string> tokens = utils::tokenize(tagstr);
- if (tokens.size() > 0) {
- text = tokens[0];
- if (tokens.size() > 1) {
- std::vector<std::string>::iterator it = tokens.begin();
- ++it;
- while (it != tokens.end()) {
- add_attribute(*it);
- ++it;
- }
+ std::string::size_type last_pos = tagstr.find_first_not_of(" \r\n\t", 0);
+ std::string::size_type pos = tagstr.find_first_of(" \r\n\t", last_pos);
+ unsigned int count = 0;
+
+ LOG(LOG_DEBUG, "parse_tag: parsing '%s', pos = %d, last_pos = %d", tagstr.c_str(), pos, last_pos);
+
+ while (last_pos != std::string::npos) {
+ if (count == 0) {
+ // first token: tag name
+ if (pos == std::string::npos)
+ pos = tagstr.length();
+ text = tagstr.substr(last_pos, pos - last_pos);
+ LOG(LOG_DEBUG, "parse_tag: tag name = %s", text.c_str());
} else {
- if (text.length() > 0 && text[text.length()-1] == '/')
- text.erase(text.length()-1, 1);
+ pos = tagstr.find_first_of("= ", last_pos);
+ std::string attr;
+ if (pos != std::string::npos) {
+ LOG(LOG_DEBUG, "parse_tag: found = or space");
+ if (tagstr[pos] == '=') {
+ LOG(LOG_DEBUG, "parse_tag: found =");
+ if (tagstr[pos+1] == '\'' || tagstr[pos+1] == '"') {
+ pos = tagstr.find_first_of("'\"", pos+2);
+ if (pos != std::string::npos)
+ pos++;
+ LOG(LOG_DEBUG, "parse_tag: finding ending quote, pos = %d", pos);
+ } else {
+ pos = tagstr.find_first_of(" \r\n\t", pos+1);
+ LOG(LOG_DEBUG, "parse_tag: finding end of unquoted attribute");
+ }
+ }
+ }
+ if (pos == std::string::npos) {
+ LOG(LOG_DEBUG, "parse_tag: found end of string, correcting end position");
+ pos = tagstr.length();
+ }
+ attr = tagstr.substr(last_pos, pos - last_pos);
+ LOG(LOG_DEBUG, "parse_tag: extracted attribute is '%s', adding", attr.c_str());
+ add_attribute(attr);
}
+ last_pos = tagstr.find_first_not_of(" \r\n\t", pos);
+ count++;
}
}
diff --git a/test/test.cpp b/test/test.cpp
index d5ad44be..b4a00926 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -180,7 +180,7 @@ BOOST_AUTO_TEST_CASE(TestConfigParserContainerAndKeymap) {
}
BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) {
- std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;&#33;&#x40;</more></test>");
+ std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;&#33;&#x40;</more><xxx foo=bar baz=\"qu ux\" hi='ho ho ho'></xxx></test>");
tagsouppullparser xpp;
tagsouppullparser::event e;
xpp.setInput(is);
@@ -214,6 +214,15 @@ BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) {
BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
BOOST_CHECK_EQUAL(xpp.getText(), "more");
e = xpp.next();
+ BOOST_CHECK_EQUAL(e, tagsouppullparser::START_TAG);
+ BOOST_CHECK_EQUAL(xpp.getText(), "xxx");
+ BOOST_CHECK_EQUAL(xpp.getAttributeValue("foo"), "bar");
+ BOOST_CHECK_EQUAL(xpp.getAttributeValue("baz"), "qu ux");
+ BOOST_CHECK_EQUAL(xpp.getAttributeValue("hi"), "ho ho ho");
+ e = xpp.next();
+ BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
+ BOOST_CHECK_EQUAL(xpp.getText(), "xxx");
+ e = xpp.next();
BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
BOOST_CHECK_EQUAL(xpp.getText(), "test");
e = xpp.next();