diff options
author | 2009-10-22 23:54:55 +0200 | |
---|---|---|
committer | 2009-10-22 23:55:40 +0200 | |
commit | 877f67a93ffacfb0cc7256f7b255e0bb54362216 (patch) | |
tree | 10ef59d53cd8baee65bb9c55c9d4d12c90a73cc5 | |
parent | 58abaa50b1c1556a6e31084a82df5bbdbd68db8b (diff) | |
download | newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.gz newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.zst newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.zip |
tag soup pull parser: added support for spaces in attributes.
-rw-r--r-- | src/tagsouppullparser.cpp | 51 | ||||
-rw-r--r-- | test/test.cpp | 11 |
2 files changed, 49 insertions, 13 deletions
diff --git a/src/tagsouppullparser.cpp b/src/tagsouppullparser.cpp index b5c42133..3d9e5148 100644 --- a/src/tagsouppullparser.cpp +++ b/src/tagsouppullparser.cpp @@ -317,20 +317,47 @@ void tagsouppullparser::remove_trailing_whitespace(std::string& s) { } void tagsouppullparser::parse_tag(const std::string& tagstr) { - std::vector<std::string> tokens = utils::tokenize(tagstr); - if (tokens.size() > 0) { - text = tokens[0]; - if (tokens.size() > 1) { - std::vector<std::string>::iterator it = tokens.begin(); - ++it; - while (it != tokens.end()) { - add_attribute(*it); - ++it; - } + std::string::size_type last_pos = tagstr.find_first_not_of(" \r\n\t", 0); + std::string::size_type pos = tagstr.find_first_of(" \r\n\t", last_pos); + unsigned int count = 0; + + LOG(LOG_DEBUG, "parse_tag: parsing '%s', pos = %d, last_pos = %d", tagstr.c_str(), pos, last_pos); + + while (last_pos != std::string::npos) { + if (count == 0) { + // first token: tag name + if (pos == std::string::npos) + pos = tagstr.length(); + text = tagstr.substr(last_pos, pos - last_pos); + LOG(LOG_DEBUG, "parse_tag: tag name = %s", text.c_str()); } else { - if (text.length() > 0 && text[text.length()-1] == '/') - text.erase(text.length()-1, 1); + pos = tagstr.find_first_of("= ", last_pos); + std::string attr; + if (pos != std::string::npos) { + LOG(LOG_DEBUG, "parse_tag: found = or space"); + if (tagstr[pos] == '=') { + LOG(LOG_DEBUG, "parse_tag: found ="); + if (tagstr[pos+1] == '\'' || tagstr[pos+1] == '"') { + pos = tagstr.find_first_of("'\"", pos+2); + if (pos != std::string::npos) + pos++; + LOG(LOG_DEBUG, "parse_tag: finding ending quote, pos = %d", pos); + } else { + pos = tagstr.find_first_of(" \r\n\t", pos+1); + LOG(LOG_DEBUG, "parse_tag: finding end of unquoted attribute"); + } + } + } + if (pos == std::string::npos) { + LOG(LOG_DEBUG, "parse_tag: found end of string, correcting end position"); + pos = tagstr.length(); + } + attr = tagstr.substr(last_pos, pos - last_pos); + LOG(LOG_DEBUG, 
"parse_tag: extracted attribute is '%s', adding", attr.c_str()); + add_attribute(attr); } + last_pos = tagstr.find_first_not_of(" \r\n\t", pos); + count++; } } diff --git a/test/test.cpp b/test/test.cpp index d5ad44be..b4a00926 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -180,7 +180,7 @@ BOOST_AUTO_TEST_CASE(TestConfigParserContainerAndKeymap) { } BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) { - std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;!@</more></test>"); + std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;!@</more><xxx foo=bar baz=\"qu ux\" hi='ho ho ho'></xxx></test>"); tagsouppullparser xpp; tagsouppullparser::event e; xpp.setInput(is); @@ -214,6 +214,15 @@ BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) { BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG); BOOST_CHECK_EQUAL(xpp.getText(), "more"); e = xpp.next(); + BOOST_CHECK_EQUAL(e, tagsouppullparser::START_TAG); + BOOST_CHECK_EQUAL(xpp.getText(), "xxx"); + BOOST_CHECK_EQUAL(xpp.getAttributeValue("foo"), "bar"); + BOOST_CHECK_EQUAL(xpp.getAttributeValue("baz"), "qu ux"); + BOOST_CHECK_EQUAL(xpp.getAttributeValue("hi"), "ho ho ho"); + e = xpp.next(); + BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG); + BOOST_CHECK_EQUAL(xpp.getText(), "xxx"); + e = xpp.next(); BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG); BOOST_CHECK_EQUAL(xpp.getText(), "test"); e = xpp.next(); |