tag soup pull parser: added support for spaces in quoted attribute values.

author: Andreas Krennmair <ak@synflood.at> 2009-10-22 23:54:55 +0200
committer: Andreas Krennmair <ak@synflood.at> 2009-10-22 23:55:40 +0200
commit: 877f67a93ffacfb0cc7256f7b255e0bb54362216 (patch)
tree: 10ef59d53cd8baee65bb9c55c9d4d12c90a73cc5
parent: 58abaa50b1c1556a6e31084a82df5bbdbd68db8b (diff)
download: newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.gz newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.zst newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.zip
2 files changed, 49 insertions, 13 deletions
diff --git a/src/tagsouppullparser.cpp b/src/tagsouppullparser.cpp
index b5c42133..3d9e5148 100644
--- a/src/tagsouppullparser.cpp
+++ b/src/tagsouppullparser.cpp
@@ -317,20 +317,47 @@ void tagsouppullparser::remove_trailing_whitespace(std::string& s) {
 }
 
 void tagsouppullparser::parse_tag(const std::string& tagstr) {
-	std::vector<std::string> tokens = utils::tokenize(tagstr);
-	if (tokens.size() > 0) {
-		text = tokens[0];
-		if (tokens.size() > 1) {
-			std::vector<std::string>::iterator it = tokens.begin();
-			++it;
-			while (it != tokens.end()) {
-				add_attribute(*it);
-				++it;	
-			}
+	std::string::size_type last_pos = tagstr.find_first_not_of(" \r\n\t", 0);
+	std::string::size_type pos = tagstr.find_first_of(" \r\n\t", last_pos);
+	unsigned int count = 0;
+
+	LOG(LOG_DEBUG, "parse_tag: parsing '%s', pos = %d, last_pos = %d", tagstr.c_str(), pos, last_pos);
+
+	while (last_pos != std::string::npos) {
+		if (count == 0) {
+			// first token: tag name
+			if (pos == std::string::npos)
+				pos = tagstr.length();
+			text = tagstr.substr(last_pos, pos - last_pos);
+			LOG(LOG_DEBUG, "parse_tag: tag name = %s", text.c_str());
 		} else {
-			if (text.length() > 0 && text[text.length()-1] == '/')
-				text.erase(text.length()-1, 1);
+			pos = tagstr.find_first_of("= ", last_pos);
+			std::string attr;
+			if (pos != std::string::npos) {
+				LOG(LOG_DEBUG, "parse_tag: found = or space");
+				if (tagstr[pos] == '=') {
+					LOG(LOG_DEBUG, "parse_tag: found =");
+					if (tagstr[pos+1] == '\'' || tagstr[pos+1] == '"') {
+						pos = tagstr.find_first_of("'\"", pos+2);
+						if (pos != std::string::npos)
+							pos++;
+						LOG(LOG_DEBUG, "parse_tag: finding ending quote, pos = %d", pos);
+					} else {
+						pos = tagstr.find_first_of(" \r\n\t", pos+1);
+						LOG(LOG_DEBUG, "parse_tag: finding end of unquoted attribute");
+					}
+				}
+			}
+			if (pos == std::string::npos) {
+				LOG(LOG_DEBUG, "parse_tag: found end of string, correcting end position");
+				pos = tagstr.length();
+			}
+			attr = tagstr.substr(last_pos, pos - last_pos);
+			LOG(LOG_DEBUG, "parse_tag: extracted attribute is '%s', adding", attr.c_str());
+			add_attribute(attr);
 		}
+		last_pos = tagstr.find_first_not_of(" \r\n\t", pos);
+		count++;
 	}
 }
 
diff --git a/test/test.cpp b/test/test.cpp
index d5ad44be..b4a00926 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -180,7 +180,7 @@ BOOST_AUTO_TEST_CASE(TestConfigParserContainerAndKeymap) {
 }
 
 BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) {
-	std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;&#33;&#x40;</more></test>");
+	std::istringstream is("<test><foo quux='asdf' bar=\"qqq\">text</foo>more text<more>&quot;&#33;&#x40;</more><xxx foo=bar baz=\"qu ux\" hi='ho ho ho'></xxx></test>");
 	tagsouppullparser xpp;
 	tagsouppullparser::event e;
 	xpp.setInput(is);
@@ -214,6 +214,15 @@ BOOST_AUTO_TEST_CASE(TestTagSoupPullParser) {
 	BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
 	BOOST_CHECK_EQUAL(xpp.getText(), "more");
 	e = xpp.next();
+	BOOST_CHECK_EQUAL(e, tagsouppullparser::START_TAG);
+	BOOST_CHECK_EQUAL(xpp.getText(), "xxx");
+	BOOST_CHECK_EQUAL(xpp.getAttributeValue("foo"), "bar");
+	BOOST_CHECK_EQUAL(xpp.getAttributeValue("baz"), "qu ux");
+	BOOST_CHECK_EQUAL(xpp.getAttributeValue("hi"), "ho ho ho");
+	e = xpp.next();
+	BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
+	BOOST_CHECK_EQUAL(xpp.getText(), "xxx");
+	e = xpp.next();
 	BOOST_CHECK_EQUAL(e, tagsouppullparser::END_TAG);
 	BOOST_CHECK_EQUAL(xpp.getText(), "test");
 	e = xpp.next();
author	Andreas Krennmair <ak@synflood.at>	2009-10-22 23:54:55 +0200
committer	Andreas Krennmair <ak@synflood.at>	2009-10-22 23:55:40 +0200
commit	877f67a93ffacfb0cc7256f7b255e0bb54362216 (patch)
tree	10ef59d53cd8baee65bb9c55c9d4d12c90a73cc5
parent	58abaa50b1c1556a6e31084a82df5bbdbd68db8b (diff)
download	newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.gz newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.tar.zst newsboat-877f67a93ffacfb0cc7256f7b255e0bb54362216.zip