summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander Batischev <eual.jp@gmail.com> 2019-09-28 23:17:16 +0300
committer: Alexander Batischev <eual.jp@gmail.com> 2019-10-02 13:39:27 +0300
commit: 2aa982e67a2602bdfab78ceaa0344c880385392d (patch)
tree: cac549e40534222afdc1df32109c2a18e461a72c
parent: 1e08618a01d0521d1972de0a8cb1a100c857e660 (diff)
download: newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.tar.gz
newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.tar.zst
newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.zip
Ignore # chars inside double quotes and backticks
Fixes #652.
-rw-r--r-- rust/libnewsboat-ffi/src/utils.rs | 2
-rw-r--r-- rust/libnewsboat/src/utils.rs | 66
-rw-r--r-- test/utils.cpp | 58
3 files changed, 118 insertions, 8 deletions
diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs
index 40a7946d..b75c94dc 100644
--- a/rust/libnewsboat-ffi/src/utils.rs
+++ b/rust/libnewsboat-ffi/src/utils.rs
@@ -500,7 +500,7 @@ pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char {
// `result` contains a subset of `line`, which is a C string. Thus, we conclude that
// `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`.
- let result = CString::new(result.into_owned()).unwrap();
+ let result = CString::new(result).unwrap();
result.into_raw()
})
}
diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs
index 40f6ad39..c28047d9 100644
--- a/rust/libnewsboat/src/utils.rs
+++ b/rust/libnewsboat/src/utils.rs
@@ -16,7 +16,6 @@ use self::url::percent_encoding::*;
use self::url::Url;
use libc::c_ulong;
use logger::{self, Level};
-use std::borrow::Cow;
use std::fs::DirBuilder;
use std::io::{self, Write};
use std::os::unix::fs::DirBuilderExt;
@@ -547,13 +546,41 @@ pub fn newsboat_major_version() -> u32 {
}
/// Returns the part of the string before first # character (or the whole input string if there are
-/// no # character in it).
-pub fn strip_comments(line: &str) -> Cow<str> {
- if let Some(index) = line.find('#') {
- Cow::from(&line[0..index])
- } else {
- Cow::from(line)
+/// no # character in it). Pound characters inside double quotes and backticks are ignored.
+pub fn strip_comments(line: &str) -> &str {
+ let mut prev_was_backslash = false;
+ let mut inside_quotes = false;
+ let mut inside_backticks = false;
+
+ let mut first_pound_chr_idx = line.len();
+
+ for (idx, chr) in line.char_indices() {
+ if chr == '\\' {
+ prev_was_backslash = true;
+ continue;
+ } else if chr == '"' {
+ // If the quote is escaped, do nothing
+ if !prev_was_backslash {
+ inside_quotes = !inside_quotes;
+ }
+ } else if chr == '`' {
+ // If the backtick is escaped, do nothing
+ if !prev_was_backslash {
+ inside_backticks = !inside_backticks;
+ }
+ } else if chr == '#' {
+ if !inside_quotes && !inside_backticks {
+ first_pound_chr_idx = idx;
+ break;
+ }
+ }
+
+ // We call `continue` when we run into a backslash; here, we handle all the other
+ // characters, which clearly *aren't* a backslash
+ prev_was_backslash = false;
}
+
+ &line[0..first_pound_chr_idx]
}
#[cfg(test)]
@@ -1073,5 +1100,30 @@ mod tests {
"directive "
);
assert_eq!(strip_comments("directive#comment"), "directive");
+
+ // ignores # characters inside double quotes (#652)
+ let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#;
+ let input = expected.to_owned() + "# this is a comment";
+ assert_eq!(strip_comments(&input), expected);
+
+ let expected =
+ r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#;
+ let input = expected.to_owned() + "#heresacomment";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Escaped double quote inside double quotes is not treated as closing quote
+ let expected = r#"test "here \"goes # nothing\" etc" hehe"#;
+ let input = expected.to_owned() + "# and here is a comment";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Ignores # characters inside backticks
+ let expected = r#"one `two # three` four"#;
+ let input = expected.to_owned() + "# and a comment, of course";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Escaped backtick inside backticks is not treated as closing
+ let expected = r#"some `other \` tricky # test` hehe"#;
+ let input = expected.to_owned() + "#here goescomment";
+ assert_eq!(strip_comments(&input), expected);
}
}
diff --git a/test/utils.cpp b/test/utils.cpp
index e9c8c587..97f7b0b6 100644
--- a/test/utils.cpp
+++ b/test/utils.cpp
@@ -219,6 +219,64 @@ TEST_CASE(
}
}
+TEST_CASE("strip_comments ignores # characters inside double quotes",
+ "[utils][issue652]")
+{
+ SECTION("Real-world cases from issue 652") {
+ const auto expected1 =
+ std::string(R"#(highlight article "[-=+#_*~]{3,}.*" green default)#");
+ const auto input1 = expected1 + "# this is a comment";
+ REQUIRE(utils::strip_comments(input1) == expected1);
+
+ const auto expected2 =
+ std::string(R"#(highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold)#");
+ const auto input2 = expected2 + "#heresacomment";
+ REQUIRE(utils::strip_comments(input2) == expected2);
+ }
+
+ SECTION("Escaped double quote inside double quotes is not treated "
+ "as closing quote")
+ {
+ const auto expected =
+ std::string(R"#(test "here \"goes # nothing\" etc" hehe)#");
+ const auto input = expected + "# and here is a comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
+TEST_CASE("strip_comments ignores # characters inside backticks", "[utils]")
+{
+ SECTION("Simple case") {
+ const auto expected = std::string(R"#(one `two # three` four)#");
+ const auto input = expected + "# and a comment, of course";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+
+ SECTION("Escaped backtick inside backticks is not treated as closing")
+ {
+ const auto expected =
+ std::string(R"#(some `other \` tricky # test` hehe)#");
+ const auto input = expected + "#here goescomment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
+TEST_CASE("strip_comments is not confused by nested double quotes and backticks",
+ "[utils]")
+{
+ {
+ const auto expected = std::string(R"#("`" ... ` )#");
+ const auto input = expected + "#comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+
+ {
+ const auto expected = std::string(R"#(aaa ` bbb "ccc ddd" e` dd)#");
+ const auto input = expected + "# a comment string";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
TEST_CASE(
"consolidate_whitespace replaces multiple consecutive"
"whitespace with a single space",