diff options
author | 2019-09-28 23:17:16 +0300 | |
---|---|---|
committer | 2019-10-02 13:39:27 +0300 | |
commit | 2aa982e67a2602bdfab78ceaa0344c880385392d (patch) | |
tree | cac549e40534222afdc1df32109c2a18e461a72c | |
parent | 1e08618a01d0521d1972de0a8cb1a100c857e660 (diff) | |
download | newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.tar.gz newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.tar.zst newsboat-2aa982e67a2602bdfab78ceaa0344c880385392d.zip |
Ignore # chars inside double quotes and backticks
Fixes #652.
-rw-r--r-- | rust/libnewsboat-ffi/src/utils.rs | 2 | ||||
-rw-r--r-- | rust/libnewsboat/src/utils.rs | 66 | ||||
-rw-r--r-- | test/utils.cpp | 58 |
3 files changed, 118 insertions, 8 deletions
diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs index 40a7946d..b75c94dc 100644 --- a/rust/libnewsboat-ffi/src/utils.rs +++ b/rust/libnewsboat-ffi/src/utils.rs @@ -500,7 +500,7 @@ pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char { // `result` contains a subset of `line`, which is a C string. Thus, we conclude that // `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`. - let result = CString::new(result.into_owned()).unwrap(); + let result = CString::new(result).unwrap(); result.into_raw() }) } diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 40f6ad39..c28047d9 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -16,7 +16,6 @@ use self::url::percent_encoding::*; use self::url::Url; use libc::c_ulong; use logger::{self, Level}; -use std::borrow::Cow; use std::fs::DirBuilder; use std::io::{self, Write}; use std::os::unix::fs::DirBuilderExt; @@ -547,13 +546,41 @@ pub fn newsboat_major_version() -> u32 { } /// Returns the part of the string before first # character (or the whole input string if there are -/// no # character in it). -pub fn strip_comments(line: &str) -> Cow<str> { - if let Some(index) = line.find('#') { - Cow::from(&line[0..index]) - } else { - Cow::from(line) +/// no # character in it). Pound characters inside double quotes and backticks are ignored. +pub fn strip_comments(line: &str) -> &str { + let mut prev_was_backslash = false; + let mut inside_quotes = false; + let mut inside_backticks = false; + + let mut first_pound_chr_idx = line.len(); + + for (idx, chr) in line.char_indices() { + if chr == '\\' { + prev_was_backslash = true; + continue; + } else if chr == '"' { + // If the quote is escaped, do nothing + if !prev_was_backslash { + inside_quotes = !inside_quotes; + } + } else if chr == '`' { + // If the backtick is escaped, do nothing + if !prev_was_backslash { + inside_backticks = !inside_backticks; + } + } else if chr == '#' { + if !inside_quotes && !inside_backticks { + first_pound_chr_idx = idx; + break; + } + } + + // We call `continue` when we run into a backslash; here, we handle all the other + // characters, which clearly *aren't* a backslash + prev_was_backslash = false; } + + &line[0..first_pound_chr_idx] } #[cfg(test)] @@ -1073,5 +1100,30 @@ mod tests { "directive " ); assert_eq!(strip_comments("directive#comment"), "directive"); + + // ignores # characters inside double quotes (#652) + let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#; + let input = expected.to_owned() + "# this is a comment"; + assert_eq!(strip_comments(&input), expected); + + let expected = + r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#; + let input = expected.to_owned() + "#heresacomment"; + assert_eq!(strip_comments(&input), expected); + + // Escaped double quote inside double quotes is not treated as closing quote + let expected = r#"test "here \"goes # nothing\" etc" hehe"#; + let input = expected.to_owned() + "# and here is a comment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores # characters inside backticks + let expected = r#"one `two # three` four"#; + let input = expected.to_owned() + "# and a comment, of course"; + assert_eq!(strip_comments(&input), expected); + + // Escaped backtick inside backticks is not treated as closing + let expected = r#"some `other \` tricky # test` hehe"#; + let input = expected.to_owned() + "#here goescomment"; + assert_eq!(strip_comments(&input), expected); } } diff --git a/test/utils.cpp b/test/utils.cpp index e9c8c587..97f7b0b6 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -219,6 +219,64 @@ TEST_CASE( } } +TEST_CASE("strip_comments ignores # characters inside double quotes", + "[utils][issue652]") +{ + SECTION("Real-world cases from issue 652") { + const auto expected1 = + std::string(R"#(highlight article "[-=+#_*~]{3,}.*" green default)#"); + const auto input1 = expected1 + "# this is a comment"; + REQUIRE(utils::strip_comments(input1) == expected1); + + const auto expected2 = + std::string(R"#(highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold)#"); + const auto input2 = expected2 + "#heresacomment"; + REQUIRE(utils::strip_comments(input2) == expected2); + } + + SECTION("Escaped double quote inside double quotes is not treated " + "as closing quote") + { + const auto expected = + std::string(R"#(test "here \"goes # nothing\" etc" hehe)#"); + const auto input = expected + "# and here is a comment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments ignores # characters inside backticks", "[utils]") +{ + SECTION("Simple case") { + const auto expected = std::string(R"#(one `two # three` four)#"); + const auto input = expected + "# and a comment, of course"; + REQUIRE(utils::strip_comments(input) == expected); + } + + SECTION("Escaped backtick inside backticks is not treated as closing") + { + const auto expected = + std::string(R"#(some `other \` tricky # test` hehe)#"); + const auto input = expected + "#here goescomment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments is not confused by nested double quotes and backticks", + "[utils]") +{ + { + const auto expected = std::string(R"#("`" ... ` )#"); + const auto input = expected + "#comment"; + REQUIRE(utils::strip_comments(input) == expected); + } + + { + const auto expected = std::string(R"#(aaa ` bbb "ccc ddd" e` dd)#"); + const auto input = expected + "# a comment string"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + TEST_CASE( "consolidate_whitespace replaces multiple consecutive" "whitespace with a single space", |