diff options
Diffstat (limited to 'rust/libnewsboat/src/utils.rs')
-rw-r--r-- | rust/libnewsboat/src/utils.rs | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 91ee9cfe..d1ccca80 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -545,6 +545,44 @@ pub fn newsboat_major_version() -> u32 { env!("CARGO_PKG_VERSION_MAJOR").parse::<u32>().unwrap() } +/// Returns the part of the string before first # character (or the whole input string if there are +/// no # character in it). Pound characters inside double quotes and backticks are ignored. +pub fn strip_comments(line: &str) -> &str { + let mut prev_was_backslash = false; + let mut inside_quotes = false; + let mut inside_backticks = false; + + let mut first_pound_chr_idx = line.len(); + + for (idx, chr) in line.char_indices() { + if chr == '\\' { + prev_was_backslash = true; + continue; + } else if chr == '"' { + // If the quote is escaped or we're inside backticks, do nothing + if !prev_was_backslash && !inside_backticks { + inside_quotes = !inside_quotes; + } + } else if chr == '`' { + // If the backtick is escaped, do nothing + if !prev_was_backslash { + inside_backticks = !inside_backticks; + } + } else if chr == '#' { + if !prev_was_backslash && !inside_quotes && !inside_backticks { + first_pound_chr_idx = idx; + break; + } + } + + // We call `continue` when we run into a backslash; here, we handle all the other + // characters, which clearly *aren't* a backslash + prev_was_backslash = false; + } + + &line[0..first_pound_chr_idx] +} + #[cfg(test)] mod tests { extern crate tempfile; @@ -827,6 +865,7 @@ mod tests { get_command_output("a-program-that-is-guaranteed-to-not-exists"), "".to_string() ); + assert_eq!(get_command_output("echo c\" d e"), "".to_string()); } #[test] @@ -1042,4 +1081,55 @@ mod tests { assert_eq!(strnaturalcmp("aa10", "aa2"), Ordering::Greater); } + + #[test] + fn t_strip_comments() { + // no comments in line + assert_eq!(strip_comments(""), ""); + assert_eq!(strip_comments("\t\n"), "\t\n"); + assert_eq!(strip_comments("some directive "), "some directive "); + + // fully commented line + assert_eq!(strip_comments("#"), ""); + assert_eq!(strip_comments("# #"), ""); + assert_eq!(strip_comments("# comment"), ""); + + // partially commented line + assert_eq!(strip_comments("directive # comment"), "directive "); + assert_eq!( + strip_comments("directive # comment # another"), + "directive " + ); + assert_eq!(strip_comments("directive#comment"), "directive"); + + // ignores # characters inside double quotes (#652) + let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#; + let input = expected.to_owned() + "# this is a comment"; + assert_eq!(strip_comments(&input), expected); + + let expected = + r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#; + let input = expected.to_owned() + "#heresacomment"; + assert_eq!(strip_comments(&input), expected); + + // Escaped double quote inside double quotes is not treated as closing quote + let expected = r#"test "here \"goes # nothing\" etc" hehe"#; + let input = expected.to_owned() + "# and here is a comment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores # characters inside backticks + let expected = r#"one `two # three` four"#; + let input = expected.to_owned() + "# and a comment, of course"; + assert_eq!(strip_comments(&input), expected); + + // Escaped backtick inside backticks is not treated as closing + let expected = r#"some `other \` tricky # test` hehe"#; + let input = expected.to_owned() + "#here goescomment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores escaped # characters (\\#) + let expected = r#"one two \# three four"#; + let input = expected.to_owned() + "# and a comment"; + assert_eq!(strip_comments(&input), expected); + } } |