From fcb758a62d8925a1dda2feee52e88cf915eac7ac Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 17:15:16 +0200 Subject: [PATCH 1/7] add --histogram flag definition --- crates/core/flags/defs.rs | 37 ++++++++++++++++++++++++++++++++++++ crates/core/flags/lowargs.rs | 1 + 2 files changed, 38 insertions(+) diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 9a196c491..5476f6e19 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -59,6 +59,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[ &ContextSeparator, &Count, &CountMatches, + &Histogram, &Crlf, &Debug, &DfaSizeLimit, @@ -1322,6 +1323,42 @@ given. } } +/// --histogram +#[derive(Debug)] +struct Histogram; + +impl Flag for Histogram { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + None + } + fn name_long(&self) -> &'static str { + "histogram" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Print a histogram of the matches" + } + fn doc_long(&self) -> &'static str { + r" +--histogram NUM means that the bins of the histograms are NUM characters wide. In the output the numbers are the counts in these bins. 
+ " + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let binsize = convert::usize(&v.unwrap_value())?; + args.histogram = if binsize == 0 { None } else { Some(binsize) }; + Ok(()) + } +} + #[cfg(test)] #[test] fn test_count_matches() { diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs index 184c96ae8..7ca29793c 100644 --- a/crates/core/flags/lowargs.rs +++ b/crates/core/flags/lowargs.rs @@ -59,6 +59,7 @@ pub(crate) struct LowArgs { pub(crate) globs: Vec, pub(crate) heading: Option, pub(crate) hidden: bool, + pub(crate) histogram: Option, pub(crate) hostname_bin: Option, pub(crate) hyperlink_format: HyperlinkFormat, pub(crate) iglobs: Vec, From e2659ff051f2b7b07f193415c53af9bdd9c12f26 Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 19:04:38 +0200 Subject: [PATCH 2/7] print hello for histogram --- crates/core/flags/defs.rs | 3 ++- crates/core/flags/hiargs.rs | 4 +++- crates/core/flags/lowargs.rs | 4 +++- crates/printer/src/summary.rs | 11 ++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 5476f6e19..72fd36693 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -1354,7 +1354,8 @@ impl Flag for Histogram { fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { let binsize = convert::usize(&v.unwrap_value())?; - args.histogram = if binsize == 0 { None } else { Some(binsize) }; + args.histogram = binsize; + args.mode.update(Mode::Search(SearchMode::Histogram)); Ok(()) } } diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index df09dceda..4791cbf2d 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -203,6 +203,7 @@ impl HiArgs { SearchMode::FilesWithMatches | SearchMode::FilesWithoutMatch | SearchMode::Count + | SearchMode::Histogram | SearchMode::CountMatches => return false, SearchMode::JSON => return true, 
SearchMode::Standard => { @@ -569,9 +570,10 @@ impl HiArgs { SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch, SearchMode::Count => SummaryKind::Count, SearchMode::CountMatches => SummaryKind::CountMatches, + SearchMode::Histogram => SummaryKind::Histogram, SearchMode::JSON => { return Printer::JSON(self.printer_json(wtr)) - } + }, SearchMode::Standard => { return Printer::Standard(self.printer_standard(wtr)) } diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs index 7ca29793c..77fc25919 100644 --- a/crates/core/flags/lowargs.rs +++ b/crates/core/flags/lowargs.rs @@ -59,7 +59,7 @@ pub(crate) struct LowArgs { pub(crate) globs: Vec, pub(crate) heading: Option, pub(crate) hidden: bool, - pub(crate) histogram: Option, + pub(crate) histogram: usize, pub(crate) hostname_bin: Option, pub(crate) hyperlink_format: HyperlinkFormat, pub(crate) iglobs: Vec, @@ -210,6 +210,8 @@ pub(crate) enum SearchMode { /// Show files containing at least one match and the total number of /// matches. CountMatches, + /// Show a histogram of the matches + Histogram, /// Print matches in a JSON lines format. JSON, } diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 275419d4c..ebe086e78 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -71,6 +71,8 @@ pub enum SummaryKind { /// If the `path` setting is enabled, then the count is prefixed by the /// corresponding file path. CountMatches, + /// Show a histogram of the matches + Histogram, /// Show only the file path if and only if a match was found. /// /// This ignores the `path` setting and always shows the file path. 
If no @@ -101,7 +103,7 @@ impl SummaryKind { match *self { PathWithMatch | PathWithoutMatch => true, - Count | CountMatches | Quiet => false, + Count | CountMatches | Histogram | Quiet => false, } } @@ -111,7 +113,7 @@ impl SummaryKind { use self::SummaryKind::*; match *self { - CountMatches => true, + Histogram |CountMatches => true, Count | PathWithMatch | PathWithoutMatch | Quiet => false, } } @@ -123,7 +125,7 @@ impl SummaryKind { match *self { PathWithMatch | Quiet => true, - Count | CountMatches | PathWithoutMatch => false, + Count | CountMatches | Histogram | PathWithoutMatch => false, } } } @@ -788,6 +790,9 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { self.write_line_term(searcher)?; } } + SummaryKind::Histogram => { + self.write(b"hello")?; + }, SummaryKind::PathWithMatch => { if self.match_count > 0 { self.write_path_line(searcher)?; From d6ead98a5e4772226e9504972f906d68898f0153 Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 19:45:52 +0200 Subject: [PATCH 3/7] implement histogram with hardcoded bin size --- crates/core/flags/hiargs.rs | 2 +- crates/printer/src/stats.rs | 20 ++++++++++++++++++++ crates/printer/src/summary.rs | 17 ++++++++++++++--- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index 4791cbf2d..487afe2b3 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -573,7 +573,7 @@ impl HiArgs { SearchMode::Histogram => SummaryKind::Histogram, SearchMode::JSON => { return Printer::JSON(self.printer_json(wtr)) - }, + } SearchMode::Standard => { return Printer::Standard(self.printer_standard(wtr)) } diff --git a/crates/printer/src/stats.rs b/crates/printer/src/stats.rs index 555401b3d..7fcfe2f34 100644 --- a/crates/printer/src/stats.rs +++ b/crates/printer/src/stats.rs @@ -1,4 +1,5 @@ use std::{ + collections::HashMap, ops::{Add, AddAssign}, time::Duration, }; @@ -17,6 +18,7 @@ pub struct Stats { 
bytes_searched: u64, bytes_printed: u64, matched_lines: u64, + histogram: HashMap<u64, u64>, matches: u64, } @@ -33,6 +35,11 @@ impl Stats { self.elapsed.0 } + /// Returns a reference to the histogram + pub fn histogram(&self) -> &HashMap<u64, u64> { + &self.histogram + } + /// Return the total number of searches executed. pub fn searches(&self) -> u64 { self.searches @@ -102,6 +109,11 @@ impl Stats { pub fn add_matches(&mut self, n: u64) { self.matches += n; } + + /// Increment the count of the histogram bin identified by `entry`. + pub fn increment_histogram(&mut self, entry: u64) { + self.histogram.entry(entry).and_modify(|c| *c += 1).or_insert(1); + } } impl Add for Stats { @@ -125,6 +137,14 @@ impl<'a> Add<&'a Stats> for Stats { bytes_printed: self.bytes_printed + rhs.bytes_printed, matched_lines: self.matched_lines + rhs.matched_lines, matches: self.matches + rhs.matches, + histogram: self + .histogram + .into_iter() + .chain(rhs.histogram.clone()) + .fold(std::collections::HashMap::new(), |mut acc, (k, v)| { + acc.entry(k).and_modify(|e| *e += v).or_insert(v); + acc + }), } } } diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index ebe086e78..5c2f7f1cc 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -113,7 +113,7 @@ impl SummaryKind { use self::SummaryKind::*; match *self { - Histogram |CountMatches => true, + Histogram | CountMatches => true, Count | PathWithMatch | PathWithoutMatch | Quiet => false, } } @@ -682,6 +682,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { self.match_count += 1; } if let Some(ref mut stats) = self.stats { + stats.increment_histogram(mat.absolute_byte_offset() / 500); stats.add_matches(sink_match_count); stats.add_matched_lines(mat.lines().count() as u64); } else if self.summary.config.kind.quit_early() { @@ -791,8 +792,18 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { } } SummaryKind::Histogram => { - self.write(b"hello")?; - }, + let stats = self
.stats + .as_ref() + .expect("Histogram should enable stats tracking"); + let histo_string = stats + .histogram() + .values() + .map(|v| v.to_string()) + .collect::>() + .join("\n"); + self.write(histo_string.as_bytes())?; + } SummaryKind::PathWithMatch => { if self.match_count > 0 { self.write_path_line(searcher)?; From 6fd3259202c4b3103bb1523bc6aec61bd1591b00 Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 23:26:29 +0200 Subject: [PATCH 4/7] actually use the bin size parameter and display file paths --- crates/core/flags/defs.rs | 4 +-- crates/core/flags/hiargs.rs | 7 ++++- crates/core/flags/lowargs.rs | 2 +- crates/printer/src/summary.rs | 48 +++++++++++++++++++++++++---------- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 72fd36693..09dc22f7f 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -1353,8 +1353,8 @@ impl Flag for Histogram { } fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { - let binsize = convert::usize(&v.unwrap_value())?; - args.histogram = binsize; + let binsize = convert::u64(&v.unwrap_value())?; + args.histogram_bin_size = Some(binsize); args.mode.update(Mode::Search(SearchMode::Histogram)); Ok(()) } diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index 487afe2b3..a9feca761 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -55,6 +55,7 @@ pub(crate) struct HiArgs { follow: bool, globs: ignore::overrides::Override, heading: bool, + histogram_bin_size: Option, hidden: bool, hyperlink_config: grep::printer::HyperlinkConfig, ignore_file_case_insensitive: bool, @@ -273,6 +274,7 @@ impl HiArgs { follow: low.follow, heading, hidden: low.hidden, + histogram_bin_size: low.histogram_bin_size, hyperlink_config, ignore_file: low.ignore_file, ignore_file_case_insensitive: low.ignore_file_case_insensitive, @@ -570,7 +572,10 @@ impl HiArgs { 
SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch, SearchMode::Count => SummaryKind::Count, SearchMode::CountMatches => SummaryKind::CountMatches, - SearchMode::Histogram => SummaryKind::Histogram, + SearchMode::Histogram => SummaryKind::Histogram( + self.histogram_bin_size + .expect("Histogram bin size must be specified"), + ), SearchMode::JSON => { return Printer::JSON(self.printer_json(wtr)) } diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs index 77fc25919..25da1af4d 100644 --- a/crates/core/flags/lowargs.rs +++ b/crates/core/flags/lowargs.rs @@ -59,7 +59,7 @@ pub(crate) struct LowArgs { pub(crate) globs: Vec, pub(crate) heading: Option, pub(crate) hidden: bool, - pub(crate) histogram: usize, + pub(crate) histogram_bin_size: Option, pub(crate) hostname_bin: Option, pub(crate) hyperlink_format: HyperlinkFormat, pub(crate) iglobs: Vec, diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 5c2f7f1cc..81e16edb3 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -72,7 +72,7 @@ pub enum SummaryKind { /// corresponding file path. CountMatches, /// Show a histogram of the matches - Histogram, + Histogram(u64), /// Show only the file path if and only if a match was found. /// /// This ignores the `path` setting and always shows the file path. If no @@ -103,7 +103,7 @@ impl SummaryKind { match *self { PathWithMatch | PathWithoutMatch => true, - Count | CountMatches | Histogram | Quiet => false, + Count | CountMatches | Histogram { .. } | Quiet => false, } } @@ -113,7 +113,7 @@ impl SummaryKind { use self::SummaryKind::*; match *self { - Histogram | CountMatches => true, + Histogram { .. } | CountMatches => true, Count | PathWithMatch | PathWithoutMatch | Quiet => false, } } @@ -125,7 +125,9 @@ impl SummaryKind { match *self { PathWithMatch | Quiet => true, - Count | CountMatches | Histogram | PathWithoutMatch => false, + Count | CountMatches | Histogram { .. 
} | PathWithoutMatch => { + false + } } } } @@ -682,9 +684,15 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { self.match_count += 1; } if let Some(ref mut stats) = self.stats { - stats.increment_histogram(mat.absolute_byte_offset() / 500); stats.add_matches(sink_match_count); stats.add_matched_lines(mat.lines().count() as u64); + + if let SummaryKind::Histogram(bin_size) = self.summary.config.kind + { + stats.increment_histogram( + mat.absolute_byte_offset() / bin_size, + ); + } } else if self.summary.config.kind.quit_early() { return Ok(false); } @@ -791,18 +799,32 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { self.write_line_term(searcher)?; } } - SummaryKind::Histogram => { + SummaryKind::Histogram(bin_size) => { let stats = self .stats .as_ref() .expect("Histogram should enable stats tracking"); - let histo_string = stats - .histogram() - .values() - .map(|v| v.to_string()) - .collect::>() - .join("\n"); - self.write(histo_string.as_bytes())?; + let total: u64 = stats.histogram().values().sum(); + if total > 0 { + let bin_iter = 0..=(stats.bytes_searched() / bin_size); + let terminal_str = bin_iter + .map(|i| { + stats + .histogram() + .get(&i) + .unwrap_or(&0) + .to_string() + .into_bytes() + }) + .collect::>>() + .join(searcher.line_terminator().as_bytes()); + self.write_path_field()?; + self.write_line_term(searcher)?; + self.write(&terminal_str)?; + self.write_line_term(searcher)?; + self.write(b"---")?; + self.write_line_term(searcher)?; + } } SummaryKind::PathWithMatch => { if self.match_count > 0 { From 29c99fd1f1148010c491932c4f236171c6aa29ae Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 23:27:49 +0200 Subject: [PATCH 5/7] remove --- --- crates/printer/src/summary.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 81e16edb3..7caa66686 100644 --- a/crates/printer/src/summary.rs +++ 
b/crates/printer/src/summary.rs @@ -822,8 +822,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { self.write_line_term(searcher)?; self.write(&terminal_str)?; self.write_line_term(searcher)?; - self.write(b"---")?; - self.write_line_term(searcher)?; } } SummaryKind::PathWithMatch => { From 4b0e5a11b9c5a35fc4553c527051489c5f97ece0 Mon Sep 17 00:00:00 2001 From: Cabbache Date: Wed, 23 Oct 2024 23:35:15 +0200 Subject: [PATCH 6/7] update long definition --- crates/core/flags/defs.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 09dc22f7f..d2f28ea8a 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -1347,9 +1347,10 @@ impl Flag for Histogram { r"Print a histogram of the matches" } fn doc_long(&self) -> &'static str { - r" ---histogram NUM means that the bins of the histograms are NUM characters wide. In the output the numbers are the counts in these bins. - " + r" +The offset of the match and the specified bin size +(NUM) of this argument are used to determine which bin gets +incremented for every match." 
} fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { From c0558a808c918eaa05e6e3bc2f2b5570dffc5587 Mon Sep 17 00:00:00 2001 From: Cabbache Date: Thu, 24 Oct 2024 13:56:55 +0200 Subject: [PATCH 7/7] optimise --- crates/printer/src/summary.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 7caa66686..2e5cd8bbf 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -804,8 +804,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { .stats .as_ref() .expect("Histogram should enable stats tracking"); - let total: u64 = stats.histogram().values().sum(); - if total > 0 { + if self.match_count > 0 { let bin_iter = 0..=(stats.bytes_searched() / bin_size); let terminal_str = bin_iter .map(|i| { @@ -818,8 +817,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> { }) .collect::>>() .join(searcher.line_terminator().as_bytes()); - self.write_path_field()?; - self.write_line_term(searcher)?; + if self.path.is_some() { + self.write_path_field()?; + self.write_line_term(searcher)?; + } self.write(&terminal_str)?; self.write_line_term(searcher)?; }