Get it working with stdin
- ID: 461d4f6
- date: 2025-01-09 23:44:43+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 72223b6
- message: Get it working with stdin
- changed files: 2 files, 67 additions, 28 deletions
Changed files
src/main.rs (1101) → src/main.rs (1402)
diff --git a/src/main.rs b/src/main.rs
index b61ef4d..260dd60 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
// #![deny(warnings)]
+use std::io::BufRead;
use std::iter::Iterator;
mod sampling;
@@ -8,33 +9,51 @@ fn main() {
// Read the user's command line arguments (if any)
//
// 0 arguments = get a single random line
- // 1 argument N = get that number of lines
+ // 1 argument k = get that number of lines
// >1 arguments = error
//
- // let args: Vec<_> = std::env::args().collect();
- //
- // let n = match args.len() {
- // 1 => 0,
- // 2 => match args[1].parse::<i32>() {
- // Ok(parsed_n) => parsed_n,
- // Err(_) => {
- // eprintln!("Usage: randline [N]");
- // std::process::exit(1)
- // }
- // },
- // _ => {
- // eprintln!("Usage: randline [N]");
- // std::process::exit(1)
- // }
- // };
- //
- // // Read the first N lines from stdout
- // // let stdin = io::stdin();
- //
- // let a = [1, 2, 3, 4, 5, 6];
- // let iter = a.iter();
- //
- // println!("{:?}", sampling::reservoir_sample(iter, n));
- //
- // println!("n = {:?}", n);
+ let args: Vec<_> = std::env::args().collect();
+
+ let k = match args.len() {
+ 1 => 1,
+ 2 => match args[1].parse::<usize>() {
+ Ok(parsed_k) => parsed_k,
+ Err(_) => {
+ eprintln!("Usage: randline [k]");
+ std::process::exit(1)
+ }
+ },
+ _ => {
+ eprintln!("Usage: randline [k]");
+ std::process::exit(1)
+ }
+ };
+
+ let lines = std::io::stdin().lock().lines()
+ .map(|line| match line {
+ Ok(ln) => ln,
+ Err(e) => {
+ eprintln!("Unable to read from stdin: {:?}", e);
+ std::process::exit(1)
+ }
+ });
+
+ println!("k = {:?}", k);
+
+ let sample = sampling::reservoir_sample(lines, k);
+
+ for line in sample {
+ println!("{}", line);
+ }
+
+
+ // Read the first N lines from stdin
+ // let stdin = io::stdin();
+
+ // let a = [1, 2, 3, 4, 5, 6];
+ // let iter = a.iter();
+ //
+ // println!("{:?}", sampling::reservoir_sample(iter, n));
+ //
+ // println!("n = {:?}", n);
}
src/sampling.rs (5269) → src/sampling.rs (5970)
diff --git a/src/sampling.rs b/src/sampling.rs
index e74cfff..06a3d04 100644
--- a/src/sampling.rs
+++ b/src/sampling.rs
@@ -14,12 +14,18 @@ pub fn reservoir_sample<T: std::fmt::Debug>(
mut items: impl Iterator<Item = T>,
k: usize,
) -> Vec<T> {
+ // Taking a sample with k=0 doesn't make much sense in practice,
+ // but we include this to avoid problems downstream.
+ if k == 0 {
+ return vec![];
+ }
+
// Create an empty reservoir.
//
// This is a map (weight) -> (item).
let mut reservoir: HashMap<i32, T> = HashMap::with_capacity(k);
- // Fill the reservoir with the first n items. If there are less
+ // Fill the reservoir with the first k items. If there are less
// than k items, we can exit immediately.
for _ in 1..=k {
match items.next() {
@@ -29,6 +35,11 @@ pub fn reservoir_sample<T: std::fmt::Debug>(
}
// What's the largest weight seen so far?
+ //
+ // Note: we're okay to `unwrap()` here because we know that `reservoir`
+ // contains at least one item. Either `items` was non-empty, or if it
+ // was empty, then we'd already have returned when trying to fill the
+ // reservoir with the first k items.
let mut max_weight: i32 = *reservoir.keys().max().unwrap();
// Now go through the remaining items.
@@ -98,6 +109,15 @@ mod reservoir_sample_tests {
assert!(equivalent_items(sample, vec!["a", "b", "c"]));
}
+ // If k=0, then it returns an empty sample.
+ #[test]
+ fn it_returns_an_empty_sample_if_k_zero() {
+ let items = vec!["a", "b", "c"];
+ let sample = reservoir_sample(items.into_iter(), 0);
+
+ assert_eq!(sample.len(), 0);
+ }
+
// It chooses items with a uniform distribution -- every item has
// an equal chance of being picked.
//