Skip to main content

Get it working with stdin

ID
461d4f6
date
2025-01-09 23:44:43+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
72223b6
message
Get it working with stdin
changed files
2 files, 67 additions, 28 deletions

Changed files

src/main.rs (1101) → src/main.rs (1402)

diff --git a/src/main.rs b/src/main.rs
index b61ef4d..260dd60 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
 // #![deny(warnings)]
 
+use std::io::BufRead;
 use std::iter::Iterator;
 
 mod sampling;
@@ -8,33 +9,51 @@ fn main() {
     // Read the user's command line arguments (if any)
     //
     //   0 arguments  = get a single random line
-    //   1 argument N = get that number of lines
+    //   1 argument k = get that number of lines
     //  >1 arguments  = error
     //
-    // let args: Vec<_> = std::env::args().collect();
-    //
-    //     let n = match args.len() {
-    //         1 => 0,
-    //         2 => match args[1].parse::<i32>() {
-    //             Ok(parsed_n) => parsed_n,
-    //             Err(_) => {
-    //                 eprintln!("Usage: randline [N]");
-    //                 std::process::exit(1)
-    //             }
-    //         },
-    //         _ => {
-    //             eprintln!("Usage: randline [N]");
-    //             std::process::exit(1)
-    //         }
-    //     };
-    //
-    //     // Read the first N lines from stdout
-    //     // let stdin = io::stdin();
-    //
-    //     let a = [1, 2, 3, 4, 5, 6];
-    //     let iter = a.iter();
-    //
-    //     println!("{:?}", sampling::reservoir_sample(iter, n));
-    //
-    //     println!("n = {:?}", n);
+    let args: Vec<_> = std::env::args().collect();
+
+    let k = match args.len() {
+        1 => 1,
+        2 => match args[1].parse::<usize>() {
+            Ok(parsed_k) => parsed_k,
+            Err(_) => {
+                eprintln!("Usage: randline [k]");
+                std::process::exit(1)
+            }
+        },
+        _ => {
+            eprintln!("Usage: randline [k]");
+            std::process::exit(1)
+        }
+    };
+
+    let lines = std::io::stdin().lock().lines()
+        .map(|line| match line {
+          Ok(ln) => ln,
+          Err(e) => {
+            eprintln!("Unable to read from stdin: {:?}", e);
+            std::process::exit(1)
+          }
+        });
+
+    println!("k = {:?}", k);
+
+    let sample = sampling::reservoir_sample(lines, k);
+
+    for line in sample {
+      println!("{}", line);
+    }
+
+
+        // Read the first N lines from stdin
+        // let stdin = io::stdin();
+
+        // let a = [1, 2, 3, 4, 5, 6];
+        // let iter = a.iter();
+        //
+        // println!("{:?}", sampling::reservoir_sample(iter, n));
+        //
+        // println!("n = {:?}", n);
 }

src/sampling.rs (5269) → src/sampling.rs (5970)

diff --git a/src/sampling.rs b/src/sampling.rs
index e74cfff..06a3d04 100644
--- a/src/sampling.rs
+++ b/src/sampling.rs
@@ -14,12 +14,18 @@ pub fn reservoir_sample<T: std::fmt::Debug>(
     mut items: impl Iterator<Item = T>,
     k: usize,
 ) -> Vec<T> {
+    // Taking a sample with k=0 doesn't make much sense in practice,
+    // but we include this to avoid problems downstream.
+    if k == 0 {
+      return vec![];
+    }
+
     // Create an empty reservoir.
     //
     // This is a map (weight) -> (item).
     let mut reservoir: HashMap<i32, T> = HashMap::with_capacity(k);
 
-    // Fill the reservoir with the first n items.  If there are less
+    // Fill the reservoir with the first k items.  If there are fewer
     // than k items, we can exit immediately.
     for _ in 1..=k {
         match items.next() {
@@ -29,6 +35,11 @@ pub fn reservoir_sample<T: std::fmt::Debug>(
     }
 
     // What's the largest weight seen so far?
+    //
+    // Note: we're okay to `unwrap()` here because we know that `reservoir`
+    // contains at least one item.  Either `items` was non-empty, or if it
+    // was empty, then we'd already have returned when trying to fill the
+    // reservoir with the first k items.
     let mut max_weight: i32 = *reservoir.keys().max().unwrap();
 
     // Now go through the remaining items.
@@ -98,6 +109,15 @@ mod reservoir_sample_tests {
         assert!(equivalent_items(sample, vec!["a", "b", "c"]));
     }
 
+    // If k=0, then it returns an empty sample.
+    #[test]
+    fn it_returns_an_empty_sample_if_k_zero() {
+        let items = vec!["a", "b", "c"];
+        let sample = reservoir_sample(items.into_iter(), 0);
+
+        assert_eq!(sample.len(), 0);
+    }
+
     // It chooses items with a uniform distribution -- every item has
     // an equal chance of being picked.
     //