sled/
histogram.rs

1//! Copied from my historian crate. - Tyler Neely
2//!
3//! A zero-config simple histogram collector
4//!
5//! for use in instrumented optimization.
6//! Uses logarithmic bucketing rather than sampling,
7//! and has bounded (generally <0.5%) error on percentiles.
8//! Performs no allocations after initial creation.
9//! Uses Relaxed atomics during collection.
10//!
11//! When you create it, it allocates 65k `AtomicUsize`'s
12//! that it uses for incrementing. Generating reports
13//! after running workloads on dozens of `Histogram`'s
14//! does not result in a perceptible delay, but it
15//! might not be acceptable for use in low-latency
16//! reporting paths.
17//!
18//! The trade-offs taken in this are to minimize latency
19//! during collection, while initial allocation and
20//! postprocessing delays are acceptable.
21//!
22//! Future work to further reduce collection latency
23//! may include using thread-local caches that perform
24//! no atomic operations until they are dropped, when
25//! they may atomically aggregate their measurements
26//! into the shared collector that will be used for
27//! reporting.
28#![allow(unused)]
29#![allow(unused_results)]
30#![allow(clippy::print_stdout)]
31#![allow(clippy::float_arithmetic)]
32
33use std::convert::TryFrom;
34use std::fmt::{self, Debug};
35use std::sync::atomic::{AtomicUsize, Ordering};
36
37const PRECISION: f64 = 100.;
38const BUCKETS: usize = 1 << 16;
39
40/// A histogram collector that uses zero-configuration logarithmic buckets.
41pub struct Histogram {
42    vals: Vec<AtomicUsize>,
43    sum: AtomicUsize,
44    count: AtomicUsize,
45}
46
impl Default for Histogram {
    #[allow(unsafe_code)]
    fn default() -> Histogram {
        // Normal build: eagerly allocate all 2**16 zeroed counters so
        // `measure` never allocates afterwards.
        #[cfg(not(feature = "miri_optimizations"))]
        {
            let mut vals = Vec::with_capacity(BUCKETS);
            vals.resize_with(BUCKETS, Default::default);

            Histogram {
                vals,
                sum: AtomicUsize::new(0),
                count: AtomicUsize::new(0),
            }
        }

        #[cfg(feature = "miri_optimizations")]
        {
            // Avoid calling Vec::resize_with with a large length because its
            // internals cause stacked borrows tracking information to add an
            // item for each element of the vector.
            let mut vals = std::mem::ManuallyDrop::new(vec![0_usize; BUCKETS]);
            let ptr: *mut usize = vals.as_mut_ptr();
            let len = vals.len();
            let capacity = vals.capacity();

            // SAFETY(review): reinterprets the Vec<usize> allocation as
            // Vec<AtomicUsize>. This relies on AtomicUsize having the same
            // size and alignment as usize (documented by std) and on the
            // ManuallyDrop above preventing a double free of the buffer.
            let vals: Vec<AtomicUsize> = unsafe {
                Vec::from_raw_parts(ptr as *mut AtomicUsize, len, capacity)
            };

            Histogram {
                vals,
                sum: AtomicUsize::new(0),
                count: AtomicUsize::new(0),
            }
        }
    }
}
84
// NOTE(review): this manual impl looks redundant — every field
// (Vec<AtomicUsize>, AtomicUsize) is already Send, so the automatic
// impl should apply. Confirm nothing depends on it before removing.
#[allow(unsafe_code)]
unsafe impl Send for Histogram {}
87
88impl Debug for Histogram {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
90        const PS: [f64; 10] =
91            [0., 50., 75., 90., 95., 97.5, 99., 99.9, 99.99, 100.];
92        f.write_str("Histogramgram[")?;
93
94        for p in &PS {
95            let res = self.percentile(*p).round();
96            let line = format!("({} -> {}) ", p, res);
97            f.write_str(&*line)?;
98        }
99
100        f.write_str("]")
101    }
102}
103
104impl Histogram {
105    /// Record a value.
106    #[inline]
107    pub fn measure(&self, raw_value: u64) {
108        #[cfg(not(feature = "no_metrics"))]
109        {
110            let value_float: f64 = raw_value as f64;
111            self.sum.fetch_add(value_float.round() as usize, Ordering::Relaxed);
112
113            self.count.fetch_add(1, Ordering::Relaxed);
114
115            // compress the value to one of 2**16 values
116            // using logarithmic bucketing
117            let compressed: u16 = compress(value_float);
118
119            // increment the counter for this compressed value
120            self.vals[compressed as usize].fetch_add(1, Ordering::Relaxed);
121        }
122    }
123
124    /// Retrieve a percentile [0-100]. Returns NAN if no metrics have been
125    /// collected yet.
126    pub fn percentile(&self, p: f64) -> f64 {
127        #[cfg(not(feature = "no_metrics"))]
128        {
129            assert!(p <= 100., "percentiles must not exceed 100.0");
130
131            let count = self.count.load(Ordering::Acquire);
132
133            if count == 0 {
134                return std::f64::NAN;
135            }
136
137            let mut target = count as f64 * (p / 100.);
138            if target == 0. {
139                target = 1.;
140            }
141
142            let mut sum = 0.;
143
144            for (idx, val) in self.vals.iter().enumerate() {
145                let count = val.load(Ordering::Acquire);
146                sum += count as f64;
147
148                if sum >= target {
149                    return decompress(idx as u16);
150                }
151            }
152        }
153
154        std::f64::NAN
155    }
156
157    /// Dump out some common percentiles.
158    pub fn print_percentiles(&self) {
159        println!("{:?}", self);
160    }
161
162    /// Return the sum of all observations in this histogram.
163    pub fn sum(&self) -> usize {
164        self.sum.load(Ordering::Acquire)
165    }
166
167    /// Return the count of observations in this histogram.
168    pub fn count(&self) -> usize {
169        self.count.load(Ordering::Acquire)
170    }
171}
172
173// compress takes a value and lossily shrinks it to an u16 to facilitate
174// bucketing of histogram values, staying roughly within 1% of the true
175// value. This fails for large values of 1e142 and above, and is
176// inaccurate for values closer to 0 than +/- 0.51 or +/- math.Inf.
177#[allow(clippy::cast_sign_loss)]
178#[allow(clippy::cast_possible_truncation)]
179#[inline]
180fn compress<T: Into<f64>>(input_value: T) -> u16 {
181    let value: f64 = input_value.into();
182    let abs = value.abs();
183    let boosted = 1. + abs;
184    let ln = boosted.ln();
185    let compressed = PRECISION.mul_add(ln, 0.5);
186    assert!(compressed <= f64::from(u16::max_value()));
187
188    compressed as u16
189}
190
191// decompress takes a lossily shrunken u16 and returns an f64 within 1% of
192// the original passed to compress.
193#[inline]
194fn decompress(compressed: u16) -> f64 {
195    let unboosted = f64::from(compressed) / PRECISION;
196    (unboosted.exp() - 1.)
197}
198
#[cfg(not(feature = "no_metrics"))]
#[test]
fn it_works() {
    let hist = Histogram::default();

    // Five samples: two 2s, two 3s, one 4.
    for &sample in &[2, 2, 3, 3, 4] {
        hist.measure(sample);
    }

    let pctl = |p: f64| hist.percentile(p).round() as usize;
    assert_eq!(pctl(0.), 2);
    assert_eq!(pctl(40.), 2);
    assert_eq!(pctl(40.1), 3);
    assert_eq!(pctl(80.), 3);
    assert_eq!(pctl(80.1), 4);
    assert_eq!(pctl(100.), 4);

    hist.print_percentiles();
}
216
#[cfg(not(feature = "no_metrics"))]
#[test]
fn high_percentiles() {
    let hist = Histogram::default();

    // (repetitions, value) batches forming a heavily skewed
    // distribution, plus a single 500 outlier.
    let batches: [(usize, u64); 4] = [(9000, 10), (900, 25), (90, 33), (9, 47)];
    for &(reps, value) in &batches {
        for _ in 0..reps {
            hist.measure(value);
        }
    }
    hist.measure(500);

    let pctl = |p: f64| hist.percentile(p).round() as usize;
    assert_eq!(pctl(0.), 10);
    assert_eq!(pctl(99.), 25);
    assert_eq!(pctl(99.89), 33);
    assert_eq!(pctl(99.91), 47);
    assert_eq!(pctl(99.99), 47);
    // Log bucketing is lossy: 500 decompresses to ~502.
    assert_eq!(pctl(100.), 502);
}
241
#[cfg(not(feature = "no_metrics"))]
#[test]
fn multithreaded() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(Histogram::default());

    // Ten threads each record a single measurement of 20.
    let handles: Vec<_> = (0..10)
        .map(|_| {
            let shared = Arc::clone(&shared);
            thread::spawn(move || shared.measure(20))
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }

    assert_eq!(shared.percentile(50.).round() as usize, 20);
}