//! A "bag" data structure (based on //! [`BTreeMap`][std::collections::BTreeMap]), used in //! [dataspace][super::dataspace] indexing. use std::collections::BTreeMap; use std::collections::btree_map::{Iter, Keys, Entry}; use std::iter::FromIterator; /// Element counts in [`BTreeBag`]s are 32-bit signed integers. pub type Count = i32; /// Represents the "net change" to the count of a given `key` after a /// change-in-count has been applied via [`BTreeBag::change`] and /// friends. #[derive(Debug, PartialEq, Eq)] pub enum Net { /// The key previously had non-zero count, now has zero count. PresentToAbsent, /// The key's count stayed the same, at zero. AbsentToAbsent, /// The key previously had zero count, now has non-zero count. AbsentToPresent, /// The key's count was previously, and is now also, non-zero. PresentToPresent, } /// A bag datastructure (mapping from values `V` to [`Count`]s /// internally). /// /// A bag may have a *negative* count against a particular element. /// Such a bag is called a "delta", and can be used to represent a /// *change* against a bag containing only positive counts. /// /// Every value of type `V` notionally has a zero count associated /// with it at the start of the bag's lifetime; so a bag is a *total* /// mapping from elements of `V` to counts. /// #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct BTreeBag { counts: BTreeMap, total: isize, } impl std::default::Default for BTreeBag { fn default() -> Self { Self::new() } } impl BTreeBag { /// Construct a new, empty bag. pub fn new() -> BTreeBag { BTreeBag { counts: BTreeMap::new(), total: 0 } } /// Apply a change-in-count (`delta`) against `key`, allowing the /// resulting count to be negative if needed. pub fn change(&mut self, key: V, delta: Count) -> Net { self._change(key, delta, false) } /// Apply a change-in-count (`delta`) against `key`, preventing /// the resulting count from dropping below zero (by clamping it /// at zero in that case). pub fn change_clamped(&mut self, key: V, delta: Count) -> Net { self._change(key, delta, true) } /// Apply a change-in-count (`delta`) against `key`, clamping the /// resulting count at zero iff `clamp` is `true`. pub fn _change(&mut self, key: V, delta: Count, clamp: bool) -> Net { let old_count = self[&key]; let mut new_count = old_count + delta; if clamp { new_count = new_count.max(0) } self.total = self.total + (new_count - old_count) as isize; if new_count == 0 { self.counts.remove(&key); if old_count == 0 { Net::AbsentToAbsent } else { Net::PresentToAbsent } } else { self.counts.insert(key, new_count); if old_count == 0 { Net::AbsentToPresent } else { Net::PresentToPresent } } } /// Removes all elements from the bag, leaving it empty with a zero total count. pub fn clear(&mut self) { self.counts.clear(); self.total = 0; } /// `true` iff `key` has a non-zero count associated with it. /// /// Note that `true` will be returned even when the count is *negative*. /// pub fn contains_key(&self, key: &V) -> bool { self.counts.contains_key(key) } /// `true` iff no element of the bag has a non-zero count. pub fn is_empty(&self) -> bool { self.counts.is_empty() } /// Yields the number of elements having a non-zero count. pub fn len(&self) -> usize { self.counts.len() } /// Answers the sum of all counts in the bag. /// /// For bags with no negative counts, this is the same as the /// number of "distinct copies" of elements in the bag. pub fn total(&self) -> isize { self.total } /// Iterates over elements in the bag with non-zero counts. pub fn keys(&self) -> Keys { self.counts.keys() } /// Retrieves an [`Entry`] for `key`. /// /// Note that the `Entry` will be "absent" when `key` has a zero /// associated count. pub fn entry(&mut self, key: V) -> Entry { self.counts.entry(key) } } impl<'a, V: std::cmp::Ord> IntoIterator for &'a BTreeBag { type Item = (&'a V, &'a Count); type IntoIter = Iter<'a, V, Count>; fn into_iter(self) -> Self::IntoIter { self.counts.iter() } } impl FromIterator for BTreeBag { fn from_iter>(iter: I) -> Self { let mut bag = Self::new(); for k in iter { bag.change(k, 1); } bag } } impl std::ops::Index<&V> for BTreeBag { type Output = Count; fn index(&self, i: &V) -> &Count { self.counts.get(i).unwrap_or(&0) } }