Skip to content

Commit

Permalink
clean up boolwriter a bit and make it slightly faster
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Dec 17, 2024
1 parent caeea98 commit da9adbd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 11 deletions.
8 changes: 8 additions & 0 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ pub fn calc_sign_index(val: i16) -> usize {
}
}

/// Reports whether `v` lacks the spare capacity to accept `additional` more
/// elements without reallocating.
///
/// The test is phrased as a comparison against `capacity - len` so that the
/// optimizer understands that a subsequent push or extend will not need to
/// grow the vector.
#[inline(always)]
pub fn needs_to_grow<T>(v: &Vec<T>, additional: usize) -> bool {
    let spare = v.capacity().wrapping_sub(v.len());
    spare < additional
}

#[cfg(test)]
pub fn get_rand_from_seed(seed: [u8; 32]) -> rand_chacha::ChaCha12Rng {
use rand_chacha::rand_core::SeedableRng;
Expand Down
43 changes: 32 additions & 11 deletions src/structs/vpx_bool_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”

use std::io::{Result, Write};

use crate::helpers::needs_to_grow;
use crate::metrics::{Metrics, ModelComponent};
use crate::structs::branch::Branch;
use crate::structs::simple_hash::SimpleHash;
Expand Down Expand Up @@ -112,31 +113,44 @@ impl<W: Write> VPXBoolWriter<W> {

// check whether we cannot put next bit into stream
if tmp_value & (u64::MAX << 57) != 0 {
let mut stream_bits = 64 - tmp_value.leading_zeros() - 2;
// 62 >= stream_bits >= 56
// calculate the number of unaligned bits left after we remove 56 bits
let unaligned_bits = tmp_value.leading_zeros() + 2;

if tmp_value & (1 << stream_bits) != 0 {
// shift align so that the top 6 bytes are the ones we want to write; if there
// was an overflow, it gets rotated down to the bottom bit
let v_aligned = tmp_value.rotate_left(unaligned_bits);

if (v_aligned & 1) != 0 {
self.carry();
}

for _stream_bytes in 0..6 {
stream_bits -= 8;
self.buffer.push((tmp_value >> stream_bits) as u8);
// Append six bytes at a time to the buffer. It is faster to add all 8 bytes
// and then shrink the buffer than to add 6, which creates a temporary buffer.
if needs_to_grow(&self.buffer, 8) {
// avoid inlining the slow path that allocates more memory, since it almost never happens
put_6bytes(&mut self.buffer, v_aligned);
} else {
let b = v_aligned.to_be_bytes();
self.buffer.extend_from_slice(&b);
self.buffer.truncate(self.buffer.len() - 2);
}

tmp_value &= (1 << stream_bits) - 1;
tmp_value |= 1 << (stream_bits + 1);
// 14 >= stream_bits >= 8
// mask the remaining bits and put them back to where they were
// adding the marker bit to the top
tmp_value = ((v_aligned & 0xffff) | 0x20000/*marker bit*/) >> unaligned_bits;
}
// 55 >= stream_bits >= 8

(tmp_value, tmp_range)
}

/// Safe as: at the stream beginning initially put `false` ensure that carry cannot get out
/// of the first stream byte - then `carry` cannot be invoked on empty `buffer`,
/// and after the stream beginning `flush_non_final_data` keeps carry-terminating
/// byte sequence (one non-255-byte before any number of 255-bytes) inside the `buffer`.
#[inline(always)]
///
/// Cold to keep this out of the inner loop since carries are pretty rare
#[cold]
#[inline(never)]
fn carry(&mut self) {
let mut x = self.buffer.len() - 1;

Expand Down Expand Up @@ -310,6 +324,13 @@ impl<W: Write> VPXBoolWriter<W> {
}
}

/// Appends the six most significant bytes of `v`, in big-endian order, to the
/// end of `buffer`; the two least significant bytes are discarded.
///
/// Marked cold and never inlined so that it stays out of line from its caller.
#[cold]
#[inline(never)]
fn put_6bytes(buffer: &mut Vec<u8>, v: u64) {
    let [b0, b1, b2, b3, b4, b5, _, _] = v.to_be_bytes();
    buffer.extend_from_slice(&[b0, b1, b2, b3, b4, b5]);
}

#[cfg(test)]
use crate::structs::vpx_bool_reader::VPXBoolReader;

Expand Down

0 comments on commit da9adbd

Please sign in to comment.