microsoft · mcroomp · May 15, 2024 · May 8, 2024 · May 8, 2024 · May 8, 2024
diff --git a/src/structs/lepton_decoder.rs b/src/structs/lepton_decoder.rs
@@ -355,7 +355,7 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
 
         // now loop through the coefficients in zigzag, terminating once we hit the number of non-zeros
         for (zig49, &coord) in UNZIGZAG_49.iter().enumerate() {
-            let best_prior_bit_length = u16_bit_length(best_priors[coord as usize] as u16);
+            let best_prior_bit_length = u16_bit_length(best_priors[coord as usize]);
 
             let coef = model_per_color
                 .read_coef(
@@ -508,17 +508,18 @@ fn decode_one_edge<R: Read, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
             break;
         }
 
-        let ptcc8 = pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(
+        let best_prior = pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(
+            qt, coord, here_mut, above, left,
+        );
+
+        let coef = model_per_color.read_edge_coefficient(
+            bool_reader,
             qt,
             coord,
-            here_mut,
-            above,
-            left,
+            zig15offset,
             num_non_zeros_edge,
-        );
-
-        let coef =
-            model_per_color.read_edge_coefficient(bool_reader, qt, coord, zig15offset, &ptcc8)?;
+            best_prior,
+        )?;
 
         if coef != 0 {
             num_non_zeros_edge -= 1;

diff --git a/src/structs/lepton_encoder.rs b/src/structs/lepton_encoder.rs
@@ -370,7 +370,7 @@ pub fn write_coefficient_block<const ALL_PRESENT: bool, W: Write>(
 
         // now loop through the coefficients in zigzag, terminating once we hit the number of non-zeros
         for (zig49, &coord) in UNZIGZAG_49.iter().enumerate() {
-            let best_prior_bit_length = u16_bit_length(best_priors[coord as usize] as u16);
+            let best_prior_bit_length = u16_bit_length(best_priors[coord as usize]);
 
             let coef = here.get_coefficient(coord as usize);
 
@@ -568,19 +568,22 @@ fn encode_one_edge<W: Write, const ALL_PRESENT: bool, const HORIZONTAL: bool>(
             break;
         }
 
-        let ptcc8 = pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(
-            qt,
-            coord,
-            &block,
-            &above,
-            &left,
-            num_non_zeros_edge,
+        let best_prior = pt.calc_coefficient_context8_lak::<ALL_PRESENT, HORIZONTAL>(
+            qt, coord, &block, &above, &left,
         );
 
         let coef = block.get_coefficient(coord);
 
         model_per_color
-            .write_edge_coefficient(bool_writer, qt, coef, coord, zig15offset, &ptcc8)
+            .write_edge_coefficient(
+                bool_writer,
+                qt,
+                coef,
+                coord,
+                zig15offset,
+                num_non_zeros_edge,
+                best_prior,
+            )
             .context(here!())?;
 
         if coef != 0 {

diff --git a/src/structs/mod.rs b/src/structs/mod.rs
@@ -26,7 +26,6 @@ mod model;
 mod multiplexer;
 mod neighbor_summary;
 mod probability_tables;
-mod probability_tables_coefficient_context;
 mod probability_tables_set;
 mod quantization_tables;
 mod row_spec;

diff --git a/src/structs/model.rs b/src/structs/model.rs
@@ -9,14 +9,13 @@ use std::cmp;
 use std::io::{Read, Write};
 
 use crate::consts::*;
-use crate::helpers::{calc_sign_index, err_exit_code, here, u16_bit_length};
+use crate::helpers::{calc_sign_index, err_exit_code, here, u16_bit_length, u32_bit_length};
 use crate::lepton_error::ExitCode;
 use crate::metrics::{ModelComponent, ModelSubComponent};
 use crate::structs::branch::Branch;
 use default_boxed::DefaultBoxed;
 
 use super::probability_tables::ProbabilityTables;
-use super::probability_tables_coefficient_context::ProbabilityTablesCoefficientContext;
 use super::quantization_tables::QuantizationTables;
 use super::vpx_bool_reader::VPXBoolReader;
 use super::vpx_bool_writer::VPXBoolWriter;
@@ -334,10 +333,28 @@ impl ModelPerColor {
         qt: &QuantizationTables,
         coord: usize,
         zig15offset: usize,
-        ptcc8: &ProbabilityTablesCoefficientContext,
+        num_non_zeros_edge: u8,
+        best_prior: i32,
     ) -> Result<i16> {
-        let length_branches = &mut self.counts_x[ptcc8.num_non_zeros_bin as usize][zig15offset]
-            .exponent_counts[ptcc8.best_prior_bit_len as usize];
+        let num_non_zeros_edge_bin = usize::from(num_non_zeros_edge) - 1;
+
+        // bounds checks will test these anyway, so check here for better
+        // error messages; this also gives the optimizer more freedom to move code around
+        assert!(
+            num_non_zeros_edge_bin < NUM_NON_ZERO_EDGE_BINS,
+            "num_non_zeros_edge_bin {0} too high",
+            num_non_zeros_edge_bin
+        );
+
+        assert!(zig15offset < 14, "zig15offset {0} too high", zig15offset);
+
+        // we cap the bit length since the prior prediction can be wonky
+        let best_prior_abs = best_prior.unsigned_abs();
+        let best_prior_bit_len =
+            cmp::min(MAX_EXPONENT - 1, u32_bit_length(best_prior_abs) as usize);
+
+        let length_branches = &mut self.counts_x[num_non_zeros_edge_bin][zig15offset]
+            .exponent_counts[best_prior_bit_len];
 
         let length = bool_reader
             .get_unary_encoded(
@@ -348,7 +365,7 @@ impl ModelPerColor {
 
         let mut coef = 0;
         if length != 0 {
-            let sign = self.get_sign_counts_mut(ptcc8);
+            let sign = &mut self.sign_counts[calc_sign_index(best_prior)][best_prior_bit_len];
 
             let neg = !bool_reader
                 .get(sign, ModelComponent::Edge(ModelSubComponent::Sign))
@@ -361,8 +378,11 @@ impl ModelPerColor {
                 let mut i: i32 = length - 2;
 
                 if i >= min_threshold {
-                    let thresh_prob =
-                        self.get_residual_threshold_counts_mut(ptcc8, min_threshold, length);
+                    let thresh_prob = self.get_residual_threshold_counts_mut(
+                        best_prior_abs,
+                        min_threshold,
+                        length,
+                    );
 
                     let mut decoded_so_far = 1;
                     while i >= min_threshold {
@@ -383,14 +403,7 @@ impl ModelPerColor {
                 }
 
                 if i >= 0 {
-                    debug_assert!(
-                        (ptcc8.num_non_zeros_bin as usize) < self.counts_x.len(),
-                        "d1 {0} too high",
-                        ptcc8.num_non_zeros_bin
-                    );
-
-                    let res_prob = &mut self.counts_x[ptcc8.num_non_zeros_bin as usize]
-                        [zig15offset]
+                    let res_prob = &mut self.counts_x[num_non_zeros_edge_bin][zig15offset]
                         .residual_noise_counts;
 
                     coef <<= i + 1;
@@ -416,14 +429,32 @@ impl ModelPerColor {
         coef: i16,
         coord: usize,
         zig15offset: usize,
-        ptcc8: &ProbabilityTablesCoefficientContext,
+        num_non_zeros_edge: u8,
+        best_prior: i32,
     ) -> Result<()> {
-        let exp_array = &mut self.counts_x[ptcc8.num_non_zeros_bin as usize][zig15offset]
-            .exponent_counts[ptcc8.best_prior_bit_len as usize];
+        let num_non_zeros_edge_bin = usize::from(num_non_zeros_edge) - 1;
+
+        // bounds checks will test these anyway, so check here for better
+        // error messages; this also gives the optimizer more freedom to move code around
+        assert!(
+            num_non_zeros_edge_bin < NUM_NON_ZERO_EDGE_BINS,
+            "num_non_zeros_edge_bin {0} too high",
+            num_non_zeros_edge_bin
+        );
+
+        assert!(zig15offset < 14, "zig15offset {0} too high", zig15offset);
+
+        // we cap the bit length since the prior prediction can be wonky
+        let best_prior_abs = best_prior.unsigned_abs();
+        let best_prior_bit_len =
+            cmp::min(MAX_EXPONENT - 1, u32_bit_length(best_prior_abs) as usize);
 
         let abs_coef = coef.unsigned_abs();
         let length = u16_bit_length(abs_coef) as usize;
 
+        let exp_array = &mut self.counts_x[num_non_zeros_edge_bin][zig15offset].exponent_counts
+            [best_prior_bit_len];
+
         if length > MAX_EXPONENT {
             return err_exit_code(ExitCode::CoefficientOutOfRange, "CoefficientOutOfRange");
         }
@@ -435,7 +466,7 @@ impl ModelPerColor {
         )?;
 
         if coef != 0 {
-            let sign = self.get_sign_counts_mut(ptcc8);
+            let sign = &mut self.sign_counts[calc_sign_index(best_prior)][best_prior_bit_len];
 
             bool_writer.put(
                 coef >= 0,
@@ -448,8 +479,11 @@ impl ModelPerColor {
                 let mut i: i32 = length as i32 - 2;
 
                 if i >= min_threshold {
-                    let thresh_prob =
-                        self.get_residual_threshold_counts_mut(ptcc8, min_threshold, length as i32);
+                    let thresh_prob = self.get_residual_threshold_counts_mut(
+                        best_prior_abs,
+                        min_threshold,
+                        length as i32,
+                    );
 
                     let mut encoded_so_far = 1;
                     while i >= min_threshold {
@@ -474,14 +508,7 @@ impl ModelPerColor {
                 }
 
                 if i >= 0 {
-                    debug_assert!(
-                        (ptcc8.num_non_zeros_bin as usize) < self.counts_x.len(),
-                        "d1 {0} too high",
-                        ptcc8.num_non_zeros_bin
-                    );
-
-                    let res_prob = &mut self.counts_x[ptcc8.num_non_zeros_bin as usize]
-                        [zig15offset]
+                    let res_prob = &mut self.counts_x[num_non_zeros_edge_bin][zig15offset]
                         .residual_noise_counts;
 
                     bool_writer
@@ -501,12 +528,12 @@ impl ModelPerColor {
 
     fn get_residual_threshold_counts_mut(
         &mut self,
-        ptcc8: &ProbabilityTablesCoefficientContext,
+        best_prior_abs: u32,
         min_threshold: i32,
         length: i32,
     ) -> &mut [Branch; RESIDUAL_THRESHOLD_COUNTS_D3] {
         return &mut self.residual_threshold_counts[cmp::min(
-            (ptcc8.best_prior.abs() >> min_threshold) as usize,
+            (best_prior_abs >> min_threshold) as usize,
             self.residual_threshold_counts.len() - 1,
         )][cmp::min(
             (length - min_threshold) as usize,
@@ -527,10 +554,6 @@ impl ModelPerColor {
                 [(num_nonzeros as usize + 3) / 7];
         }
     }
-
-    fn get_sign_counts_mut(&mut self, ptcc8: &ProbabilityTablesCoefficientContext) -> &mut Branch {
-        &mut self.sign_counts[calc_sign_index(ptcc8.best_prior)][ptcc8.best_prior_bit_len as usize]
-    }
 }
 
 impl Model {

diff --git a/src/structs/probability_tables.rs b/src/structs/probability_tables.rs
@@ -4,18 +4,14 @@
  *  This software incorporates material from third parties. See NOTICE.txt for details.
  *--------------------------------------------------------------------------------------------*/
 
-use std::cmp;
-
 use crate::consts::*;
 use crate::enabled_features;
-use crate::helpers::*;
 use crate::structs::idct::*;
 use crate::structs::model::*;
 use crate::structs::quantization_tables::*;
 
 use super::block_based_image::AlignedBlock;
 use super::block_context::NeighborData;
-use super::probability_tables_coefficient_context::ProbabilityTablesCoefficientContext;
 
 use wide::i16x8;
 use wide::i32x8;
@@ -121,34 +117,39 @@ impl ProbabilityTables {
         left: &AlignedBlock,
         above: &AlignedBlock,
         above_left: &AlignedBlock,
-    ) -> [i16; 64] {
+    ) -> [u16; 64] {
         let mut best_prior = [0; 64];
 
         if ALL_PRESENT {
             // compiler does a pretty amazing job with SSE/AVX2 here
             for i in 8..64 {
                 // approximate average of 3 without a divide with double the weight for left/top vs diagonal
-                best_prior[i] = (((left.get_coefficient(i).abs() as u32
-                    + above.get_coefficient(i).abs() as u32)
+                //
+                // No need to go to 32 bits since max exponent is 11, ie 2047, so
+                // (2047 + 2047) * 13 + 2047 * 6 = 65504 which still fits in 16 bits.
+                // In addition, if we ever returned anything higher than 2047, it would
+                // assert in the array lookup in the model.
+                best_prior[i] = ((left.get_coefficient(i).unsigned_abs()
+                    + above.get_coefficient(i).unsigned_abs())
                     * 13
-                    + 6 * above_left.get_coefficient(i).abs() as u32)
-                    >> 5) as i16;
+                    + 6 * above_left.get_coefficient(i).unsigned_abs())
+                    >> 5;
             }
         } else {
             // handle edge case :) where we are on the top or left edge
 
             if self.left_present {
                 for i in 8..64 {
-                    best_prior[i] = left.get_coefficient(i).abs();
+                    best_prior[i] = left.get_coefficient(i).unsigned_abs();
                 }
             } else if self.above_present {
                 for i in 8..64 {
-                    best_prior[i] = above.get_coefficient(i).abs();
+                    best_prior[i] = above.get_coefficient(i).unsigned_abs();
                 }
             }
         }
 
-        return best_prior;
+        best_prior
     }
 
     #[inline(always)]
@@ -159,8 +160,7 @@ impl ProbabilityTables {
         here: &AlignedBlock,
         above: &AlignedBlock,
         left: &AlignedBlock,
-        num_non_zeros_x: u8,
-    ) -> ProbabilityTablesCoefficientContext {
+    ) -> i32 {
         let mut compute_lak_coeffs_x: [i32; 8] = [0; 8];
         let mut compute_lak_coeffs_a: [i32; 8] = [0; 8];
 
@@ -212,11 +212,7 @@ impl ProbabilityTables {
 
             coef_idct = &qt.get_icos_idct_edge8192_dequantized_y()[coefficient..coefficient + 8];
         } else {
-            return ProbabilityTablesCoefficientContext {
-                best_prior: 0,
-                num_non_zeros_bin: num_non_zeros_x - 1,
-                best_prior_bit_len: 0,
-            };
+            return 0;
         }
 
         let mut best_prior: i32 = 0;
@@ -229,13 +225,7 @@ impl ProbabilityTables {
             // rounding towards zero before adding coeffs_a[0] helps ratio slightly, but this is cheaper
         }
 
-        best_prior /= coef_idct[0];
-
-        return ProbabilityTablesCoefficientContext {
-            best_prior,
-            num_non_zeros_bin: num_non_zeros_x - 1,
-            best_prior_bit_len: u32_bit_length(cmp::min(best_prior.unsigned_abs(), 1023)),
-        };
+        best_prior / coef_idct[0]
     }
 
     fn from_stride(block: &[i16; 64], offset: usize, stride: usize) -> i16x8 {