Skip to content

Commit

Permalink
WIP: get rid of first transposition in IDCT
Browse files Browse the repository at this point in the history
  • Loading branch information
Melirius committed May 10, 2024
1 parent b92e033 commit f2b4eb3
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 30 deletions.
102 changes: 73 additions & 29 deletions src/structs/lepton_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use anyhow::{Context, Result};
use crate::consts::{ICOS_BASED_8192_SCALED, ICOS_BASED_8192_SCALED_PM};
use crate::structs::idct::get_q;
use bytemuck::cast;
use wide::i32x8;
use wide::{i16x8, i32x8};

use default_boxed::DefaultBoxed;

Expand Down Expand Up @@ -307,6 +307,11 @@ fn parse_token<R: Read, const ALL_PRESENT: bool>(
Ok(())
}

/// Maps an index into an 8x8 block to the index of the transposed position.
///
/// The low three bits of `i` and the next three bits trade places, so an index of the
/// form `a * 8 + b` (with `a`, `b` in `0..8`) becomes `b * 8 + a`. Bits above the low
/// six are discarded, so the result is always in `0..64`.
fn tr(i: u8) -> usize {
    let index = usize::from(i);
    let low = index & 0b111;
    let high = (index >> 3) & 0b111;
    (low << 3) | high
}

/// Reads the 8x8 coefficient block from the bit reader, taking into account the neighboring
/// blocks, probability tables and model.
///
Expand Down Expand Up @@ -374,7 +379,7 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
.context(here!())?;

if coef != 0 {
output.set_coefficient(coord as usize, coef);
output.set_coefficient(tr(coord), coef);
nonzero_mask |= 1 << coord;

num_non_zeros_7x7_remaining -= 1;
Expand All @@ -396,39 +401,73 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
);
}

// here we calculate the furthest x and y coordinates that have non-zero coefficients
// which is later used as a predictor for the number of edge coefficients,
// dequantize raster coefficients, and produce predictors for edge DCT coefficients
let q: AlignedBlock = AlignedBlock::new(cast(*qt.get_quantization_table()));
let mut h_pred: [i32; 8] = *neighbor_data.neighbor_context_above.get_horizontal_coef();
let mut vert_pred: i32x8 = cast(*neighbor_data.neighbor_context_left.get_vertical_coef());
let mut mult: i32x8 = cast(ICOS_BASED_8192_SCALED);
// load predictors data from neighborhood blocks
let mut horiz_pred: i32x8 = cast(*neighbor_data.neighbor_context_above.get_horizontal_coef());
let mut vert_pred: [i32; 8] = *neighbor_data.neighbor_context_left.get_vertical_coef();

for i in 1..8 {
if nonzero_mask & (0xFE << (i * 8)) != 0 {
// have non-zero coefficients in the row i
eob_y = i as u8;

raster[i] = get_q(i, &output) * get_q(i, &q);
// some extreme coefficients can cause overflows, but since these are just predictors, no need to panic
horiz_pred -= raster[i] * ICOS_BASED_8192_SCALED[i];
vert_pred[i] = vert_pred[i].wrapping_sub((raster[i] * mult).reduce_add());
}
{
let q_tr: AlignedBlock = AlignedBlock::new(cast(*qt.get_quantization_table_transposed()));
// load predictors data from neighborhood blocks

for i in 1..8 {
if nonzero_mask & (0xFE << (i * 8)) != 0 {
// have non-zero coefficients in the row i
eob_y = i as u8;
}

if nonzero_mask & (0x0101010101010100 << i) != 0 {
// have non-zero coefficients in the column i
eob_x = i as u8;

if nonzero_mask & (0x0101010101010100 << i) != 0 {
// have non-zero coefficients in the column i
eob_x = i as u8;
raster[i] = get_q(i, &output) * get_q(i, &q_tr);
// some extreme coefficients can cause overflows, but since these are just predictors, no need to panic
vert_pred -= raster[i] * ICOS_BASED_8192_SCALED[i];
h_pred[i] = h_pred[i].wrapping_sub((raster[i] * mult).reduce_add());
}
}
}

let h_pred = horiz_pred.to_array();
let v_pred = vert_pred.to_array();

// let t = i16x8::transpose(cast(*output.get_block()));
// *output.get_block_mut() = cast(t);
// // here we calculate the furthest x and y coordinates that have non-zero coefficients
// // which is later used as a predictor for the number of edge coefficients,
// // dequantize raster coefficients, and produce predictors for edge DCT coefficients
// //let q: AlignedBlock = AlignedBlock::new(cast(*qt.get_quantization_table()));
// let q = i16x8::transpose(cast(*qt.get_quantization_table_transposed()));
// let mut mult: i32x8 = cast(ICOS_BASED_8192_SCALED);
// // load predictors data from neighborhood blocks
// let mut horiz_pred: i32x8 = cast(*neighbor_data.neighbor_context_above.get_horizontal_coef());
// let mut vert_pred: [i32; 8] = *neighbor_data.neighbor_context_left.get_vertical_coef();

// for i in 1..8 {
// if nonzero_mask & (0xFE << (i * 8)) != 0 {
// // have non-zero coefficients in the row i
// eob_y = i as u8;

// raster[i] = get_q(i, &output) * i32x8::from_i16x8(q[i]);//get_q(i, &q);
// // some extreme coefficients can cause overflows, but since these are just predictors, no need to panic
// horiz_pred -= raster[i] * ICOS_BASED_8192_SCALED[i];
// vert_pred[i] = vert_pred[i].wrapping_sub((raster[i] * mult).reduce_add());
// }

// if nonzero_mask & (0x0101010101010100 << i) != 0 {
// // have non-zero coefficients in the column i
// eob_x = i as u8;
// }
// }

// let h_pred = horiz_pred.to_array();

// assert_eq!(h_pred, h0_pred);
// assert_eq!(vert_pred, v0_pred);
// assert_eq!(raster0, i32x8::transpose(raster));

decode_edge::<R, ALL_PRESENT>(
model_per_color,
bool_reader,
&h_pred,
&vert_pred,
&h_pred,//&horiz_pred,//
&v_pred,//&vert_pred,//
&mut output,
qt,
pt,
Expand All @@ -438,10 +477,15 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
eob_y,
)?;

let t = i16x8::transpose(cast(*output.get_block()));
*output.get_block_mut() = cast(t);
raster = i32x8::transpose(raster);

// here we produce first part of edge DCT coefficients predictions for neighborhood blocks
// and finalize dequantization of raster
horiz_pred = 0.into();
vert_pred = [0; 8];
let q: AlignedBlock = AlignedBlock::new(cast(*qt.get_quantization_table()));
let mut horiz_pred: i32x8 = 0.into();
let mut vert_pred: [i32; 8] = [0; 8];
mult = cast(ICOS_BASED_8192_SCALED_PM);

if nonzero_mask & 0xFE != 0 {
Expand Down Expand Up @@ -587,7 +631,7 @@ fn decode_one_edge<R: Read, const ALL_PRESENT: bool, const HORIZONTAL: bool>(

if coef != 0 {
num_non_zeros_edge -= 1;
here_mut.set_coefficient(coord, coef);
here_mut.set_coefficient(tr(coord as u8), coef);

*nonzero_mask |= 1 << coord;
}
Expand Down
11 changes: 10 additions & 1 deletion src/structs/quantization_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub struct QuantizationTables {
icos_idct_edge8192_dequantized_x: [i32; 64],
icos_idct_edge8192_dequantized_y: [i32; 64],
quantization_table: [u16; 64],
quantization_table_transposed: [u16; 64],
freq_max: [u16; 64],
min_noise_threshold: [u8; 64],
}
Expand All @@ -29,6 +30,7 @@ impl QuantizationTables {
icos_idct_edge8192_dequantized_x: [0; 64],
icos_idct_edge8192_dequantized_y: [0; 64],
quantization_table: [0; 64],
quantization_table_transposed: [0; 64],
freq_max: [0; 64],
min_noise_threshold: [0; 64],
};
Expand All @@ -40,11 +42,14 @@ impl QuantizationTables {

fn set_quantization_table(&mut self, quantization_table: &[u16; 64]) {
for i in 0..64 {
self.quantization_table[i] = quantization_table[RASTER_TO_ZIGZAG[i] as usize];
let q = quantization_table[RASTER_TO_ZIGZAG[i] as usize];
self.quantization_table[i] = q;
//self.quantization_table_transposed[(i >> 3) | ((i & 7) << 3)] = q;
}

for pixel_row in 0..8 {
for i in 0..8 {
self.quantization_table_transposed[(pixel_row * 8) + i] = self.quantization_table[(i * 8) + pixel_row];
self.icos_idct_edge8192_dequantized_x[(pixel_row * 8) + i] = ICOS_BASED_8192_SCALED
[i]
* (self.quantization_table[(i * 8) + pixel_row] as i32);
Expand Down Expand Up @@ -79,6 +84,10 @@ impl QuantizationTables {
&self.quantization_table
}

/// Returns a reference to the transposed copy of the quantization table.
///
/// `set_quantization_table` fills this array so that entry `row * 8 + col` holds
/// `quantization_table[col * 8 + row]`.
pub fn get_quantization_table_transposed(&self) -> &[u16; 64] {
&self.quantization_table_transposed
}

/// Returns the `min_noise_threshold` entry stored at index `coef`.
///
/// # Panics
///
/// Panics if `coef` is not a valid index into the 64-entry table.
pub fn get_min_noise_threshold(&self, coef: usize) -> u8 {
self.min_noise_threshold[coef]
}
Expand Down

0 comments on commit f2b4eb3

Please sign in to comment.