Skip to content

Commit

Permalink
added scalar overflow handling (#57)
Browse files Browse the repository at this point in the history
* added scalar overflow handling

* fix defaults so that we maintain backward compat and do the right thing going forward
  • Loading branch information
mcroomp authored Mar 15, 2024
1 parent 99aeec3 commit 2e9bf26
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 58 deletions.
Binary file added images/mathoverflow_scalar.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/mathoverflow_scalar.lep
Binary file not shown.
41 changes: 31 additions & 10 deletions src/enabled_features.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// features that are enabled in the encoder. Turn off for potential backward compat issues.
#[derive(Debug, Clone, Copy)]
pub struct EnabledFeatures {
/// enables/disables reading of progressive images
pub progressive: bool,
Expand All @@ -12,32 +13,52 @@ pub struct EnabledFeatures {
// maximum jpeg height
pub max_jpeg_height: i32,

// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
/// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
pub use_16bit_dc_estimate: bool,

/// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
pub use_16bit_adv_predict: bool,
}

impl Default for EnabledFeatures {
fn default() -> Self {
impl EnabledFeatures {
/// parameters that allow everything for encoding that is compatible with c++ lepton compiled with SIMD
#[allow(dead_code)]
pub fn compat_lepton_vector_write() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: true,
max_jpeg_width: 16386,
max_jpeg_height: 16386,
use_16bit_dc_estimate: false,
max_jpeg_width: 16386,
use_16bit_dc_estimate: true,
use_16bit_adv_predict: true,
}
}
}

impl EnabledFeatures {
/// parameters that allow everything
/// parameters that allow everything for decoding c++ lepton images encoded
/// with the scalar compile options
#[allow(dead_code)]
pub fn all() -> Self {
pub fn compat_lepton_scalar_read() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: true,
reject_dqts_with_zeros: false,
max_jpeg_height: i32::MAX,
max_jpeg_width: i32::MAX,
use_16bit_dc_estimate: false,
use_16bit_adv_predict: false,
}
}

/// parameters that allow everything for decoding c++ lepton images encoded
/// with the vector (SSE2/AVX2) compile options
#[allow(dead_code)]
pub fn compat_lepton_vector_read() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: false,
max_jpeg_height: i32::MAX,
max_jpeg_width: i32::MAX,
use_16bit_dc_estimate: true,
use_16bit_adv_predict: true,
}
}
}
15 changes: 10 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub unsafe extern "C" fn WrapperCompressImage(
&mut reader,
&mut writer,
number_of_threads as usize,
&EnabledFeatures::default(),
&EnabledFeatures::compat_lepton_vector_write(),
) {
Ok(_) => {}
Err(e) => match e.root_cause().downcast_ref::<LeptonError>() {
Expand Down Expand Up @@ -159,14 +159,19 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
) -> i32 {
match catch_unwind(|| {
// For back-compat with C++ version we allow decompression of images with zeros in DQT tables
let mut enabled_features = EnabledFeatures::all();
enabled_features.reject_dqts_with_zeros = false;

// C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
// depending on the compiler options. If use_16bit_dc_estimate=true, the decompression uses a back-compat
// mode that considers it. The caller should set use_16bit_dc_estimate to true only for images that were
// compressed by C++ version with relevant compiler options.
enabled_features.use_16bit_dc_estimate = use_16bit_dc_estimate;

// this is a bit of a mess since for a while we were encoded a mix of 16 and 32 bit math
// (hence the two parameters in features).

let mut enabled_features = EnabledFeatures {
use_16bit_dc_estimate: use_16bit_dc_estimate,
..EnabledFeatures::compat_lepton_vector_read()
};

loop {
let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize);
Expand All @@ -179,7 +184,7 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
&mut reader,
&mut writer,
number_of_threads as usize,
&enabled_features,
&mut enabled_features,
) {
Ok(_) => {
*result_size = writer.position().into();
Expand Down
14 changes: 11 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ fn main_with_result() -> anyhow::Result<()> {
let mut dump = false;
let mut all = false;
let mut overwrite = false;
let mut enabled_features = EnabledFeatures::default();
let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();

// only output the log if we are connected to a console (otherwise if there is redirection we would corrupt the file)
if stdout().is_terminal() {
Expand Down Expand Up @@ -81,6 +81,14 @@ fn main_with_result() -> anyhow::Result<()> {
enabled_features.reject_dqts_with_zeros = false;
} else if args[i] == "-use16bitdc" {
enabled_features.use_16bit_dc_estimate = true;
} else if args[i] == "-useleptonscalar" {
// lepton files that were encoded by the dropbox c++ version compiled in scalar mode
enabled_features.use_16bit_adv_predict = false;
enabled_features.use_16bit_dc_estimate = false;
} else if args[i] == "-useleptonvector" {
// lepton files that were encoded by the dropbox c++ version compiled in AVX2/SSE2 mode
enabled_features.use_16bit_adv_predict = true;
enabled_features.use_16bit_dc_estimate = true;
} else {
return err_exit_code(
ExitCode::SyntaxError,
Expand Down Expand Up @@ -111,7 +119,7 @@ fn main_with_result() -> anyhow::Result<()> {
.context(here!())?;
} else {
lh = LeptonHeader::new();
lh.read_lepton_header(&mut reader, &enabled_features)
lh.read_lepton_header(&mut reader, &mut enabled_features)
.context(here!())?;

let _metrics;
Expand All @@ -131,7 +139,7 @@ fn main_with_result() -> anyhow::Result<()> {
println!("{0}", s.replace("},", "},\r\n").replace("],", "],\r\n"));

if !lh
.advance_next_header_segment(&EnabledFeatures::default())
.advance_next_header_segment(&enabled_features)
.context(here!())?
{
break;
Expand Down
3 changes: 2 additions & 1 deletion src/structs/lepton_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,8 @@ fn parse_token<R: Read, const ALL_PRESENT: bool>(
eob_y,
)?;

let predicted_dc = pt.adv_predict_dc_pix::<ALL_PRESENT>(&output, qt, context, num_non_zeros);
let predicted_dc =
pt.adv_predict_dc_pix::<ALL_PRESENT>(&output, qt, context, num_non_zeros, features);

let coef = model
.read_dc(
Expand Down
3 changes: 2 additions & 1 deletion src/structs/lepton_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ fn serialize_tokens<W: Write, const ALL_PRESENT: bool>(
)
.context(here!())?;

let predicted_val = pt.adv_predict_dc_pix::<ALL_PRESENT>(&block, qt, context, &num_non_zeros);
let predicted_val =
pt.adv_predict_dc_pix::<ALL_PRESENT>(&block, qt, context, &num_non_zeros, features);

let avg_predicted_dc = ProbabilityTables::adv_predict_or_unpredict_dc(
block.get_dc(),
Expand Down
59 changes: 43 additions & 16 deletions src/structs/lepton_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ pub fn decode_lepton_wrapper<R: Read + Seek, W: Write>(

let mut lh = LeptonHeader::new();

lh.read_lepton_header(reader, enabled_features)
let mut features_mut = enabled_features.clone();

lh.read_lepton_header(reader, &mut features_mut)
.context(here!())?;

let metrics = lh
.recode_jpeg(writer, reader, size, num_threads, enabled_features)
.recode_jpeg(writer, reader, size, num_threads, &features_mut)
.context(here!())?;

return Ok(metrics);
Expand All @@ -75,7 +77,8 @@ pub fn encode_lepton_wrapper<R: Read + Seek, W: Write + Seek>(
) -> Result<Metrics> {
let (lp, image_data) = read_jpeg(reader, enabled_features, max_threads, |_jh| {})?;

lp.write_lepton_header(writer).context(here!())?;
lp.write_lepton_header(writer, enabled_features)
.context(here!())?;

let metrics = run_lepton_encoder_threads(
&lp.jpeg_header,
Expand Down Expand Up @@ -125,14 +128,11 @@ pub fn encode_lepton_wrapper_verify(

info!("decompressing to verify contents");

let mut c = enabled_features.clone();

metrics.merge_from(
decode_lepton_wrapper(
&mut verifyreader,
&mut verify_buffer,
max_threads,
&enabled_features,
)
.context(here!())?,
decode_lepton_wrapper(&mut verifyreader, &mut verify_buffer, max_threads, &mut c)
.context(here!())?,
);

if input_data.len() != verify_buffer.len() {
Expand Down Expand Up @@ -997,7 +997,7 @@ impl LeptonHeader {
pub fn read_lepton_header<R: Read>(
&mut self,
reader: &mut R,
enabled_features: &EnabledFeatures,
enabled_features: &mut EnabledFeatures,
) -> Result<()> {
let mut header = [0 as u8; LEPTON_FILE_HEADER.len()];

Expand Down Expand Up @@ -1039,6 +1039,13 @@ impl LeptonHeader {
if header[5] == 'M' as u8 && header[6] == 'S' as u8 {
c.set_position(7);
self.uncompressed_lepton_header_size = c.read_u32::<LittleEndian>()?;

// read the flag bits to know how we should decode this file
let flags = c.read_u8()?;
if (flags & 0x80) != 0 {
enabled_features.use_16bit_dc_estimate = (flags & 0x01) != 0;
enabled_features.use_16bit_adv_predict = (flags & 0x02) != 0;
}
}

// full size of the original file
Expand Down Expand Up @@ -1230,7 +1237,11 @@ impl LeptonHeader {
return Ok(hdr_data);
}

pub fn write_lepton_header<W: Write>(&self, writer: &mut W) -> Result<()> {
pub fn write_lepton_header<W: Write>(
&self,
writer: &mut W,
enabled_features: &EnabledFeatures,
) -> Result<()> {
let mut lepton_header = Vec::<u8>::new();

{
Expand Down Expand Up @@ -1273,7 +1284,21 @@ impl LeptonHeader {
writer.write_u8('M' as u8)?;
writer.write_u8('S' as u8)?;
writer.write_u32::<LittleEndian>(lepton_header.len() as u32)?;
writer.write_all(&[0; 6])?;

// write the flags that were used to encode this file
writer.write_u8(
0x80 | if enabled_features.use_16bit_dc_estimate {
1
} else {
0
} | if enabled_features.use_16bit_adv_predict {
2
} else {
0
},
)?;

writer.write_all(&[0; 5])?;

writer.write_u32::<LittleEndian>(self.jpeg_file_size)?;
writer.write_u32::<LittleEndian>(compressed_header.len() as u32)?;
Expand Down Expand Up @@ -1706,10 +1731,12 @@ fn parse_and_write_header() {
0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, // EOI
];

let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();

let mut lh = LeptonHeader::new();
lh.jpeg_file_size = 123;

lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &EnabledFeatures::all())
lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &enabled_features)
.unwrap();
lh.thread_handoff.push(ThreadHandoff {
luma_y_start: 0,
Expand All @@ -1722,12 +1749,12 @@ fn parse_and_write_header() {
});

let mut serialized = Vec::new();
lh.write_lepton_header(&mut Cursor::new(&mut serialized))
lh.write_lepton_header(&mut Cursor::new(&mut serialized), &enabled_features)
.unwrap();

let mut other = LeptonHeader::new();
let mut other_reader = Cursor::new(&serialized);
other
.read_lepton_header(&mut other_reader, &EnabledFeatures::all())
.read_lepton_header(&mut other_reader, &mut enabled_features)
.unwrap();
}
52 changes: 40 additions & 12 deletions src/structs/probability_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use std::cmp;

use crate::consts::*;
use crate::enabled_features;
use crate::helpers::*;
use crate::structs::idct::*;
use crate::structs::model::*;
Expand Down Expand Up @@ -258,6 +259,7 @@ impl ProbabilityTables {
qt: &QuantizationTables,
block_context: &BlockContext,
num_non_zeros: &[NeighborSummary],
enabled_features: &enabled_features::EnabledFeatures,
) -> PredictDCResult {
let q_transposed = qt.get_quantization_table_transposed();

Expand All @@ -268,25 +270,51 @@ impl ProbabilityTables {
let calc_left = || {
let left_context = block_context.neighbor_context_left(num_non_zeros);

let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
if enabled_features.use_16bit_adv_predict {
let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;

b - a
b - a
} else {
let mut dc_estimates = [0i16; 8];
for i in 0..8 {
let a = i32::from(pixels_sans_dc.get_block()[i << 3]) + 1024;
let pixel_delta = i32::from(pixels_sans_dc.get_block()[i << 3])
- i32::from(pixels_sans_dc.get_block()[(i << 3) + 1]);
let b = i32::from(left_context.get_vertical()[i]) - (pixel_delta / 2); //round to zero
dc_estimates[i] = (b - a) as i16;
}

i16x8::from(dc_estimates)
}
};

let calc_above = || {
let above_context = block_context.neighbor_context_above(num_non_zeros);

let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
if enabled_features.use_16bit_adv_predict {
let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;

b - a
b - a
} else {
let mut dc_estimates = [0i16; 8];
for i in 0..8 {
let a = i32::from(pixels_sans_dc.get_block()[i]) + 1024;
let pixel_delta = i32::from(pixels_sans_dc.get_block()[i])
- i32::from(pixels_sans_dc.get_block()[i + 8]);
let b = i32::from(above_context.get_horizontal()[i]) - (pixel_delta / 2); //round to zero
dc_estimates[i] = (b - a) as i16;
}

i16x8::from(dc_estimates)
}
};

let min_dc;
Expand Down
Loading

0 comments on commit 2e9bf26

Please sign in to comment.