diff --git a/images/mathoverflow_scalar.jpg b/images/mathoverflow_scalar.jpg
new file mode 100644
index 00000000..621a519a
Binary files /dev/null and b/images/mathoverflow_scalar.jpg differ
diff --git a/images/mathoverflow_scalar.lep b/images/mathoverflow_scalar.lep
new file mode 100644
index 00000000..cbcf24b9
Binary files /dev/null and b/images/mathoverflow_scalar.lep differ
diff --git a/src/enabled_features.rs b/src/enabled_features.rs
index de053f38..ac1828b5 100644
--- a/src/enabled_features.rs
+++ b/src/enabled_features.rs
@@ -1,4 +1,5 @@
 // features that are enabled in the encoder. Turn off for potential backward compat issues.
+#[derive(Debug, Clone, Copy)]
 pub struct EnabledFeatures {
     /// enables/disables reading of progressive images
     pub progressive: bool,
@@ -12,32 +13,52 @@ pub struct EnabledFeatures {
     // maximum jpeg height
     pub max_jpeg_height: i32,
 
-    // Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
+    /// Sadly the C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
     pub use_16bit_dc_estimate: bool,
+
+    /// Sadly the C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
+    pub use_16bit_adv_predict: bool,
 }
 
-impl Default for EnabledFeatures {
-    fn default() -> Self {
+impl EnabledFeatures {
+    /// parameters that allow everything for encoding that is compatible with the C++ lepton encoder compiled with SIMD
+    #[allow(dead_code)]
+    pub fn compat_lepton_vector_write() -> Self {
         Self {
             progressive: true,
             reject_dqts_with_zeros: true,
-            max_jpeg_width: 16386,
             max_jpeg_height: 16386,
-            use_16bit_dc_estimate: false,
+            max_jpeg_width: 16386,
+            use_16bit_dc_estimate: true,
+            use_16bit_adv_predict: true,
         }
     }
-}
 
-impl EnabledFeatures {
-    /// parameters that allow everything
+    /// parameters that allow everything for decoding C++ lepton images encoded
+    /// with the scalar compile options
     #[allow(dead_code)]
-    pub fn all() -> Self {
+    pub fn compat_lepton_scalar_read() -> Self {
         Self {
             progressive: true,
-            reject_dqts_with_zeros: true,
+            reject_dqts_with_zeros: false,
             max_jpeg_height: i32::MAX,
             max_jpeg_width: i32::MAX,
             use_16bit_dc_estimate: false,
+            use_16bit_adv_predict: false,
+        }
+    }
+
+    /// parameters that allow everything for decoding C++ lepton images encoded
+    /// with the vector (SSE2/AVX2) compile options
+    #[allow(dead_code)]
+    pub fn compat_lepton_vector_read() -> Self {
+        Self {
+            progressive: true,
+            reject_dqts_with_zeros: false,
+            max_jpeg_height: i32::MAX,
+            max_jpeg_width: i32::MAX,
+            use_16bit_dc_estimate: true,
+            use_16bit_adv_predict: true,
         }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 974b9a69..3789a560 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -96,7 +96,7 @@ pub unsafe extern "C" fn WrapperCompressImage(
         &mut reader,
         &mut writer,
         number_of_threads as usize,
-        &EnabledFeatures::default(),
+        &EnabledFeatures::compat_lepton_vector_write(),
     ) {
         Ok(_) => {}
         Err(e) => match e.root_cause().downcast_ref::() {
@@ -159,14 +159,19 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
 ) -> i32 {
     match catch_unwind(|| {
         // For back-compat with C++ version we allow decompression of images with zeros in DQT tables
-        let mut enabled_features = EnabledFeatures::all();
-        enabled_features.reject_dqts_with_zeros = false;
 
         // C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
         // depending on the compiler options. If use_16bit_dc_estimate=true, the decompression uses a back-compat
         // mode that considers it. The caller should set use_16bit_dc_estimate to true only for images that were
         // compressed by C++ version with relevant compiler options.
-        enabled_features.use_16bit_dc_estimate = use_16bit_dc_estimate;
+
+        // this is a bit of a mess since for a while we were encoding a mix of 16 and 32 bit math
+        // (hence the two parameters in features).
+
+        let mut enabled_features = EnabledFeatures {
+            use_16bit_dc_estimate: use_16bit_dc_estimate,
+            ..EnabledFeatures::compat_lepton_vector_read()
+        };
 
         loop {
             let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize);
@@ -179,7 +184,7 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
                 &mut reader,
                 &mut writer,
                 number_of_threads as usize,
-                &enabled_features,
+                &mut enabled_features,
             ) {
                 Ok(_) => {
                     *result_size = writer.position().into();
diff --git a/src/main.rs b/src/main.rs
index 3a3b81ce..49474fa3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -52,7 +52,7 @@ fn main_with_result() -> anyhow::Result<()> {
     let mut dump = false;
     let mut all = false;
     let mut overwrite = false;
-    let mut enabled_features = EnabledFeatures::default();
+    let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();
 
     // only output the log if we are connected to a console (otherwise if there is redirection we would corrupt the file)
     if stdout().is_terminal() {
@@ -81,6 +81,14 @@ fn main_with_result() -> anyhow::Result<()> {
             enabled_features.reject_dqts_with_zeros = false;
         } else if args[i] == "-use16bitdc" {
             enabled_features.use_16bit_dc_estimate = true;
+        } else if args[i] == "-useleptonscalar" {
+            // lepton files that were encoded by the Dropbox C++ version compiled in scalar mode
+            enabled_features.use_16bit_adv_predict = false;
+            enabled_features.use_16bit_dc_estimate = false;
+        } else if args[i] == "-useleptonvector" {
+            // lepton files that were encoded by the Dropbox C++ version compiled in AVX2/SSE2 mode
+            enabled_features.use_16bit_adv_predict = true;
+            enabled_features.use_16bit_dc_estimate = true;
         } else {
             return err_exit_code(
                 ExitCode::SyntaxError,
@@ -111,7 +119,7 @@ fn main_with_result() -> anyhow::Result<()> {
             .context(here!())?;
     } else {
         lh = LeptonHeader::new();
-        lh.read_lepton_header(&mut reader, &enabled_features)
+        lh.read_lepton_header(&mut reader, &mut enabled_features)
             .context(here!())?;
 
         let _metrics;
@@ -131,7 +139,7 @@ fn main_with_result() -> anyhow::Result<()> {
             println!("{0}", s.replace("},", "},\r\n").replace("],", "],\r\n"));
 
             if !lh
-                .advance_next_header_segment(&EnabledFeatures::default())
+                .advance_next_header_segment(&enabled_features)
                 .context(here!())?
             {
                 break;
diff --git a/src/structs/lepton_decoder.rs b/src/structs/lepton_decoder.rs
index c079cc17..c420413c 100644
--- a/src/structs/lepton_decoder.rs
+++ b/src/structs/lepton_decoder.rs
@@ -360,7 +360,8 @@ fn parse_token(
         eob_y,
     )?;
 
-    let predicted_dc = pt.adv_predict_dc_pix::(&output, qt, context, num_non_zeros);
+    let predicted_dc =
+        pt.adv_predict_dc_pix::(&output, qt, context, num_non_zeros, features);
 
     let coef = model
         .read_dc(
diff --git a/src/structs/lepton_encoder.rs b/src/structs/lepton_encoder.rs
index 674c2d2a..61a336a3 100644
--- a/src/structs/lepton_encoder.rs
+++ b/src/structs/lepton_encoder.rs
@@ -378,7 +378,8 @@ fn serialize_tokens(
     )
     .context(here!())?;
 
-    let predicted_val = pt.adv_predict_dc_pix::(&block, qt, context, &num_non_zeros);
+    let predicted_val =
+        pt.adv_predict_dc_pix::(&block, qt, context, &num_non_zeros, features);
 
     let avg_predicted_dc = ProbabilityTables::adv_predict_or_unpredict_dc(
         block.get_dc(),
diff --git a/src/structs/lepton_format.rs b/src/structs/lepton_format.rs
index e7c96c98..2c2c694a 100644
--- a/src/structs/lepton_format.rs
+++ b/src/structs/lepton_format.rs
@@ -56,11 +56,13 @@ pub fn decode_lepton_wrapper(
 
     let mut lh = LeptonHeader::new();
 
-    lh.read_lepton_header(reader, enabled_features)
+    let mut features_mut = enabled_features.clone();
+
+    lh.read_lepton_header(reader, &mut features_mut)
         .context(here!())?;
 
     let metrics = lh
-        .recode_jpeg(writer, reader, size, num_threads, enabled_features)
+        .recode_jpeg(writer, reader, size, num_threads, &features_mut)
         .context(here!())?;
 
     return Ok(metrics);
@@ -75,7 +77,8 @@ pub fn encode_lepton_wrapper(
 ) -> Result {
     let (lp, image_data) = read_jpeg(reader, enabled_features, max_threads, |_jh| {})?;
 
-    lp.write_lepton_header(writer).context(here!())?;
+    lp.write_lepton_header(writer, enabled_features)
+        .context(here!())?;
 
     let metrics = run_lepton_encoder_threads(
         &lp.jpeg_header,
@@ -125,14 +128,11 @@ pub fn encode_lepton_wrapper_verify(
 
     info!("decompressing to verify contents");
 
+    let mut c = enabled_features.clone();
+
     metrics.merge_from(
-        decode_lepton_wrapper(
-            &mut verifyreader,
-            &mut verify_buffer,
-            max_threads,
-            &enabled_features,
-        )
-        .context(here!())?,
+        decode_lepton_wrapper(&mut verifyreader, &mut verify_buffer, max_threads, &mut c)
+            .context(here!())?,
     );
 
     if input_data.len() != verify_buffer.len() {
@@ -997,7 +997,7 @@ impl LeptonHeader {
     pub fn read_lepton_header(
         &mut self,
         reader: &mut R,
-        enabled_features: &EnabledFeatures,
+        enabled_features: &mut EnabledFeatures,
     ) -> Result<()> {
         let mut header = [0 as u8; LEPTON_FILE_HEADER.len()];
 
@@ -1039,6 +1039,13 @@ impl LeptonHeader {
         if header[5] == 'M' as u8 && header[6] == 'S' as u8 {
             c.set_position(7);
             self.uncompressed_lepton_header_size = c.read_u32::()?;
+
+            // read the flag bits to know how we should decode this file
+            let flags = c.read_u8()?;
+            if (flags & 0x80) != 0 {
+                enabled_features.use_16bit_dc_estimate = (flags & 0x01) != 0;
+                enabled_features.use_16bit_adv_predict = (flags & 0x02) != 0;
+            }
         }
 
         // full size of the original file
@@ -1230,7 +1237,11 @@ impl LeptonHeader {
         return Ok(hdr_data);
     }
 
-    pub fn write_lepton_header(&self, writer: &mut W) -> Result<()> {
+    pub fn write_lepton_header(
+        &self,
+        writer: &mut W,
+        enabled_features: &EnabledFeatures,
+    ) -> Result<()> {
         let mut lepton_header = Vec::::new();
 
         {
@@ -1273,7 +1284,21 @@ impl LeptonHeader {
         writer.write_u8('M' as u8)?;
         writer.write_u8('S' as u8)?;
         writer.write_u32::(lepton_header.len() as u32)?;
-        writer.write_all(&[0; 6])?;
+
+        // write the flags that were used to encode this file
+        writer.write_u8(
+            0x80 | if enabled_features.use_16bit_dc_estimate {
+                1
+            } else {
+                0
+            } | if enabled_features.use_16bit_adv_predict {
+                2
+            } else {
+                0
+            },
+        )?;
+
+        writer.write_all(&[0; 5])?;
 
         writer.write_u32::(self.jpeg_file_size)?;
         writer.write_u32::(compressed_header.len() as u32)?;
@@ -1706,10 +1731,12 @@ fn parse_and_write_header() {
         0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, // EOI
     ];
 
+    let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();
+
     let mut lh = LeptonHeader::new();
     lh.jpeg_file_size = 123;
 
-    lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &EnabledFeatures::all())
+    lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &enabled_features)
         .unwrap();
     lh.thread_handoff.push(ThreadHandoff {
         luma_y_start: 0,
@@ -1722,12 +1749,12 @@ fn parse_and_write_header() {
     });
 
     let mut serialized = Vec::new();
-    lh.write_lepton_header(&mut Cursor::new(&mut serialized))
+    lh.write_lepton_header(&mut Cursor::new(&mut serialized), &enabled_features)
         .unwrap();
 
     let mut other = LeptonHeader::new();
     let mut other_reader = Cursor::new(&serialized);
     other
-        .read_lepton_header(&mut other_reader, &EnabledFeatures::all())
+        .read_lepton_header(&mut other_reader, &mut enabled_features)
         .unwrap();
 }
diff --git a/src/structs/probability_tables.rs b/src/structs/probability_tables.rs
index 347244a1..beb61be3 100644
--- a/src/structs/probability_tables.rs
+++ b/src/structs/probability_tables.rs
@@ -7,6 +7,7 @@
 use std::cmp;
 
 use crate::consts::*;
+use crate::enabled_features;
 use crate::helpers::*;
 use crate::structs::idct::*;
 use crate::structs::model::*;
@@ -258,6 +259,7 @@ impl ProbabilityTables {
         qt: &QuantizationTables,
         block_context: &BlockContext,
         num_non_zeros: &[NeighborSummary],
+        enabled_features: &enabled_features::EnabledFeatures,
     ) -> PredictDCResult {
         let q_transposed = qt.get_quantization_table_transposed();
 
@@ -268,25 +270,51 @@ impl ProbabilityTables {
         let calc_left = || {
             let left_context = block_context.neighbor_context_left(num_non_zeros);
 
-            let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
-            let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
-            let pixel_delta = a1 - a2;
-            let a: i16x8 = a1 + 1024;
-            let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
+            if enabled_features.use_16bit_adv_predict {
+                let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
+                let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
+                let pixel_delta = a1 - a2;
+                let a: i16x8 = a1 + 1024;
+                let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
 
-            b - a
+                b - a
+            } else {
+                let mut dc_estimates = [0i16; 8];
+                for i in 0..8 {
+                    let a = i32::from(pixels_sans_dc.get_block()[i << 3]) + 1024;
+                    let pixel_delta = i32::from(pixels_sans_dc.get_block()[i << 3])
+                        - i32::from(pixels_sans_dc.get_block()[(i << 3) + 1]);
+                    let b = i32::from(left_context.get_vertical()[i]) - (pixel_delta / 2); //round to zero
+                    dc_estimates[i] = (b - a) as i16;
+                }
+
+                i16x8::from(dc_estimates)
+            }
         };
 
         let calc_above = || {
             let above_context = block_context.neighbor_context_above(num_non_zeros);
 
-            let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
-            let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
-            let pixel_delta = a1 - a2;
-            let a: i16x8 = a1 + 1024;
-            let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
+            if enabled_features.use_16bit_adv_predict {
+                let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
+                let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
+                let pixel_delta = a1 - a2;
+                let a: i16x8 = a1 + 1024;
+                let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
 
-            b - a
+                b - a
+            } else {
+                let mut dc_estimates = [0i16; 8];
+                for i in 0..8 {
+                    let a = i32::from(pixels_sans_dc.get_block()[i]) + 1024;
+                    let pixel_delta = i32::from(pixels_sans_dc.get_block()[i])
+                        - i32::from(pixels_sans_dc.get_block()[i + 8]);
+                    let b = i32::from(above_context.get_horizontal()[i]) - (pixel_delta / 2); //round to zero
+                    dc_estimates[i] = (b - a) as i16;
+                }
+
+                i16x8::from(dc_estimates)
+            }
         };
 
         let min_dc;
diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs
index 884ea4df..6c95ee09 100644
--- a/tests/end_to_end.rs
+++ b/tests/end_to_end.rs
@@ -83,13 +83,33 @@ fn verify_decode(
         &mut Cursor::new(input),
         &mut output,
         8,
-        &EnabledFeatures::all(),
+        &EnabledFeatures::compat_lepton_vector_read(),
     )
     .unwrap();
 
     assert!(output[..] == expected[..]);
 }
 
+/// Verifies that the decode will accept existing Lepton files and generate
+/// exactly the same jpeg from them. Used to detect unexpected divergences in the coding format.
+#[test]
+fn verify_decode_scalar_overflow() {
+    let file = "mathoverflow_scalar";
+
+    println!("decoding {0:?}", file);
+
+    let input = read_file(file, ".lep");
+    let expected = read_file(file, ".jpg");
+
+    let mut output = Vec::new();
+
+    let features = EnabledFeatures::compat_lepton_scalar_read();
+
+    decode_lepton(&mut Cursor::new(input), &mut output, 8, &features).unwrap();
+
+    assert!(output[..] == expected[..]);
+}
+
 /// Verifies that the decode will accept existing Lepton files and generate
 /// exactly the same jpeg from them when called by an external interface
 /// with use_16bit_dc_estimate=true for C++ backward compatibility.
@@ -208,7 +228,7 @@ fn verify_encode(
         &mut Cursor::new(&input),
         &mut Cursor::new(&mut lepton),
         8,
-        &EnabledFeatures::all(),
+        &EnabledFeatures::compat_lepton_vector_write(),
     )
     .unwrap();
 
@@ -216,7 +236,7 @@ fn verify_encode(
         &mut Cursor::new(lepton),
         &mut output,
         8,
-        &EnabledFeatures::all(),
+        &EnabledFeatures::compat_lepton_vector_read(),
     )
     .unwrap();
 
@@ -232,8 +252,7 @@ fn verify_16bitmath() {
 
     let mut output = Vec::new();
 
-    let mut features = EnabledFeatures::all();
-    features.use_16bit_dc_estimate = true;
+    let features = EnabledFeatures::compat_lepton_vector_read();
 
     decode_lepton(&mut Cursor::new(input), &mut output, 8, &features).unwrap();
 
@@ -247,7 +266,7 @@ fn verify_16bitmath() {
 
     let mut output = Vec::new();
 
-    let mut features = EnabledFeatures::all();
+    let mut features = EnabledFeatures::compat_lepton_vector_read();
     features.use_16bit_dc_estimate = false;
 
     decode_lepton(&mut Cursor::new(input), &mut output, 8, &features).unwrap();
@@ -290,7 +309,12 @@ fn verify_extern_16bit_math_retry() {
 fn verify_encode_verify(#[values("slrcity")] file: &str) {
     let input = read_file(file, ".jpg");
 
-    encode_lepton_verify(&input[..], 8, &EnabledFeatures::all()).unwrap();
+    encode_lepton_verify(
+        &input[..],
+        8,
+        &EnabledFeatures::compat_lepton_vector_write(),
+    )
+    .unwrap();
 }
 
 fn assert_exception(expected_error: ExitCode, result: Result) {
@@ -317,7 +341,7 @@ fn verify_encode_progressive_false(
             8,
             &EnabledFeatures {
                 progressive: false,
-                ..Default::default()
+                ..EnabledFeatures::compat_lepton_vector_write()
             },
         ),
     );
@@ -335,7 +359,7 @@ fn verify_nonoptimal() {
         &mut Cursor::new(&input),
         &mut Cursor::new(&mut lepton),
         8,
-        &EnabledFeatures::all(),
+        &EnabledFeatures::compat_lepton_vector_write(),
     ),
     );
 }
@@ -345,13 +369,14 @@ 
 fn verify_encode_image_with_zeros_in_dqt_tables() {
     let input = read_file("zeros_in_dqt_tables", ".jpg");
     let mut lepton = Vec::new();
+
     assert_exception(
         ExitCode::UnsupportedJpeg,
         encode_lepton(
             &mut Cursor::new(&input),
             &mut Cursor::new(&mut lepton),
            8,
-            &EnabledFeatures::all(),
+            &EnabledFeatures::compat_lepton_vector_write(),
        ),
    );
 }
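Note on the flag byte introduced by this patch: write_lepton_header now stores the two back-compat switches in the first of the previously zeroed padding bytes of the "MS" extended header, and read_lepton_header only honors them when bit 7 (0x80) is set, so older files that wrote zeros keep the caller's presets. The sketch below is a standalone illustration of that convention, not code from the repository; the CompatFlags type and the encode_flags/decode_flags helpers are invented for the example.

```rust
// Illustrative sketch (hypothetical helpers, not part of the crate):
// round-trips the back-compat flag byte as written by write_lepton_header
// and interpreted by read_lepton_header in the patch above.
// Bit 7 (0x80) marks the byte as meaningful, bit 0 carries
// use_16bit_dc_estimate and bit 1 carries use_16bit_adv_predict.

#[derive(Debug, Clone, Copy, PartialEq)]
struct CompatFlags {
    use_16bit_dc_estimate: bool,
    use_16bit_adv_predict: bool,
}

fn encode_flags(f: CompatFlags) -> u8 {
    0x80 | (f.use_16bit_dc_estimate as u8) | ((f.use_16bit_adv_predict as u8) << 1)
}

fn decode_flags(byte: u8, defaults: CompatFlags) -> CompatFlags {
    if byte & 0x80 != 0 {
        CompatFlags {
            use_16bit_dc_estimate: byte & 0x01 != 0,
            use_16bit_adv_predict: byte & 0x02 != 0,
        }
    } else {
        // older files wrote zero padding here, so fall back to the caller's presets
        defaults
    }
}

fn main() {
    let scalar = CompatFlags { use_16bit_dc_estimate: false, use_16bit_adv_predict: false };
    let vector = CompatFlags { use_16bit_dc_estimate: true, use_16bit_adv_predict: true };

    // a freshly written file always records its own flags
    assert_eq!(decode_flags(encode_flags(vector), scalar), vector);
    // a legacy file (all-zero padding) leaves the caller's choice in effect
    assert_eq!(decode_flags(0x00, scalar), scalar);
    println!("flag byte for vector-compat files: {:#04x}", encode_flags(vector));
}
```

When the flag byte is absent, the caller's preset still decides the math path, which is why the CLI and tests pick between EnabledFeatures::compat_lepton_scalar_read() and EnabledFeatures::compat_lepton_vector_read() for files produced by the C++ scalar and SSE2/AVX2 builds respectively.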