Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added scalar overflow handling #57

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added images/mathoverflow_scalar.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/mathoverflow_scalar.lep
Binary file not shown.
41 changes: 31 additions & 10 deletions src/enabled_features.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// features that are enabled in the encoder. Turn off for potential backward compat issues.
#[derive(Debug, Clone, Copy)]
pub struct EnabledFeatures {
/// enables/disables reading of progressive images
pub progressive: bool,
Expand All @@ -12,32 +13,52 @@ pub struct EnabledFeatures {
// maximum jpeg height
pub max_jpeg_height: i32,

// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
/// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
pub use_16bit_dc_estimate: bool,

/// Sadly C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
pub use_16bit_adv_predict: bool,
}

impl Default for EnabledFeatures {
fn default() -> Self {
impl EnabledFeatures {
/// parameters that allow everything for encoding, compatible with the C++ lepton build compiled with SIMD
#[allow(dead_code)]
pub fn compat_lepton_vector_write() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: true,
max_jpeg_width: 16386,
max_jpeg_height: 16386,
use_16bit_dc_estimate: false,
max_jpeg_width: 16386,
use_16bit_dc_estimate: true,
use_16bit_adv_predict: true,
}
}
}

impl EnabledFeatures {
/// parameters that allow everything
/// parameters that allow everything for decoding c++ lepton images encoded
/// with the scalar compile options
#[allow(dead_code)]
pub fn all() -> Self {
pub fn compat_lepton_scalar_read() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: true,
reject_dqts_with_zeros: false,
max_jpeg_height: i32::MAX,
max_jpeg_width: i32::MAX,
use_16bit_dc_estimate: false,
use_16bit_adv_predict: false,
}
}

/// parameters that allow everything for decoding c++ lepton images encoded
/// with the vector (SSE2/AVX2) compile options
#[allow(dead_code)]
pub fn compat_lepton_vector_read() -> Self {
Self {
progressive: true,
reject_dqts_with_zeros: false,
max_jpeg_height: i32::MAX,
max_jpeg_width: i32::MAX,
use_16bit_dc_estimate: true,
use_16bit_adv_predict: true,
}
}
}
15 changes: 10 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub unsafe extern "C" fn WrapperCompressImage(
&mut reader,
&mut writer,
number_of_threads as usize,
&EnabledFeatures::default(),
&EnabledFeatures::compat_lepton_vector_write(),
) {
Ok(_) => {}
Err(e) => match e.root_cause().downcast_ref::<LeptonError>() {
Expand Down Expand Up @@ -159,14 +159,19 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
) -> i32 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the comment above please update the description to "that were compressed by C++ SIMD version of Lepton"

match catch_unwind(|| {
// For back-compat with C++ version we allow decompression of images with zeros in DQT tables
let mut enabled_features = EnabledFeatures::all();
enabled_features.reject_dqts_with_zeros = false;

// C++ version has a bug where it uses 16 bit math in the SIMD path and 32 bit math in the scalar path
// depending on the compiler options. If use_16bit_dc_estimate=true, the decompression uses a back-compat
// mode that considers it. The caller should set use_16bit_dc_estimate to true only for images that were
// compressed by C++ version with relevant compiler options.
enabled_features.use_16bit_dc_estimate = use_16bit_dc_estimate;

// this is a bit of a mess since for a while we were encoding a mix of 16- and 32-bit math
// (hence the two parameters in features).

let mut enabled_features = EnabledFeatures {
use_16bit_dc_estimate: use_16bit_dc_estimate,
..EnabledFeatures::compat_lepton_vector_read()
};

loop {
let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize);
Expand All @@ -179,7 +184,7 @@ pub unsafe extern "C" fn WrapperDecompressImageEx(
&mut reader,
&mut writer,
number_of_threads as usize,
&enabled_features,
&mut enabled_features,
) {
Ok(_) => {
*result_size = writer.position().into();
Expand Down
14 changes: 11 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ fn main_with_result() -> anyhow::Result<()> {
let mut dump = false;
let mut all = false;
let mut overwrite = false;
let mut enabled_features = EnabledFeatures::default();
let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();

// only output the log if we are connected to a console (otherwise if there is redirection we would corrupt the file)
if stdout().is_terminal() {
Expand Down Expand Up @@ -81,6 +81,14 @@ fn main_with_result() -> anyhow::Result<()> {
enabled_features.reject_dqts_with_zeros = false;
} else if args[i] == "-use16bitdc" {
enabled_features.use_16bit_dc_estimate = true;
} else if args[i] == "-useleptonscalar" {
// lepton files that were encoded by the dropbox c++ version compiled in scalar mode
enabled_features.use_16bit_adv_predict = false;
enabled_features.use_16bit_dc_estimate = false;
} else if args[i] == "-useleptonvector" {
// lepton files that were encoded by the dropbox c++ version compiled in AVX2/SSE2 mode
enabled_features.use_16bit_adv_predict = true;
enabled_features.use_16bit_dc_estimate = true;
} else {
return err_exit_code(
ExitCode::SyntaxError,
Expand Down Expand Up @@ -111,7 +119,7 @@ fn main_with_result() -> anyhow::Result<()> {
.context(here!())?;
} else {
lh = LeptonHeader::new();
lh.read_lepton_header(&mut reader, &enabled_features)
lh.read_lepton_header(&mut reader, &mut enabled_features)
.context(here!())?;

let _metrics;
Expand All @@ -131,7 +139,7 @@ fn main_with_result() -> anyhow::Result<()> {
println!("{0}", s.replace("},", "},\r\n").replace("],", "],\r\n"));

if !lh
.advance_next_header_segment(&EnabledFeatures::default())
.advance_next_header_segment(&enabled_features)
.context(here!())?
{
break;
Expand Down
3 changes: 2 additions & 1 deletion src/structs/lepton_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,8 @@ fn parse_token<R: Read, const ALL_PRESENT: bool>(
eob_y,
)?;

let predicted_dc = pt.adv_predict_dc_pix::<ALL_PRESENT>(&output, qt, context, num_non_zeros);
let predicted_dc =
pt.adv_predict_dc_pix::<ALL_PRESENT>(&output, qt, context, num_non_zeros, features);

let coef = model
.read_dc(
Expand Down
3 changes: 2 additions & 1 deletion src/structs/lepton_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ fn serialize_tokens<W: Write, const ALL_PRESENT: bool>(
)
.context(here!())?;

let predicted_val = pt.adv_predict_dc_pix::<ALL_PRESENT>(&block, qt, context, &num_non_zeros);
let predicted_val =
pt.adv_predict_dc_pix::<ALL_PRESENT>(&block, qt, context, &num_non_zeros, features);

let avg_predicted_dc = ProbabilityTables::adv_predict_or_unpredict_dc(
block.get_dc(),
Expand Down
59 changes: 43 additions & 16 deletions src/structs/lepton_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ pub fn decode_lepton_wrapper<R: Read + Seek, W: Write>(

let mut lh = LeptonHeader::new();

lh.read_lepton_header(reader, enabled_features)
let mut features_mut = enabled_features.clone();

lh.read_lepton_header(reader, &mut features_mut)
.context(here!())?;

let metrics = lh
.recode_jpeg(writer, reader, size, num_threads, enabled_features)
.recode_jpeg(writer, reader, size, num_threads, &features_mut)
.context(here!())?;

return Ok(metrics);
Expand All @@ -75,7 +77,8 @@ pub fn encode_lepton_wrapper<R: Read + Seek, W: Write + Seek>(
) -> Result<Metrics> {
let (lp, image_data) = read_jpeg(reader, enabled_features, max_threads, |_jh| {})?;

lp.write_lepton_header(writer).context(here!())?;
lp.write_lepton_header(writer, enabled_features)
.context(here!())?;

let metrics = run_lepton_encoder_threads(
&lp.jpeg_header,
Expand Down Expand Up @@ -125,14 +128,11 @@ pub fn encode_lepton_wrapper_verify(

info!("decompressing to verify contents");

let mut c = enabled_features.clone();

metrics.merge_from(
decode_lepton_wrapper(
&mut verifyreader,
&mut verify_buffer,
max_threads,
&enabled_features,
)
.context(here!())?,
decode_lepton_wrapper(&mut verifyreader, &mut verify_buffer, max_threads, &mut c)
.context(here!())?,
);

if input_data.len() != verify_buffer.len() {
Expand Down Expand Up @@ -997,7 +997,7 @@ impl LeptonHeader {
pub fn read_lepton_header<R: Read>(
&mut self,
reader: &mut R,
enabled_features: &EnabledFeatures,
enabled_features: &mut EnabledFeatures,
) -> Result<()> {
let mut header = [0 as u8; LEPTON_FILE_HEADER.len()];

Expand Down Expand Up @@ -1039,6 +1039,13 @@ impl LeptonHeader {
if header[5] == 'M' as u8 && header[6] == 'S' as u8 {
c.set_position(7);
self.uncompressed_lepton_header_size = c.read_u32::<LittleEndian>()?;

// read the flag bits to know how we should decode this file
let flags = c.read_u8()?;
if (flags & 0x80) != 0 {
enabled_features.use_16bit_dc_estimate = (flags & 0x01) != 0;
enabled_features.use_16bit_adv_predict = (flags & 0x02) != 0;
}
}

// full size of the original file
Expand Down Expand Up @@ -1230,7 +1237,11 @@ impl LeptonHeader {
return Ok(hdr_data);
}

pub fn write_lepton_header<W: Write>(&self, writer: &mut W) -> Result<()> {
pub fn write_lepton_header<W: Write>(
&self,
writer: &mut W,
enabled_features: &EnabledFeatures,
) -> Result<()> {
let mut lepton_header = Vec::<u8>::new();

{
Expand Down Expand Up @@ -1273,7 +1284,21 @@ impl LeptonHeader {
writer.write_u8('M' as u8)?;
writer.write_u8('S' as u8)?;
writer.write_u32::<LittleEndian>(lepton_header.len() as u32)?;
writer.write_all(&[0; 6])?;

// write the flags that were used to encode this file
writer.write_u8(
0x80 | if enabled_features.use_16bit_dc_estimate {
1
} else {
0
} | if enabled_features.use_16bit_adv_predict {
2
} else {
0
},
)?;

writer.write_all(&[0; 5])?;

writer.write_u32::<LittleEndian>(self.jpeg_file_size)?;
writer.write_u32::<LittleEndian>(compressed_header.len() as u32)?;
Expand Down Expand Up @@ -1706,10 +1731,12 @@ fn parse_and_write_header() {
0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, // EOI
];

let mut enabled_features = EnabledFeatures::compat_lepton_vector_read();

let mut lh = LeptonHeader::new();
lh.jpeg_file_size = 123;

lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &EnabledFeatures::all())
lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &enabled_features)
.unwrap();
lh.thread_handoff.push(ThreadHandoff {
luma_y_start: 0,
Expand All @@ -1722,12 +1749,12 @@ fn parse_and_write_header() {
});

let mut serialized = Vec::new();
lh.write_lepton_header(&mut Cursor::new(&mut serialized))
lh.write_lepton_header(&mut Cursor::new(&mut serialized), &enabled_features)
.unwrap();

let mut other = LeptonHeader::new();
let mut other_reader = Cursor::new(&serialized);
other
.read_lepton_header(&mut other_reader, &EnabledFeatures::all())
.read_lepton_header(&mut other_reader, &mut enabled_features)
.unwrap();
}
52 changes: 40 additions & 12 deletions src/structs/probability_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use std::cmp;

use crate::consts::*;
use crate::enabled_features;
use crate::helpers::*;
use crate::structs::idct::*;
use crate::structs::model::*;
Expand Down Expand Up @@ -258,6 +259,7 @@ impl ProbabilityTables {
qt: &QuantizationTables,
block_context: &BlockContext,
num_non_zeros: &[NeighborSummary],
enabled_features: &enabled_features::EnabledFeatures,
) -> PredictDCResult {
let q_transposed = qt.get_quantization_table_transposed();

Expand All @@ -268,25 +270,51 @@ impl ProbabilityTables {
let calc_left = || {
let left_context = block_context.neighbor_context_left(num_non_zeros);

let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
if enabled_features.use_16bit_adv_predict {
let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 8);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 1, 8);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*left_context.get_vertical()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;

b - a
b - a
} else {
let mut dc_estimates = [0i16; 8];
for i in 0..8 {
let a = i32::from(pixels_sans_dc.get_block()[i << 3]) + 1024;
let pixel_delta = i32::from(pixels_sans_dc.get_block()[i << 3])
- i32::from(pixels_sans_dc.get_block()[(i << 3) + 1]);
let b = i32::from(left_context.get_vertical()[i]) - (pixel_delta / 2); //round to zero
dc_estimates[i] = (b - a) as i16;
}

i16x8::from(dc_estimates)
}
};

let calc_above = || {
let above_context = block_context.neighbor_context_above(num_non_zeros);

let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;
if enabled_features.use_16bit_adv_predict {
let a1 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 0, 1);
let a2 = ProbabilityTables::from_stride(&pixels_sans_dc.get_block(), 8, 1);
let pixel_delta = a1 - a2;
let a: i16x8 = a1 + 1024;
let b : i16x8 = i16x8::new(*above_context.get_horizontal()) - (pixel_delta - (pixel_delta>>15) >> 1) /* divide pixel_delta by 2 rounding towards 0 */;

b - a
b - a
} else {
let mut dc_estimates = [0i16; 8];
for i in 0..8 {
let a = i32::from(pixels_sans_dc.get_block()[i]) + 1024;
let pixel_delta = i32::from(pixels_sans_dc.get_block()[i])
- i32::from(pixels_sans_dc.get_block()[i + 8]);
let b = i32::from(above_context.get_horizontal()[i]) - (pixel_delta / 2); //round to zero
dc_estimates[i] = (b - a) as i16;
}

i16x8::from(dc_estimates)
}
};

let min_dc;
Expand Down
Loading
Loading