diff --git a/src/consts.rs b/src/consts.rs index 2ef2747..de9a96c 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -4,15 +4,17 @@ * This software incorporates material from third parties. See NOTICE.txt for details. *--------------------------------------------------------------------------------------------*/ +use crate::jpeg::jpeg_code; + #[derive(PartialEq, Debug)] -pub enum JPegDecodeStatus { +pub enum JpegDecodeStatus { DecodeInProgress, RestartIntervalExpired, ScanCompleted, } #[derive(PartialEq, Debug, Copy, Clone)] -pub enum JPegType { +pub enum JpegType { Unknown, Sequential, Progressive, @@ -86,8 +88,8 @@ pub const SMALL_FILE_BYTES_PER_ENCDOING_THREAD: usize = 125000; pub const MAX_THREADS_SUPPORTED_BY_LEPTON_FORMAT: usize = 16; // Number of threads minus 1 should fit in 4 bits //pub const SingleFFByte : [u8;1] = [ 0xFF ]; -pub const EOI: [u8; 2] = [0xFF, crate::jpeg_code::EOI]; // EOI segment -pub const SOI: [u8; 2] = [0xFF, crate::jpeg_code::SOI]; // SOI segment +pub const EOI: [u8; 2] = [0xFF, jpeg_code::EOI]; // EOI segment +pub const SOI: [u8; 2] = [0xFF, jpeg_code::SOI]; // SOI segment pub const LEPTON_FILE_HEADER: [u8; 2] = [0xcf, 0x84]; // the tau symbol for a tau lepton in utf-8 pub const LEPTON_HEADER_BASELINE_JPEG_TYPE: [u8; 1] = [b'Z']; pub const LEPTON_HEADER_PROGRESSIVE_JPEG_TYPE: [u8; 1] = [b'X']; diff --git a/src/jpeg/bit_reader.rs b/src/jpeg/bit_reader.rs index 08dbb63..b0447fd 100644 --- a/src/jpeg/bit_reader.rs +++ b/src/jpeg/bit_reader.rs @@ -6,9 +6,10 @@ use std::io::{BufRead, Seek}; +use super::jpeg_code; use crate::helpers::has_ff; use crate::lepton_error::{err_exit_code, ExitCode}; -use crate::{jpeg_code, LeptonError}; +use crate::LeptonError; // Implemenation of bit reader on top of JPEG data stream as read by a reader pub struct BitReader { @@ -17,18 +18,20 @@ pub struct BitReader { bits_left: u32, cpos: u32, eof: bool, - start_offset: u64, truncated_ff: bool, read_ahead_bytes: u32, } impl BitReader { - pub fn get_stream_position(&mut self) -> u32 { + /// Returns the current position in the stream, which corresponds the byte that has + /// unread bits in it. + /// + /// If the last byte was a 0xff, then the position is the byte before + /// the 0xff. + pub fn stream_position(&mut self) -> u64 { self.undo_read_ahead(); - let pos: u32 = (self.inner.stream_position().unwrap() - self.start_offset) - .try_into() - .unwrap(); + let pos = self.inner.stream_position().unwrap(); if self.bits_left > 0 && !self.eof { if self.bits as u8 == 0xff && !self.truncated_ff { @@ -41,16 +44,13 @@ impl BitReader { } } - pub fn new(mut inner: R) -> Self { - let start_offset = inner.stream_position().unwrap(); - + pub fn new(inner: R) -> Self { BitReader { inner: inner, bits: 0, bits_left: 0, cpos: 0, eof: false, - start_offset, truncated_ff: false, read_ahead_bytes: 0, } @@ -305,11 +305,11 @@ fn read_simple() { assert_eq!(1, b.read(4).unwrap()); assert_eq!((4, 0x10), b.overhang()); - assert_eq!(0, b.get_stream_position()); + assert_eq!(0, b.stream_position()); assert_eq!(2, b.read(4).unwrap()); assert_eq!((0, 0), b.overhang()); // byte is aligned should be no overhang - assert_eq!(1, b.get_stream_position()); + assert_eq!(1, b.stream_position()); assert_eq!(3, b.read(4).unwrap()); assert_eq!(4, b.read(4).unwrap()); @@ -319,20 +319,20 @@ fn read_simple() { assert_eq!(0x9f, b.read(8).unwrap()); assert_eq!((4, 0xf0), b.overhang()); - assert_eq!(5, b.get_stream_position()); // should be at the beginning of the escape code + assert_eq!(5, b.stream_position()); // should be at the beginning of the escape code assert_eq!(0xfe, b.read(8).unwrap()); assert_eq!((4, 0xe0), b.overhang()); - assert_eq!(7, b.get_stream_position()); // now we are after the escape code + assert_eq!(7, b.stream_position()); // now we are after the escape code assert_eq!(0xe, b.read(4).unwrap()); assert_eq!((0, 0), b.overhang()); - assert_eq!(8, b.get_stream_position()); // now we read everything and should be at the end of the stream + assert_eq!(8, b.stream_position()); // now we read everything and should be at the end of the stream // read an empty byte passed the end of the stream.. should be zero and trigger EOF assert_eq!(0, b.read(8).unwrap()); assert_eq!(true, b.is_eof()); - assert_eq!(8, b.get_stream_position()); // still at the same position + assert_eq!(8, b.stream_position()); // still at the same position } // what happens when a file has 0xff as the last character (assume that it is an escaped 0xff) @@ -342,23 +342,23 @@ fn read_truncate_ff() { let mut b = BitReader::new(Cursor::new(&arr)); - assert_eq!(0, b.get_stream_position()); + assert_eq!(0, b.stream_position()); assert_eq!(0x1, b.read(4).unwrap()); - assert_eq!(0, b.get_stream_position()); + assert_eq!(0, b.stream_position()); assert_eq!(0x2f, b.read(8).unwrap()); assert_eq!((4, 0xf0), b.overhang()); - assert_eq!(1, b.get_stream_position()); + assert_eq!(1, b.stream_position()); // 4 bits left, not EOF yet assert_eq!(false, b.is_eof()); assert_eq!(0xf, b.read(4).unwrap()); assert_eq!(false, b.is_eof()); // now we are at the end really - assert_eq!(2, b.get_stream_position()); + assert_eq!(2, b.stream_position()); assert_eq!(0, b.read(4).unwrap()); assert_eq!(true, b.is_eof()); - assert_eq!(2, b.get_stream_position()); + assert_eq!(2, b.stream_position()); } diff --git a/src/jpeg/block_based_image.rs b/src/jpeg/block_based_image.rs index eb3393e..c9cc156 100644 --- a/src/jpeg/block_based_image.rs +++ b/src/jpeg/block_based_image.rs @@ -10,7 +10,7 @@ use wide::{i16x8, CmpEq}; use crate::consts::ZIGZAG_TO_TRANSPOSED; -use super::jpeg_header::JPegHeader; +use super::jpeg_header::JpegHeader; /// holds the 8x8 blocks for a given component. Since we do multithreaded encoding, /// the image may only hold a subset of the components (specified by dpos_offset), @@ -30,7 +30,7 @@ static EMPTY: AlignedBlock = AlignedBlock { raw_data: [0; 64] }; impl BlockBasedImage { // constructs new block image for the given y-coordinate range pub fn new( - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, component: usize, luma_y_start: u32, luma_y_end: u32, diff --git a/src/jpeg_code.rs b/src/jpeg/jpeg_code.rs similarity index 100% rename from src/jpeg_code.rs rename to src/jpeg/jpeg_code.rs diff --git a/src/jpeg/jpeg_header.rs b/src/jpeg/jpeg_header.rs index c65f2ee..72a8759 100644 --- a/src/jpeg/jpeg_header.rs +++ b/src/jpeg/jpeg_header.rs @@ -32,16 +32,17 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -use std::io::Read; +use std::io::{Cursor, Read, Write}; use std::num::NonZeroU32; -use crate::consts::JPegType; +use crate::consts::JpegType; use crate::enabled_features::EnabledFeatures; use crate::helpers::*; use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result}; -use crate::{jpeg_code, LeptonError}; +use crate::LeptonError; use super::component_info::ComponentInfo; +use super::jpeg_code; use super::truncate_components::TruncateComponents; /// Information required to partition the coding the JPEG huffman encoded stream of a scan @@ -65,7 +66,7 @@ impl RestartSegmentCodingInfo { num_overhang_bits: u8, last_dc: [i16; 4], mcu: u32, - jf: &JPegHeader, + jf: &JpegHeader, ) -> Self { let mcu_y = mcu / jf.mcuh; let luma_mul = jf.cmp_info[0].bcv / jf.mcuv; @@ -124,6 +125,71 @@ pub struct ReconstructionInfo { /// information about how to truncate the image if it was partially written pub truncate_components: TruncateComponents, + + /// trailing RST marking information + pub rst_err: Vec, + + /// raw jpeg header to be written back to the file when it is recreated + pub raw_jpeg_header: Vec, + + /// garbage data (default value - empty segment - means no garbage data) + pub garbage_data: Vec, +} + +pub fn parse_jpeg_header( + reader: &mut R, + enabled_features: &EnabledFeatures, + jpeg_header: &mut JpegHeader, + rinfo: &mut ReconstructionInfo, +) -> Result { + // the raw header in the lepton file can actually be spread across different sections + // seperated by the Start-of-Scan marker. We use the mirror to write out whatever + // data we parse until we hit the SOS + + let mut output = Vec::new(); + let mut output_cursor = Cursor::new(&mut output); + + let mut mirror = Mirror::new(reader, &mut output_cursor); + + if jpeg_header.parse(&mut mirror, enabled_features).context()? { + // append the header if it was not the end of file marker + rinfo.raw_jpeg_header.append(&mut output); + return Ok(true); + } else { + // if the output was more than 2 bytes then was a trailing header, so keep that around as well, + // but we don't want the EOI since that goes into the garbage data. + if output.len() > 2 { + rinfo.raw_jpeg_header.extend(&output[0..output.len() - 2]); + } + + return Ok(false); + } +} + +// internal utility we use to collect the header that we read for later +struct Mirror<'a, R, W> { + read: &'a mut R, + output: &'a mut W, + amount_written: usize, +} + +impl<'a, R, W> Mirror<'a, R, W> { + pub fn new(read: &'a mut R, output: &'a mut W) -> Self { + Mirror { + read, + output, + amount_written: 0, + } + } +} + +impl Read for Mirror<'_, R, W> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.read.read(buf)?; + self.output.write_all(&buf[..n])?; + self.amount_written += n; + Ok(n) + } } #[derive(Copy, Clone, Debug)] @@ -349,7 +415,7 @@ impl HuffTree { /// JPEG information parsed out of segments found before the image segment #[derive(Debug, Clone)] -pub struct JPegHeader { +pub struct JpegHeader { /// quantization tables 4 x 64 pub q_tables: [[u16; 64]; 4], @@ -374,7 +440,7 @@ pub struct JPegHeader { /// height of image pub img_height: u32, - pub jpeg_type: JPegType, + pub jpeg_type: JpegType, /// max horizontal sample factor pub sfhm: u32, @@ -419,9 +485,9 @@ enum ParseSegmentResult { SOS, } -impl Default for JPegHeader { +impl Default for JpegHeader { fn default() -> Self { - return JPegHeader { + return JpegHeader { q_tables: [[0; 64]; 4], h_codes: [[HuffCodes::default(); 4]; 2], h_trees: [[HuffTree::default(); 4]; 2], @@ -435,7 +501,7 @@ impl Default for JPegHeader { cmpc: 0, img_width: 0, img_height: 0, - jpeg_type: JPegType::Unknown, + jpeg_type: JpegType::Unknown, sfhm: 0, sfvm: 0, mcuv: NonZeroU32::MIN, @@ -452,7 +518,7 @@ impl Default for JPegHeader { } } -impl JPegHeader { +impl JpegHeader { #[inline(always)] pub(super) fn get_huff_dc_codes(&self, cmp: usize) -> &HuffCodes { &self.h_codes[0][usize::from(self.cmp_info[cmp].huff_dc)] @@ -510,7 +576,7 @@ impl JPegHeader { if (self.cmp_info[cmp].sfv == 0) || (self.cmp_info[cmp].sfh == 0) || (self.q_tables[usize::from(self.cmp_info[cmp].q_table_index)][0] == 0) - || (self.jpeg_type == JPegType::Unknown) + || (self.jpeg_type == JpegType::Unknown) { return err_exit_code( ExitCode::UnsupportedJpeg, @@ -819,7 +885,7 @@ impl JPegHeader { jpeg_code::SOF1| // SOF1 segment, coding process: extended sequential DCT jpeg_code::SOF2 => // SOF2 segment, coding process: progressive DCT { - if self.jpeg_type != JPegType::Unknown + if self.jpeg_type != JpegType::Unknown { return err_exit_code(ExitCode::UnsupportedJpeg, "image cannot have multiple SOF blocks"); } @@ -827,11 +893,11 @@ impl JPegHeader { // set JPEG coding type if btype == jpeg_code::SOF2 { - self.jpeg_type = JPegType::Progressive; + self.jpeg_type = JpegType::Progressive; } else { - self.jpeg_type = JPegType::Sequential; + self.jpeg_type = JpegType::Sequential; } ensure_space(segment,hpos, 6).context()?; diff --git a/src/jpeg/jpeg_position_state.rs b/src/jpeg/jpeg_position_state.rs index 0b9babd..542ee7d 100644 --- a/src/jpeg/jpeg_position_state.rs +++ b/src/jpeg/jpeg_position_state.rs @@ -4,11 +4,11 @@ * This software incorporates material from third parties. See NOTICE.txt for details. *--------------------------------------------------------------------------------------------*/ -use crate::consts::{JPegDecodeStatus, JPegType}; +use crate::consts::{JpegDecodeStatus, JpegType}; use crate::lepton_error::{err_exit_code, AddContext, ExitCode}; use crate::{LeptonError, Result}; -use super::jpeg_header::{HuffCodes, JPegHeader}; +use super::jpeg_header::{HuffCodes, JpegHeader}; /// used to keep track of position while encoding or decoding a jpeg pub struct JpegPositionState { @@ -40,7 +40,7 @@ pub struct JpegPositionState { } impl JpegPositionState { - pub fn new(jf: &JPegHeader, mcu: u32) -> Self { + pub fn new(jf: &JpegHeader, mcu: u32) -> Self { let cmp = jf.cs_cmp[0]; let mcumul = jf.cmp_info[cmp].sfv * jf.cmp_info[cmp].sfh; @@ -71,7 +71,7 @@ impl JpegPositionState { self.cmp } - pub fn get_cumulative_reset_markers(&self, jf: &JPegHeader) -> u32 { + pub fn get_cumulative_reset_markers(&self, jf: &JpegHeader) -> u32 { if self.rstw != 0 { self.get_mcu() / jf.rsti } else { @@ -79,7 +79,7 @@ impl JpegPositionState { } } - pub fn reset_rstw(&mut self, jf: &JPegHeader) { + pub fn reset_rstw(&mut self, jf: &JpegHeader) { self.rstw = jf.rsti; // eobruns don't span reset intervals @@ -87,7 +87,7 @@ impl JpegPositionState { } /// calculates next position (non interleaved) - fn next_mcu_pos_noninterleaved(&mut self, jf: &JPegHeader) -> JPegDecodeStatus { + fn next_mcu_pos_noninterleaved(&mut self, jf: &JpegHeader) -> JpegDecodeStatus { // increment position self.dpos += 1; @@ -104,31 +104,31 @@ impl JpegPositionState { } // now we've updated dpos, update the current MCU to be a fraction of that - if jf.jpeg_type == JPegType::Sequential { + if jf.jpeg_type == JpegType::Sequential { self.mcu = self.dpos / (cmp_info.sfv * cmp_info.sfh); } // check position if self.dpos >= cmp_info.bc { - return JPegDecodeStatus::ScanCompleted; + return JpegDecodeStatus::ScanCompleted; } else if jf.rsti > 0 { self.rstw -= 1; if self.rstw == 0 { - return JPegDecodeStatus::RestartIntervalExpired; + return JpegDecodeStatus::RestartIntervalExpired; } } - return JPegDecodeStatus::DecodeInProgress; + return JpegDecodeStatus::DecodeInProgress; } /// calculates next position for MCU - pub fn next_mcu_pos(&mut self, jf: &JPegHeader) -> JPegDecodeStatus { + pub fn next_mcu_pos(&mut self, jf: &JpegHeader) -> JpegDecodeStatus { // if there is just one component, go the simple route if jf.cs_cmpc == 1 { return self.next_mcu_pos_noninterleaved(jf); } - let mut sta = JPegDecodeStatus::DecodeInProgress; // status + let mut sta = JpegDecodeStatus::DecodeInProgress; // status let local_mcuh = jf.mcuh.get(); let mut local_mcu = self.mcu; let mut local_cmp = self.cmp; @@ -152,11 +152,11 @@ impl JpegPositionState { local_mcu = self.mcu; if local_mcu >= jf.mcuc { - sta = JPegDecodeStatus::ScanCompleted; + sta = JpegDecodeStatus::ScanCompleted; } else if jf.rsti > 0 { self.rstw -= 1; if self.rstw == 0 { - sta = JPegDecodeStatus::RestartIntervalExpired; + sta = JpegDecodeStatus::RestartIntervalExpired; } } } else { @@ -193,11 +193,11 @@ impl JpegPositionState { } /// skips the eobrun, calculates next position - pub fn skip_eobrun(&mut self, jf: &JPegHeader) -> Result { + pub fn skip_eobrun(&mut self, jf: &JpegHeader) -> Result { assert!(jf.cs_cmpc == 1, "this code only works for non-interleved"); if (self.eobrun) == 0 { - return Ok(JPegDecodeStatus::DecodeInProgress); + return Ok(JpegDecodeStatus::DecodeInProgress); } // compare rst wait counter if needed @@ -244,7 +244,7 @@ impl JpegPositionState { // check position to see if we are done decoding if self.dpos == cmp_info.bc { - Ok(JPegDecodeStatus::ScanCompleted) + Ok(JpegDecodeStatus::ScanCompleted) } else if self.dpos > cmp_info.bc { err_exit_code( ExitCode::UnsupportedJpeg, @@ -252,9 +252,9 @@ impl JpegPositionState { ) .context() } else if jf.rsti > 0 && self.rstw == 0 { - Ok(JPegDecodeStatus::RestartIntervalExpired) + Ok(JpegDecodeStatus::RestartIntervalExpired) } else { - Ok(JPegDecodeStatus::DecodeInProgress) + Ok(JpegDecodeStatus::DecodeInProgress) } } diff --git a/src/jpeg/jpeg_read.rs b/src/jpeg/jpeg_read.rs index 73e84cc..05c89ce 100644 --- a/src/jpeg/jpeg_read.rs +++ b/src/jpeg/jpeg_read.rs @@ -33,26 +33,219 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use std::cmp::{self, max}; -use std::io::{BufRead, Seek}; +use std::io::{BufRead, Read, Seek, SeekFrom}; -use crate::consts::*; use crate::helpers::*; use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result}; +use crate::{consts::*, EnabledFeatures}; use super::bit_reader::BitReader; use super::block_based_image::{AlignedBlock, BlockBasedImage}; -use super::jpeg_header::{HuffTree, JPegHeader, ReconstructionInfo, RestartSegmentCodingInfo}; +use super::jpeg_code; +use super::jpeg_header::{ + parse_jpeg_header, HuffTree, JpegHeader, ReconstructionInfo, RestartSegmentCodingInfo, +}; use super::jpeg_position_state::JpegPositionState; +/// Reads a JPEG file from the provided reader and returns the image data. This function is +/// designed to return all the information needed to reconstruct a bit-level identical +/// JPEG file. +/// +/// In some cases this will not be possible, for example if a JPEG contains certain coding errors +/// that are non-standard, in which case the function will return an error. This doesn't mean the JPEG +/// is corrupt, just that it is not supported for identical reconstruction. +/// +/// The function returns the image data as a vector of `BlockBasedImage`, which contain the +/// DCT coefficients for each block in the image (we do not perform inverse DCT, this would be lossy). +/// In addition, we return a vector of `RestartSegmentCodingInfo` which contains the information +/// needed to reconstruct a portion of the JPEG file starting at the given offset. This is useful +/// for baseline images where we can split the image into sections and decode them in parallel. +/// +/// The callback function is called with the JPEG header information after it has been parsed, and +/// is useful for debugging or logging purposes. Progressive images will contain multiple scans and +/// call the callback multiple times. +/// +/// Non-progressive images support the idea of truncating the image (since this happens frequently) +/// where the bitstream is cut off at an arbitrary point. We assume that all subsequent data is zero, +/// but remember enough to reconstruct the bitstream until there. +/// +/// There is also the concept of "garbage data" which is what comes after the scan data but is not +/// recognized as a header. This garbage data should be appeneded to the end of the file. +pub fn read_jpeg_file( + reader: &mut R, + jpeg_header: &mut JpegHeader, + rinfo: &mut ReconstructionInfo, + enabled_features: &EnabledFeatures, + on_header_callback: fn(&JpegHeader), +) -> Result<( + Vec, + Vec<(u64, RestartSegmentCodingInfo)>, + u64, +)> { + let mut startheader = [0u8; 2]; + reader.read(&mut startheader)?; + if startheader[0] != 0xFF || startheader[1] != jpeg_code::SOI { + return err_exit_code( + ExitCode::UnsupportedJpeg, + "jpeg must start with with 0xff 0xd8", + ); + } + + if !prepare_to_decode_next_scan(jpeg_header, rinfo, reader, enabled_features).context()? { + return err_exit_code(ExitCode::UnsupportedJpeg, "Jpeg does not contain scans"); + } + + on_header_callback(jpeg_header); + + if !enabled_features.progressive && jpeg_header.jpeg_type == JpegType::Progressive { + return err_exit_code( + ExitCode::ProgressiveUnsupported, + "file is progressive, but this is disabled", + ) + .context(); + } + + if jpeg_header.cmpc > COLOR_CHANNEL_NUM_BLOCK_TYPES { + return err_exit_code( + ExitCode::Unsupported4Colors, + "doesn't support 4 color channels", + ) + .context(); + } + + rinfo.truncate_components.init(jpeg_header); + let mut image_data = Vec::::new(); + for i in 0..jpeg_header.cmpc { + // constructor takes height in proportion to the component[0] + image_data.push(BlockBasedImage::new( + &jpeg_header, + i, + 0, + jpeg_header.cmp_info[0].bcv, + )); + } + + let start_scan_position = reader.stream_position()?; + + let mut partitions = Vec::new(); + read_first_scan( + &jpeg_header, + reader, + &mut partitions, + &mut image_data[..], + rinfo, + ) + .context()?; + let mut end_scan_position = reader.stream_position()?; + + if start_scan_position + 2 > end_scan_position { + return err_exit_code(ExitCode::UnsupportedJpeg, "no scan data found in JPEG file") + .context(); + } + + if partitions.len() == 0 { + return err_exit_code( + ExitCode::UnsupportedJpeg, + "no scan information found in JPEG file", + ) + .context(); + } + + if jpeg_header.jpeg_type == JpegType::Sequential { + if rinfo.early_eof_encountered { + rinfo + .truncate_components + .set_truncation_bounds(&jpeg_header, rinfo.max_dpos); + + // If we got an early EOF, then seek backwards and capture the last two bytes and store them as garbage. + // This is necessary since the decoder will assume that zero garbage always means a properly terminated JPEG + // even if early EOF was set to true. + end_scan_position = reader.seek(SeekFrom::Current(-2))?.try_into().unwrap(); + + rinfo.garbage_data.resize(2, 0); + reader.read_exact(&mut rinfo.garbage_data)?; + } + + // rest of data is garbage data if it is a sequential jpeg (including EOI marker) + reader.read_to_end(&mut rinfo.garbage_data).context()?; + } else { + assert!(jpeg_header.jpeg_type == JpegType::Progressive); + + if rinfo.early_eof_encountered { + return err_exit_code( + ExitCode::UnsupportedJpeg, + "truncation is only supported for baseline images", + ) + .context(); + } + + // for progressive images, loop around reading headers and decoding until we a complete image_data + while prepare_to_decode_next_scan(jpeg_header, rinfo, reader, enabled_features).context()? { + on_header_callback(&jpeg_header); + + read_progressive_scan(&jpeg_header, reader, &mut image_data[..], rinfo).context()?; + + if rinfo.early_eof_encountered { + return err_exit_code( + ExitCode::UnsupportedJpeg, + "truncation is only supported for baseline images", + ) + .context(); + } + } + + end_scan_position = reader.stream_position()?; + + // since prepare_to_decode_next_scan consumes the EOI, + // we need to add it to the beginning of the garbage data (if there is any) + rinfo.garbage_data = Vec::from(EOI); + + // append the rest of the file to the buffer + if reader.read_to_end(&mut rinfo.garbage_data).context()? == 0 { + // no need to record EOI garbage data if there wasn't anything read + rinfo.garbage_data.clear(); + } + } + + Ok((image_data, partitions, end_scan_position)) +} + +// false means we hit the end of file marker +fn prepare_to_decode_next_scan( + jpeg_header: &mut JpegHeader, + rinfo: &mut ReconstructionInfo, + reader: &mut R, + enabled_features: &EnabledFeatures, +) -> Result { + // parse the header and store it in the raw_jpeg_header + if !parse_jpeg_header(reader, enabled_features, jpeg_header, rinfo).context()? { + return Ok(false); + } + + rinfo.max_bpos = cmp::max(rinfo.max_bpos, u32::from(jpeg_header.cs_to)); + + // FIXME: not sure why only first bit of csSah is examined but 4 bits of it are stored + rinfo.max_sah = cmp::max( + rinfo.max_sah, + cmp::max(jpeg_header.cs_sal, jpeg_header.cs_sah), + ); + + for i in 0..jpeg_header.cs_cmpc { + rinfo.max_cmp = cmp::max(rinfo.max_cmp, jpeg_header.cs_cmp[i] as u32); + } + + return Ok(true); +} + /// Reads the scan from the JPEG file, writes the image data to the image_data array and /// partitions it into restart segments using the partition callback. /// /// This only works for sequential JPEGs or the first scan in a progressive image. /// For subsequent scans, use the `read_progressive_scan`. -pub fn read_first_scan( - jf: &JPegHeader, +fn read_first_scan( + jf: &JpegHeader, reader: &mut R, - partition: &mut FPARTITION, + partitions: &mut Vec<(u64, RestartSegmentCodingInfo)>, image_data: &mut [BlockBasedImage], reconstruct_info: &mut ReconstructionInfo, ) -> Result<()> { @@ -64,20 +257,20 @@ pub fn read_first_scan( - jf: &JPegHeader, +fn read_progressive_scan( + jf: &JpegHeader, reader: &mut R, image_data: &mut [BlockBasedImage], reconstruct_info: &mut ReconstructionInfo, @@ -164,8 +357,8 @@ pub fn read_progressive_scan( let mut state = JpegPositionState::new(jf, 0); // JPEG imagedata decoding routines - let mut sta = JPegDecodeStatus::DecodeInProgress; - while sta != JPegDecodeStatus::ScanCompleted { + let mut sta = JpegDecodeStatus::DecodeInProgress; + while sta != JpegDecodeStatus::ScanCompleted { // decoding for interleaved data state.reset_rstw(&jf); // restart wait counter @@ -181,7 +374,7 @@ pub fn read_progressive_scan( // only need DC jf.verify_huffman_table(true, false).context()?; - while sta == JPegDecodeStatus::DecodeInProgress { + while sta == JpegDecodeStatus::DecodeInProgress { let current_block = image_data[state.get_cmp()].get_block_mut(state.get_dpos()); // ---> progressive DC encoding <--- @@ -226,7 +419,7 @@ pub fn read_progressive_scan( // ---> succesive approximation first stage <--- let mut block = [0; 64]; - while sta == JPegDecodeStatus::DecodeInProgress { + while sta == JpegDecodeStatus::DecodeInProgress { let current_block = image_data[state.get_cmp()].get_block_mut(state.get_dpos()); if state.eobrun == 0 { @@ -259,7 +452,7 @@ pub fn read_progressive_scan( sta = state.skip_eobrun(&jf).context()?; // proceed only if no error encountered - if sta == JPegDecodeStatus::DecodeInProgress { + if sta == JpegDecodeStatus::DecodeInProgress { sta = state.next_mcu_pos(jf); } } @@ -268,7 +461,7 @@ pub fn read_progressive_scan( let mut block = [0; 64]; - while sta == JPegDecodeStatus::DecodeInProgress { + while sta == JpegDecodeStatus::DecodeInProgress { let current_block = image_data[state.get_cmp()].get_block_mut(state.get_dpos()); for bpos in jf.cs_from..jf.cs_to + 1 { @@ -329,10 +522,10 @@ pub fn read_progressive_scan( // verify that we got the right RST code here since the above should do 1 mcu. // If we didn't then we won't re-encode the file binary identical so there's no point in continuing - if sta == JPegDecodeStatus::RestartIntervalExpired { + if sta == JpegDecodeStatus::RestartIntervalExpired { bit_reader.verify_reset_code().context()?; - sta = JPegDecodeStatus::DecodeInProgress; + sta = JpegDecodeStatus::DecodeInProgress; } } @@ -340,27 +533,27 @@ pub fn read_progressive_scan( } /// reads an entire interval until the RST code -fn decode_baseline_rst( +fn decode_baseline_rst( state: &mut JpegPositionState, - partition: &mut FPARTITION, bit_reader: &mut BitReader, image_data: &mut [BlockBasedImage], do_handoff: &mut bool, - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, reconstruct_info: &mut ReconstructionInfo, -) -> Result { + partitions: &mut Vec<(u64, RestartSegmentCodingInfo)>, +) -> Result { // should have both AC and DC components jpeg_header.verify_huffman_table(true, true).context()?; - let mut sta = JPegDecodeStatus::DecodeInProgress; + let mut sta = JpegDecodeStatus::DecodeInProgress; let mut lastdc = [0i16; 4]; // (re)set last DCs for diff coding - while sta == JPegDecodeStatus::DecodeInProgress { + while sta == JpegDecodeStatus::DecodeInProgress { if *do_handoff { let (bits_already_read, byte_being_read) = bit_reader.overhang(); - partition( - bit_reader.get_stream_position(), + partitions.push(( + bit_reader.stream_position(), RestartSegmentCodingInfo::new( byte_being_read, bits_already_read, @@ -368,7 +561,7 @@ fn decode_baseline_rst Result> { let max_coded_heights: Vec = rinfo.truncate_components.get_max_coded_heights(); @@ -116,7 +117,7 @@ pub fn jpeg_write_baseline_row_range( /// supports progressive encoding whereas the row range version does not pub fn jpeg_write_entire_scan( image_data: &[BlockBasedImage], - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, rinfo: &ReconstructionInfo, current_scan_index: usize, ) -> Result> { @@ -172,7 +173,7 @@ fn recode_one_mcu_row( mcu: u32, lastdc: &mut [i16], framebuffer: &[BlockBasedImage], - jf: &JPegHeader, + jf: &JpegHeader, rinfo: &ReconstructionInfo, current_scan_index: usize, ) -> Result { @@ -186,15 +187,15 @@ fn recode_one_mcu_row( // JPEG imagedata encoding routines while !end_of_row { // (re)set status - let mut sta = JPegDecodeStatus::DecodeInProgress; + let mut sta = JpegDecodeStatus::DecodeInProgress; // ---> sequential interleaved encoding <--- - while sta == JPegDecodeStatus::DecodeInProgress { + while sta == JpegDecodeStatus::DecodeInProgress { let current_block = framebuffer[state.get_cmp()].get_block(state.get_dpos()); let old_mcu = state.get_mcu(); - if jf.jpeg_type == JPegType::Sequential { + if jf.jpeg_type == JpegType::Sequential { // unzigzag let mut block = current_block.zigzag_from_transposed(); @@ -270,7 +271,7 @@ fn recode_one_mcu_row( sta = state.next_mcu_pos(jf); // encode remaining eobrun (iff end of mcu or scan) - if sta != JPegDecodeStatus::DecodeInProgress { + if sta != JpegDecodeStatus::DecodeInProgress { encode_eobrun(huffw, jf.get_huff_ac_codes(state.get_cmp()), &mut state); } } else { @@ -291,7 +292,7 @@ fn recode_one_mcu_row( sta = state.next_mcu_pos(jf); // encode remaining eobrun and correction bits (iff end of mcu or scan) - if sta != JPegDecodeStatus::DecodeInProgress { + if sta != JpegDecodeStatus::DecodeInProgress { encode_eobrun(huffw, jf.get_huff_ac_codes(state.get_cmp()), &mut state); // encode remaining correction bits @@ -302,7 +303,7 @@ fn recode_one_mcu_row( if old_mcu != state.get_mcu() && state.get_mcu() % jf.mcuh == 0 { end_of_row = true; - if sta == JPegDecodeStatus::DecodeInProgress { + if sta == JpegDecodeStatus::DecodeInProgress { // completed only MCU aligned row, not reset interval so don't emit anything special return Ok(false); } @@ -318,10 +319,10 @@ fn recode_one_mcu_row( ); // evaluate status - if sta == JPegDecodeStatus::ScanCompleted { + if sta == JpegDecodeStatus::ScanCompleted { return Ok(true); // leave decoding loop, everything is done here } else { - assert!(sta == JPegDecodeStatus::RestartIntervalExpired); + assert!(sta == JpegDecodeStatus::RestartIntervalExpired); // status 1 means restart if jf.rsti > 0 { diff --git a/src/jpeg/mod.rs b/src/jpeg/mod.rs index 59668ad..9abf4cb 100644 --- a/src/jpeg/mod.rs +++ b/src/jpeg/mod.rs @@ -10,6 +10,7 @@ mod bit_reader; mod bit_writer; mod component_info; +pub mod jpeg_code; mod jpeg_position_state; pub mod block_based_image; diff --git a/src/jpeg/truncate_components.rs b/src/jpeg/truncate_components.rs index 677ff70..2b3872d 100644 --- a/src/jpeg/truncate_components.rs +++ b/src/jpeg/truncate_components.rs @@ -7,7 +7,7 @@ use std::cmp; use super::component_info::ComponentInfo; -use super::jpeg_header::JPegHeader; +use super::jpeg_header::JpegHeader; #[derive(Debug, Clone)] struct TrucateComponentsInfo { @@ -39,7 +39,7 @@ impl Default for TruncateComponents { } impl TruncateComponents { - pub fn init(&mut self, jpeg_header: &JPegHeader) { + pub fn init(&mut self, jpeg_header: &JpegHeader) { self.mcu_count_horizontal = jpeg_header.mcuh.get(); self.mcu_count_vertical = jpeg_header.mcuv.get(); self.components_count = jpeg_header.cmpc; @@ -61,7 +61,7 @@ impl TruncateComponents { return retval; } - pub fn set_truncation_bounds(&mut self, jpeg_header: &JPegHeader, max_d_pos: [u32; 4]) { + pub fn set_truncation_bounds(&mut self, jpeg_header: &JpegHeader, max_d_pos: [u32; 4]) { for i in 0..self.components_count { TruncateComponents::set_block_count_d_pos( &mut self.trunc_info[i], diff --git a/src/lib.rs b/src/lib.rs index bdbceed..309d718 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,6 @@ mod consts; mod helpers; pub mod jpeg; -mod jpeg_code; pub mod metrics; mod structs; diff --git a/src/structs/lepton_file_reader.rs b/src/structs/lepton_file_reader.rs index 9fc5d21..8682a63 100644 --- a/src/structs/lepton_file_reader.rs +++ b/src/structs/lepton_file_reader.rs @@ -16,9 +16,9 @@ use log::warn; use crate::consts::*; use crate::enabled_features::EnabledFeatures; use crate::jpeg::block_based_image::BlockBasedImage; -use crate::jpeg::jpeg_header::{JPegHeader, ReconstructionInfo, RestartSegmentCodingInfo}; +use crate::jpeg::jpeg_code; +use crate::jpeg::jpeg_header::{JpegHeader, ReconstructionInfo, RestartSegmentCodingInfo}; use crate::jpeg::jpeg_write::{jpeg_write_baseline_row_range, jpeg_write_entire_scan}; -use crate::jpeg_code; use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result}; use crate::metrics::{CpuTimeMeasure, Metrics}; use crate::structs::lepton_decoder::lepton_decode_row_range; @@ -253,9 +253,10 @@ impl LeptonFileReader { // Blit any trailing header data. // Run this logic even if early_eof_encountered to be compatible with C++ version. results.push( - self.lh.raw_jpeg_header[self.lh.raw_jpeg_header_read_index..].to_vec(), + self.lh.rinfo.raw_jpeg_header[self.lh.raw_jpeg_header_read_index..] + .to_vec(), ); - results.push(mem::take(&mut self.lh.garbage_data)); + results.push(mem::take(&mut self.lh.rinfo.garbage_data)); self.state = DecoderState::ReturnResults(0, mem::take(results)); } @@ -315,7 +316,7 @@ impl LeptonFileReader { let mut header = Vec::new(); header.write_all(&SOI)?; header - .write_all(&lh.raw_jpeg_header[0..lh.raw_jpeg_header_read_index]) + .write_all(&lh.rinfo.raw_jpeg_header[0..lh.raw_jpeg_header_read_index]) .context()?; results.insert(0, header); @@ -325,7 +326,7 @@ impl LeptonFileReader { // // This logic is no longer needed for Rust generated Lepton files, since we just use the garbage // data to store any extra RST codes or whatever else might be at the end of the file. - if lh.rst_err.len() > 0 { + if lh.rinfo.rst_err.len() > 0 { let mut markers = Vec::new(); let cumulative_reset_markers = if lh.jpeg_header.rsti != 0 { @@ -334,15 +335,15 @@ impl LeptonFileReader { 0 } as u8; - for i in 0..lh.rst_err[0] { + for i in 0..lh.rinfo.rst_err[0] { let rst = jpeg_code::RST0 + ((cumulative_reset_markers + i) & 7); markers.push(0xFF); markers.push(rst); } let expected_total_length = results.iter().map(|x| x.len()).sum::() - + lh.garbage_data.len() - + (lh.raw_jpeg_header.len() - lh.raw_jpeg_header_read_index); + + lh.rinfo.garbage_data.len() + + (lh.rinfo.raw_jpeg_header.len() - lh.raw_jpeg_header_read_index); if expected_total_length < lh.jpeg_file_size as usize { // figure out how much extra space we have, since C++ files can have @@ -375,7 +376,7 @@ impl LeptonFileReader { let mut header = Vec::new(); header.write_all(&SOI)?; header - .write_all(&lh.raw_jpeg_header[0..lh.raw_jpeg_header_read_index]) + .write_all(&lh.rinfo.raw_jpeg_header[0..lh.raw_jpeg_header_read_index]) .context()?; let mut results = Vec::new(); @@ -392,7 +393,7 @@ impl LeptonFileReader { let old_pos = lh.raw_jpeg_header_read_index; let result = lh.advance_next_header_segment(enabled_features).context()?; - results.push(lh.raw_jpeg_header[old_pos..lh.raw_jpeg_header_read_index].to_vec()); + results.push(lh.rinfo.raw_jpeg_header[old_pos..lh.raw_jpeg_header_read_index].to_vec()); if !result { break; @@ -413,7 +414,7 @@ impl LeptonFileReader { if v[..] != LEPTON_HEADER_COMPLETION_MARKER { return err_exit_code(ExitCode::BadLeptonFile, "CMP marker not found"); } - Ok(if lh.jpeg_header.jpeg_type == JPegType::Progressive { + Ok(if lh.jpeg_header.jpeg_type == JpegType::Progressive { let mux = Self::run_lepton_decoder_threads( lh, enabled_features, @@ -511,7 +512,7 @@ impl LeptonFileReader { process: fn( thread_handoff: &ThreadHandoff, image_data: Vec, - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, rinfo: &ReconstructionInfo, ) -> Result

, ) -> Result> { @@ -547,7 +548,7 @@ impl LeptonFileReader { /// the logic of a decoder thread. Takes a range of rows fn run_lepton_decoder_processor

( - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, rinfo: &ReconstructionInfo, thread_handoff: &ThreadHandoff, is_last_thread: bool, @@ -557,7 +558,7 @@ impl LeptonFileReader { process: fn( &ThreadHandoff, Vec, - &JPegHeader, + &JpegHeader, &ReconstructionInfo, ) -> Result

, ) -> Result<(Metrics, P)> { @@ -606,35 +607,26 @@ impl LeptonFileReader { // test serializing and deserializing header #[test] fn parse_and_write_header() { + use crate::jpeg::jpeg_read::read_jpeg_file; use std::io::Read; - // minimal jpeg that will pass the validity read tests - let min_jpeg = [ - 0xffu8, 0xe0, // APP0 - 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, - 0x00, 0xff, 0xdb, // DQT - 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02, 0x03, 0x03, - 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, 0x06, 0x09, - 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, 0x0b, - 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, - 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xC1, 0x00, 0x0b, 0x08, 0x00, - 0x10, // width - 0x00, 0x10, // height - 0x01, // cmpc - 0x01, // Jid - 0x11, // sfv / sfh - 0x00, 0xff, 0xda, // SOS - 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, // EOI - ]; + let min_jpeg = read_file("tiny", ".jpg"); + let mut lh = LeptonHeader::default_boxed(); let enabled_features = EnabledFeatures::compat_lepton_vector_read(); - let mut lh = LeptonHeader::default_boxed(); - lh.jpeg_file_size = 123; - lh.uncompressed_lepton_header_size = Some(140); + lh.jpeg_file_size = min_jpeg.len() as u32; + lh.uncompressed_lepton_header_size = Some(752); + + let (_image_data, _partitions, _end_scan) = read_jpeg_file( + &mut Cursor::new(min_jpeg), + &mut lh.jpeg_header, + &mut lh.rinfo, + &enabled_features, + |_| {}, + ) + .unwrap(); - lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &enabled_features) - .unwrap(); lh.thread_handoff.push(ThreadHandoff { luma_y_start: 0, luma_y_end: 1, diff --git a/src/structs/lepton_file_writer.rs b/src/structs/lepton_file_writer.rs index 377cf08..4187718 100644 --- a/src/structs/lepton_file_writer.rs +++ b/src/structs/lepton_file_writer.rs @@ -5,7 +5,7 @@ *--------------------------------------------------------------------------------------------*/ use std::cmp; -use std::io::{BufRead, Cursor, Read, Seek, SeekFrom, Write}; +use std::io::{BufRead, Cursor, Seek, Write}; use std::time::Instant; use byteorder::{LittleEndian, WriteBytesExt}; @@ -15,10 +15,9 @@ use log::info; use crate::consts::*; use crate::enabled_features::EnabledFeatures; use crate::jpeg::block_based_image::BlockBasedImage; -use crate::jpeg::jpeg_header::JPegHeader; -use crate::jpeg::jpeg_read::{read_first_scan, read_progressive_scan}; +use crate::jpeg::jpeg_header::JpegHeader; +use crate::jpeg::jpeg_read::read_jpeg_file; use crate::jpeg::truncate_components::TruncateComponents; -use crate::jpeg_code; use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result}; use crate::metrics::{CpuTimeMeasure, Metrics}; use crate::structs::lepton_encoder::lepton_encode_row_range; @@ -115,89 +114,40 @@ pub fn encode_lepton_wrapper_verify( pub fn read_jpeg( reader: &mut R, enabled_features: &EnabledFeatures, - callback: fn(&JPegHeader), + callback: fn(&JpegHeader), ) -> Result<(Box, Vec)> { - let var_name = [0u8; 2]; - let mut startheader = var_name; - reader.read_exact(&mut startheader)?; - if startheader[0] != 0xFF || startheader[1] != jpeg_code::SOI { - return err_exit_code(ExitCode::UnsupportedJpeg, "header invalid"); - } - let mut lp = LeptonHeader::default_boxed(); get_git_revision(&mut lp); - if !prepare_to_decode_next_scan(&mut lp, reader, enabled_features).context()? { - return err_exit_code(ExitCode::UnsupportedJpeg, "JPeg does not contain scans"); - } - - callback(&lp.jpeg_header); - - if !enabled_features.progressive && lp.jpeg_header.jpeg_type == JPegType::Progressive { - return err_exit_code( - ExitCode::ProgressiveUnsupported, - "file is progressive, but this is disabled", - ) - .context(); - } - - if lp.jpeg_header.cmpc > COLOR_CHANNEL_NUM_BLOCK_TYPES { - return err_exit_code( - ExitCode::Unsupported4Colors, - " can't support this kind of image", - ) - .context(); - } - - lp.rinfo.truncate_components.init(&lp.jpeg_header); - let mut image_data = Vec::::new(); - for i in 0..lp.jpeg_header.cmpc { - // constructor takes height in proportion to the component[0] - image_data.push(BlockBasedImage::new( - &lp.jpeg_header, - i, - 0, - lp.jpeg_header.cmp_info[0].bcv, - )); - } - - let mut thread_handoff = Vec::::new(); - let start_scan: u32 = reader.stream_position()?.try_into().unwrap(); - read_first_scan( - &lp.jpeg_header, + let (image_data, partitions, end_scan) = read_jpeg_file( reader, - &mut |segment_offset_in_file, restart_info| { - let retval = ThreadHandoff { - segment_offset_in_file: segment_offset_in_file, - luma_y_start: restart_info.luma_y_start, - luma_y_end: restart_info.luma_y_end, - overhang_byte: restart_info.overhang_byte, - num_overhang_bits: restart_info.num_overhang_bits, - last_dc: restart_info.last_dc, - segment_size: 0, // initialized later - }; - - thread_handoff.push(retval); - }, - &mut image_data[..], + &mut lp.jpeg_header, &mut lp.rinfo, - ) - .context()?; - - let mut end_scan = reader.stream_position()?.try_into().unwrap(); + enabled_features, + callback, + )?; - // need at least two bytes of scan data - if start_scan + 2 > end_scan || thread_handoff.len() == 0 { - return err_exit_code( - ExitCode::UnsupportedJpeg, - "couldnt find any sections to encode", - ) - .context(); - } + let mut thread_handoff = Vec::::new(); - for i in 0..thread_handoff.len() { - thread_handoff[i].segment_offset_in_file += start_scan; + for i in 0..partitions.len() { + let (segment_offset, r) = &partitions[i]; + + let segment_size = if i == partitions.len() - 1 { + end_scan - segment_offset + } else { + partitions[i + 1].0 - segment_offset + }; + + thread_handoff.push(ThreadHandoff { + segment_offset_in_file: (*segment_offset).try_into().unwrap(), + luma_y_start: r.luma_y_start, + luma_y_end: r.luma_y_end, + overhang_byte: r.overhang_byte, + num_overhang_bits: r.num_overhang_bits, + last_dc: r.last_dc, + segment_size: segment_size.try_into().unwrap(), + }); #[cfg(feature = "detailed_tracing")] info!( @@ -210,70 +160,6 @@ pub fn read_jpeg( ); } - if lp.jpeg_header.jpeg_type == JPegType::Sequential { - if lp.rinfo.early_eof_encountered { - lp.rinfo - .truncate_components - .set_truncation_bounds(&lp.jpeg_header, lp.rinfo.max_dpos); - - // If we got an early EOF, then seek backwards and capture the last two bytes and store them as garbage. - // This is necessary since the decoder will assume that zero garbage always means a properly terminated JPEG - // even if early EOF was set to true. - reader.seek(SeekFrom::Current(-2))?; - lp.garbage_data.resize(2, 0); - reader.read_exact(&mut lp.garbage_data)?; - - // take these two last bytes off the last segment. For some reason the C++/CS version only chop of one byte - // and then fix up the broken file later in the decoder. The following logic will create a valid file - // that the C++ and CS version will still decode properly without the fixup logic. - let len = thread_handoff.len(); - thread_handoff[len - 1].segment_size = - thread_handoff[len - 1].segment_size.saturating_sub(2); - } - - // rest of data is garbage data if it is a sequential jpeg (including EOI marker) - reader.read_to_end(&mut lp.garbage_data).context()?; - } else { - assert!(lp.jpeg_header.jpeg_type == JPegType::Progressive); - - if lp.rinfo.early_eof_encountered { - return err_exit_code( - ExitCode::UnsupportedJpeg, - "truncation is only supported for baseline images", - ) - .context(); - } - - // for progressive images, loop around reading headers and decoding until we a complete image_data - while prepare_to_decode_next_scan(&mut lp, reader, enabled_features).context()? { - callback(&lp.jpeg_header); - - read_progressive_scan(&lp.jpeg_header, reader, &mut image_data[..], &mut lp.rinfo) - .context()?; - - if lp.rinfo.early_eof_encountered { - return err_exit_code( - ExitCode::UnsupportedJpeg, - "truncation is only supported for baseline images", - ) - .context(); - } - } - - end_scan = reader.stream_position()? as u32; - - // since prepare_to_decode_next_scan consumes the EOI, - // we need to add it to the beginning of the garbage data (if there is any) - lp.garbage_data = Vec::from(EOI); - - // append the rest of the file to the buffer - if reader.read_to_end(&mut lp.garbage_data).context()? == 0 { - // no need to record EOI garbage data if there wasn't anything read - lp.garbage_data.clear(); - } - } - - set_segment_size_in_row_thread_handoffs(&mut thread_handoff[..], end_scan); let merged_handoffs = split_row_handoffs_to_threads(&thread_handoff[..], enabled_features.max_threads as usize); lp.thread_handoff = merged_handoffs; @@ -318,7 +204,7 @@ fn get_git_revision(lp: &mut LeptonHeader) { /// runs the encoding threads and returns the total amount of CPU time consumed (including worker threads) fn run_lepton_encoder_threads( - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, colldata: &TruncateComponents, writer: &mut W, thread_handoffs: &[ThreadHandoff], @@ -460,47 +346,6 @@ fn get_number_of_threads_for_encoding( return num_threads; } -// false means we hit the end of file marker -fn prepare_to_decode_next_scan( - lp: &mut LeptonHeader, - reader: &mut R, - enabled_features: &EnabledFeatures, -) -> Result { - // parse the header and store it in the raw_jpeg_header - if !lp.parse_jpeg_header(reader, enabled_features).context()? { - return Ok(false); - } - - lp.rinfo.max_bpos = cmp::max(lp.rinfo.max_bpos, u32::from(lp.jpeg_header.cs_to)); - - // FIXME: not sure why only first bit of csSah is examined but 4 bits of it are stored - lp.rinfo.max_sah = cmp::max( - lp.rinfo.max_sah, - cmp::max(lp.jpeg_header.cs_sal, lp.jpeg_header.cs_sah), - ); - - for i in 0..lp.jpeg_header.cs_cmpc { - lp.rinfo.max_cmp = cmp::max(lp.rinfo.max_cmp, lp.jpeg_header.cs_cmp[i] as u32); - } - - return Ok(true); -} - -fn set_segment_size_in_row_thread_handoffs( - thread_handoffs: &mut [ThreadHandoff], - entropy_data_end_offset_in_file: u32, -) { - if thread_handoffs.len() != 0 { - for i in 0..thread_handoffs.len() - 1 { - thread_handoffs[i].segment_size = thread_handoffs[i + 1].segment_offset_in_file - - thread_handoffs[i].segment_offset_in_file; - } - - thread_handoffs[thread_handoffs.len() - 1].segment_size = entropy_data_end_offset_in_file - - thread_handoffs[thread_handoffs.len() - 1].segment_offset_in_file; - } -} - #[test] fn test_get_git_revision() { let mut lh = LeptonHeader::default_boxed(); diff --git a/src/structs/lepton_header.rs b/src/structs/lepton_header.rs index 9e42157..1c15df4 100644 --- a/src/structs/lepton_header.rs +++ b/src/structs/lepton_header.rs @@ -8,7 +8,7 @@ use flate2::Compression; use crate::consts::*; use crate::helpers::buffer_prefix_matches_marker; -use crate::jpeg::jpeg_header::{JPegHeader, ReconstructionInfo}; +use crate::jpeg::jpeg_header::{JpegHeader, ReconstructionInfo}; use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result}; use crate::structs::thread_handoff::ThreadHandoff; use crate::EnabledFeatures; @@ -17,9 +17,6 @@ pub const FIXED_HEADER_SIZE: usize = 28; #[derive(Debug, DefaultBoxed)] pub struct LeptonHeader { - /// raw jpeg header to be written back to the file when it is recreated - pub raw_jpeg_header: Vec, - /// how far we have read into the raw header, since the header is divided /// into multiple chucks for each scan. For example, a progressive image /// would start with the jpeg image segments, followed by a SOS (start of scan) @@ -29,12 +26,7 @@ pub struct LeptonHeader { pub thread_handoff: Vec, - pub jpeg_header: JPegHeader, - - pub rst_err: Vec, - - /// garbage data (default value - empty segment - means no garbage data) - pub garbage_data: Vec, + pub jpeg_header: JpegHeader, pub rinfo: ReconstructionInfo, @@ -135,14 +127,14 @@ impl LeptonHeader { // limit reading to the compressed header let mut compressed_reader = reader.take(compressed_header_size as u64); - self.raw_jpeg_header = self + self.rinfo.raw_jpeg_header = self .read_lepton_compressed_header(&mut compressed_reader) .context()?; self.raw_jpeg_header_read_index = 0; { - let mut header_data_cursor = Cursor::new(&self.raw_jpeg_header[..]); + let mut header_data_cursor = Cursor::new(&self.rinfo.raw_jpeg_header[..]); self.jpeg_header .parse(&mut header_data_cursor, &enabled_features) .context()?; @@ -167,7 +159,7 @@ impl LeptonHeader { // (a bit of broken logic in the encoder, but can't change it without breaking the file format) if self.rinfo.early_eof_encountered { let mut max_last_segment_size = self.jpeg_file_size - - u32::try_from(self.garbage_data.len())? + - u32::try_from(self.rinfo.garbage_data.len())? - u32::try_from(self.raw_jpeg_header_read_index)? - u32::try_from(SOI.len())?; @@ -195,7 +187,7 @@ impl LeptonHeader { enabled_features: &EnabledFeatures, ) -> Result { let mut header_cursor = - Cursor::new(&self.raw_jpeg_header[self.raw_jpeg_header_read_index..]); + Cursor::new(&self.rinfo.raw_jpeg_header[self.raw_jpeg_header_read_index..]); let result = self .jpeg_header @@ -224,12 +216,12 @@ impl LeptonHeader { hdr_data.resize(hdrs, 0); header_reader.read_exact(&mut hdr_data)?; - if self.garbage_data.len() == 0 { + if self.rinfo.garbage_data.len() == 0 { // if we don't have any garbage, assume FFD9 EOI // kind of broken logic since this assumes a EOF even if there was a 0 byte garbage header // in the file, but this is what the file format is. - self.garbage_data.extend(EOI); + self.rinfo.garbage_data.extend(EOI); } // beginning here: recovery information (needed for exact JPEG recovery) @@ -284,7 +276,7 @@ impl LeptonHeader { header_reader.read_exact(&mut rst_err_data)?; - self.rst_err.append(&mut rst_err_data); + self.rinfo.rst_err.append(&mut rst_err_data); } else if buffer_prefix_matches_marker( current_lepton_marker, LEPTON_HEADER_GARBAGE_MARKER, @@ -297,7 +289,7 @@ impl LeptonHeader { garbage_data_array.resize(garbage_size, 0); header_reader.read_exact(&mut garbage_data_array)?; - self.garbage_data = garbage_data_array; + self.rinfo.garbage_data = garbage_data_array; } else if buffer_prefix_matches_marker( current_lepton_marker, LEPTON_HEADER_EARLY_EOF_MARKER, @@ -356,7 +348,7 @@ impl LeptonHeader { writer.write_all(&LEPTON_FILE_HEADER)?; writer.write_u8(LEPTON_VERSION)?; - if self.jpeg_header.jpeg_type == JPegType::Progressive { + if self.jpeg_header.jpeg_type == JpegType::Progressive { writer.write_all(&LEPTON_HEADER_PROGRESSIVE_JPEG_TYPE)?; } else { writer.write_all(&LEPTON_HEADER_BASELINE_JPEG_TYPE)?; @@ -407,10 +399,10 @@ impl LeptonHeader { // marker: "HDR" + [size of header] mrw.write_all(&LEPTON_HEADER_MARKER)?; - mrw.write_u32::(self.raw_jpeg_header.len() as u32)?; + mrw.write_u32::(self.rinfo.raw_jpeg_header.len() as u32)?; // data: data from header - mrw.write_all(&self.raw_jpeg_header[..])?; + mrw.write_all(&self.rinfo.raw_jpeg_header[..])?; Ok(()) } @@ -452,13 +444,13 @@ impl LeptonHeader { fn write_lepton_jpeg_restart_errors_if_needed(&self, mrw: &mut W) -> Result<()> { // write number of false set RST markers per scan (if available) to file - if self.rst_err.len() > 0 { + if self.rinfo.rst_err.len() > 0 { // marker: "FRS" + [number of scans] mrw.write_all(&LEPTON_HEADER_JPG_RESTART_ERRORS_MARKER)?; - mrw.write_u32::(self.rst_err.len() as u32)?; + mrw.write_u32::(self.rinfo.rst_err.len() as u32)?; - mrw.write_all(&self.rst_err[..])?; + mrw.write_all(&self.rinfo.rst_err[..])?; } Ok(()) @@ -490,7 +482,7 @@ impl LeptonHeader { prefix_garbage: bool, ) -> Result<()> { // write garbage (if any) to file - if self.garbage_data.len() > 0 { + if self.rinfo.garbage_data.len() > 0 { // marker: "PGR/GRB" + [size of garbage] if prefix_garbage { mrw.write_all(&LEPTON_HEADER_PREFIX_GARBAGE_MARKER)?; @@ -498,71 +490,12 @@ impl LeptonHeader { mrw.write_all(&LEPTON_HEADER_GARBAGE_MARKER)?; } - mrw.write_u32::(self.garbage_data.len() as u32)?; - mrw.write_all(&self.garbage_data[..])?; + mrw.write_u32::(self.rinfo.garbage_data.len() as u32)?; + mrw.write_all(&self.rinfo.garbage_data[..])?; } Ok(()) } - - pub fn parse_jpeg_header( - &mut self, - reader: &mut R, - enabled_features: &EnabledFeatures, - ) -> Result { - // the raw header in the lepton file can actually be spread across different sections - // seperated by the Start-of-Scan marker. We use the mirror to write out whatever - // data we parse until we hit the SOS - - let mut output = Vec::new(); - let mut output_cursor = Cursor::new(&mut output); - - let mut mirror = Mirror::new(reader, &mut output_cursor); - - if self - .jpeg_header - .parse(&mut mirror, enabled_features) - .context()? - { - // append the header if it was not the end of file marker - self.raw_jpeg_header.append(&mut output); - return Ok(true); - } else { - // if the output was more than 2 bytes then was a trailing header, so keep that around as well, - // but we don't want the EOI since that goes into the garbage data. - if output.len() > 2 { - self.raw_jpeg_header.extend(&output[0..output.len() - 2]); - } - - return Ok(false); - } - } -} - -// internal utility we use to collect the header that we read for later -struct Mirror<'a, R, W> { - read: &'a mut R, - output: &'a mut W, - amount_written: usize, -} - -impl<'a, R, W> Mirror<'a, R, W> { - pub fn new(read: &'a mut R, output: &'a mut W) -> Self { - Mirror { - read, - output, - amount_written: 0, - } - } -} - -impl Read for Mirror<'_, R, W> { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let n = self.read.read(buf)?; - self.output.write_all(&buf[..n])?; - self.amount_written += n; - Ok(n) - } } #[test] @@ -655,13 +588,15 @@ fn parse_and_write_header() { assert_eq!(lh.encoder_version, other.encoder_version); assert_eq!(lh.jpeg_file_size, other.jpeg_file_size); - assert_eq!(lh.raw_jpeg_header, other.raw_jpeg_header); + assert_eq!(lh.rinfo.raw_jpeg_header, other.rinfo.raw_jpeg_header); assert_eq!(lh.thread_handoff, other.thread_handoff); } #[cfg(test)] fn make_minimal_lepton_header() -> Box { // minimal jpeg that will pass the validity read tests + + use crate::jpeg::jpeg_header::parse_jpeg_header; let min_jpeg = [ 0xffu8, 0xe0, // APP0 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, @@ -686,8 +621,13 @@ fn make_minimal_lepton_header() -> Box { lh.jpeg_file_size = 123; lh.uncompressed_lepton_header_size = Some(156); - lh.parse_jpeg_header(&mut Cursor::new(min_jpeg), &enabled_features) - .unwrap(); + parse_jpeg_header( + &mut Cursor::new(min_jpeg), + &enabled_features, + &mut lh.jpeg_header, + &mut lh.rinfo, + ) + .unwrap(); lh.thread_handoff.push(ThreadHandoff { luma_y_start: 0, luma_y_end: 1, diff --git a/src/structs/quantization_tables.rs b/src/structs/quantization_tables.rs index f597068..12ffaed 100644 --- a/src/structs/quantization_tables.rs +++ b/src/structs/quantization_tables.rs @@ -6,7 +6,7 @@ use crate::consts::*; use crate::helpers::*; -use crate::jpeg::jpeg_header::JPegHeader; +use crate::jpeg::jpeg_header::JpegHeader; use crate::lepton_error::err_exit_code; use crate::{ExitCode, Result}; @@ -21,7 +21,7 @@ pub struct QuantizationTables { } impl QuantizationTables { - pub fn new(jpeg_header: &JPegHeader, component: usize) -> Self { + pub fn new(jpeg_header: &JpegHeader, component: usize) -> Self { Self::new_from_table( &jpeg_header.q_tables[usize::from(jpeg_header.cmp_info[component].q_table_index)], ) @@ -65,7 +65,7 @@ impl QuantizationTables { /// constructs the quantization table based on the jpeg header pub fn construct_quantization_tables( - jpeg_header: &JPegHeader, + jpeg_header: &JpegHeader, ) -> Result> { let mut quantization_tables = Vec::new(); for i in 0..jpeg_header.cmpc {