Made JPEG reading completely independent from Lepton encoding (#129)

* refactoring * move jpeg code into jpeg * simplified jpeg_read * jpegread is now a single call * remove warnings * added comments * fixed comments * add more comments * Update src/jpeg/jpeg_read.rs Co-authored-by: Ivan Siutsou <[email protected]> Signed-off-by: Kristof Roomp <[email protected]> * Update src/jpeg/jpeg_read.rs Co-authored-by: Ivan Siutsou <[email protected]> Signed-off-by: Kristof Roomp <[email protected]> * rename JPeg to Jpeg --------- Signed-off-by: Kristof Roomp <[email protected]> Co-authored-by: Ivan Siutsou <[email protected]>
microsoft · Dec 21, 2024 · eb53c11 · eb53c11
1 parent 4460093
commit eb53c11
Show file tree

Hide file tree

Showing 15 changed files with 466 additions and 427 deletions.
diff --git a/src/consts.rs b/src/consts.rs
@@ -4,15 +4,17 @@
  *  This software incorporates material from third parties. See NOTICE.txt for details.
  *--------------------------------------------------------------------------------------------*/
 
+use crate::jpeg::jpeg_code;
+
 #[derive(PartialEq, Debug)]
-pub enum JPegDecodeStatus {
+pub enum JpegDecodeStatus {
     DecodeInProgress,
     RestartIntervalExpired,
     ScanCompleted,
 }
 
 #[derive(PartialEq, Debug, Copy, Clone)]
-pub enum JPegType {
+pub enum JpegType {
     Unknown,
     Sequential,
     Progressive,
@@ -86,8 +88,8 @@ pub const SMALL_FILE_BYTES_PER_ENCDOING_THREAD: usize = 125000;
 pub const MAX_THREADS_SUPPORTED_BY_LEPTON_FORMAT: usize = 16; // Number of threads minus 1 should fit in 4 bits
 
 //pub const SingleFFByte : [u8;1] = [ 0xFF ];
-pub const EOI: [u8; 2] = [0xFF, crate::jpeg_code::EOI]; // EOI segment
-pub const SOI: [u8; 2] = [0xFF, crate::jpeg_code::SOI]; // SOI segment
+pub const EOI: [u8; 2] = [0xFF, jpeg_code::EOI]; // EOI segment
+pub const SOI: [u8; 2] = [0xFF, jpeg_code::SOI]; // SOI segment
 pub const LEPTON_FILE_HEADER: [u8; 2] = [0xcf, 0x84]; // the tau symbol for a tau lepton in utf-8
 pub const LEPTON_HEADER_BASELINE_JPEG_TYPE: [u8; 1] = [b'Z'];
 pub const LEPTON_HEADER_PROGRESSIVE_JPEG_TYPE: [u8; 1] = [b'X'];

diff --git a/src/jpeg/bit_reader.rs b/src/jpeg/bit_reader.rs
@@ -6,9 +6,10 @@
 
 use std::io::{BufRead, Seek};
 
+use super::jpeg_code;
 use crate::helpers::has_ff;
 use crate::lepton_error::{err_exit_code, ExitCode};
-use crate::{jpeg_code, LeptonError};
+use crate::LeptonError;
 
 // Implementation of a bit reader on top of a JPEG data stream as read by a reader
 pub struct BitReader<R> {
@@ -17,18 +18,20 @@ pub struct BitReader<R> {
     bits_left: u32,
     cpos: u32,
     eof: bool,
-    start_offset: u64,
     truncated_ff: bool,
     read_ahead_bytes: u32,
 }
 
 impl<R: BufRead + Seek> BitReader<R> {
-    pub fn get_stream_position(&mut self) -> u32 {
+    /// Returns the current position in the stream, which corresponds to the byte that has
+    /// unread bits in it.
+    ///
+    /// If the last byte was a 0xff, then the position is the byte before
+    /// the 0xff.
+    pub fn stream_position(&mut self) -> u64 {
         self.undo_read_ahead();
 
-        let pos: u32 = (self.inner.stream_position().unwrap() - self.start_offset)
-            .try_into()
-            .unwrap();
+        let pos = self.inner.stream_position().unwrap();
 
         if self.bits_left > 0 && !self.eof {
             if self.bits as u8 == 0xff && !self.truncated_ff {
@@ -41,16 +44,13 @@ impl<R: BufRead + Seek> BitReader<R> {
         }
     }
 
-    pub fn new(mut inner: R) -> Self {
-        let start_offset = inner.stream_position().unwrap();
-
+    pub fn new(inner: R) -> Self {
         BitReader {
             inner: inner,
             bits: 0,
             bits_left: 0,
             cpos: 0,
             eof: false,
-            start_offset,
             truncated_ff: false,
             read_ahead_bytes: 0,
         }
@@ -305,11 +305,11 @@ fn read_simple() {
 
     assert_eq!(1, b.read(4).unwrap());
     assert_eq!((4, 0x10), b.overhang());
-    assert_eq!(0, b.get_stream_position());
+    assert_eq!(0, b.stream_position());
 
     assert_eq!(2, b.read(4).unwrap());
     assert_eq!((0, 0), b.overhang()); // byte is aligned should be no overhang
-    assert_eq!(1, b.get_stream_position());
+    assert_eq!(1, b.stream_position());
 
     assert_eq!(3, b.read(4).unwrap());
     assert_eq!(4, b.read(4).unwrap());
@@ -319,20 +319,20 @@ fn read_simple() {
 
     assert_eq!(0x9f, b.read(8).unwrap());
     assert_eq!((4, 0xf0), b.overhang());
-    assert_eq!(5, b.get_stream_position()); // should be at the beginning of the escape code
+    assert_eq!(5, b.stream_position()); // should be at the beginning of the escape code
 
     assert_eq!(0xfe, b.read(8).unwrap());
     assert_eq!((4, 0xe0), b.overhang());
-    assert_eq!(7, b.get_stream_position()); // now we are after the escape code
+    assert_eq!(7, b.stream_position()); // now we are after the escape code
 
     assert_eq!(0xe, b.read(4).unwrap());
     assert_eq!((0, 0), b.overhang());
-    assert_eq!(8, b.get_stream_position()); // now we read everything and should be at the end of the stream
+    assert_eq!(8, b.stream_position()); // now we read everything and should be at the end of the stream
 
     // read an empty byte past the end of the stream; should be zero and trigger EOF
     assert_eq!(0, b.read(8).unwrap());
     assert_eq!(true, b.is_eof());
-    assert_eq!(8, b.get_stream_position()); // still at the same position
+    assert_eq!(8, b.stream_position()); // still at the same position
 }
 
 // what happens when a file has 0xff as the last character (assume that it is an escaped 0xff)
@@ -342,23 +342,23 @@ fn read_truncate_ff() {
 
     let mut b = BitReader::new(Cursor::new(&arr));
 
-    assert_eq!(0, b.get_stream_position());
+    assert_eq!(0, b.stream_position());
 
     assert_eq!(0x1, b.read(4).unwrap());
-    assert_eq!(0, b.get_stream_position());
+    assert_eq!(0, b.stream_position());
 
     assert_eq!(0x2f, b.read(8).unwrap());
     assert_eq!((4, 0xf0), b.overhang());
-    assert_eq!(1, b.get_stream_position());
+    assert_eq!(1, b.stream_position());
 
     // 4 bits left, not EOF yet
     assert_eq!(false, b.is_eof());
 
     assert_eq!(0xf, b.read(4).unwrap());
     assert_eq!(false, b.is_eof()); // now we are at the end really
-    assert_eq!(2, b.get_stream_position());
+    assert_eq!(2, b.stream_position());
 
     assert_eq!(0, b.read(4).unwrap());
     assert_eq!(true, b.is_eof());
-    assert_eq!(2, b.get_stream_position());
+    assert_eq!(2, b.stream_position());
 }
diff --git a/src/jpeg/block_based_image.rs b/src/jpeg/block_based_image.rs
@@ -10,7 +10,7 @@ use wide::{i16x8, CmpEq};
 
 use crate::consts::ZIGZAG_TO_TRANSPOSED;
 
-use super::jpeg_header::JPegHeader;
+use super::jpeg_header::JpegHeader;
 
 /// holds the 8x8 blocks for a given component. Since we do multithreaded encoding,
 /// the image may only hold a subset of the components (specified by dpos_offset),
@@ -30,7 +30,7 @@ static EMPTY: AlignedBlock = AlignedBlock { raw_data: [0; 64] };
 impl BlockBasedImage {
     // constructs new block image for the given y-coordinate range
     pub fn new(
-        jpeg_header: &JPegHeader,
+        jpeg_header: &JpegHeader,
         component: usize,
         luma_y_start: u32,
         luma_y_end: u32,

diff --git a/src/jpeg_code.rs → src/jpeg/jpeg_code.rs b/src/jpeg_code.rs → src/jpeg/jpeg_code.rs
diff --git a/src/jpeg/jpeg_header.rs b/src/jpeg/jpeg_header.rs
@@ -32,16 +32,17 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-use std::io::Read;
+use std::io::{Cursor, Read, Write};
 use std::num::NonZeroU32;
 
-use crate::consts::JPegType;
+use crate::consts::JpegType;
 use crate::enabled_features::EnabledFeatures;
 use crate::helpers::*;
 use crate::lepton_error::{err_exit_code, AddContext, ExitCode, Result};
-use crate::{jpeg_code, LeptonError};
+use crate::LeptonError;
 
 use super::component_info::ComponentInfo;
+use super::jpeg_code;
 use super::truncate_components::TruncateComponents;
 
 /// Information required to partition the coding of the JPEG Huffman-encoded stream of a scan
@@ -65,7 +66,7 @@ impl RestartSegmentCodingInfo {
         num_overhang_bits: u8,
         last_dc: [i16; 4],
         mcu: u32,
-        jf: &JPegHeader,
+        jf: &JpegHeader,
     ) -> Self {
         let mcu_y = mcu / jf.mcuh;
         let luma_mul = jf.cmp_info[0].bcv / jf.mcuv;
@@ -124,6 +125,71 @@ pub struct ReconstructionInfo {
 
     /// information about how to truncate the image if it was partially written
     pub truncate_components: TruncateComponents,
+
+    /// trailing RST marking information
+    pub rst_err: Vec<u8>,
+
+    /// raw jpeg header to be written back to the file when it is recreated
+    pub raw_jpeg_header: Vec<u8>,
+
+    /// garbage data (default value - empty segment - means no garbage data)
+    pub garbage_data: Vec<u8>,
+}
+
+pub fn parse_jpeg_header<R: Read>(
+    reader: &mut R,
+    enabled_features: &EnabledFeatures,
+    jpeg_header: &mut JpegHeader,
+    rinfo: &mut ReconstructionInfo,
+) -> Result<bool> {
+    // the raw header in the lepton file can actually be spread across different sections
+    // separated by the Start-of-Scan marker. We use the mirror to write out whatever
+    // data we parse until we hit the SOS
+
+    let mut output = Vec::new();
+    let mut output_cursor = Cursor::new(&mut output);
+
+    let mut mirror = Mirror::new(reader, &mut output_cursor);
+
+    if jpeg_header.parse(&mut mirror, enabled_features).context()? {
+        // append the header if it was not the end of file marker
+        rinfo.raw_jpeg_header.append(&mut output);
+        return Ok(true);
+    } else {
+        // if the output was more than 2 bytes then there was a trailing header, so keep that around as well,
+        // but we don't want the EOI since that goes into the garbage data.
+        if output.len() > 2 {
+            rinfo.raw_jpeg_header.extend(&output[0..output.len() - 2]);
+        }
+
+        return Ok(false);
+    }
+}
+
+// internal utility we use to collect the header that we read for later
+struct Mirror<'a, R, W> {
+    read: &'a mut R,
+    output: &'a mut W,
+    amount_written: usize,
+}
+
+impl<'a, R, W> Mirror<'a, R, W> {
+    pub fn new(read: &'a mut R, output: &'a mut W) -> Self {
+        Mirror {
+            read,
+            output,
+            amount_written: 0,
+        }
+    }
+}
+
+impl<R: Read, W: Write> Read for Mirror<'_, R, W> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let n = self.read.read(buf)?;
+        self.output.write_all(&buf[..n])?;
+        self.amount_written += n;
+        Ok(n)
+    }
 }
 
 #[derive(Copy, Clone, Debug)]
@@ -349,7 +415,7 @@ impl HuffTree {
 
 /// JPEG information parsed out of segments found before the image segment
 #[derive(Debug, Clone)]
-pub struct JPegHeader {
+pub struct JpegHeader {
     /// quantization tables 4 x 64
     pub q_tables: [[u16; 64]; 4],
 
@@ -374,7 +440,7 @@ pub struct JPegHeader {
     /// height of image
     pub img_height: u32,
 
-    pub jpeg_type: JPegType,
+    pub jpeg_type: JpegType,
 
     /// max horizontal sample factor
     pub sfhm: u32,
@@ -419,9 +485,9 @@ enum ParseSegmentResult {
     SOS,
 }
 
-impl Default for JPegHeader {
+impl Default for JpegHeader {
     fn default() -> Self {
-        return JPegHeader {
+        return JpegHeader {
             q_tables: [[0; 64]; 4],
             h_codes: [[HuffCodes::default(); 4]; 2],
             h_trees: [[HuffTree::default(); 4]; 2],
@@ -435,7 +501,7 @@ impl Default for JPegHeader {
             cmpc: 0,
             img_width: 0,
             img_height: 0,
-            jpeg_type: JPegType::Unknown,
+            jpeg_type: JpegType::Unknown,
             sfhm: 0,
             sfvm: 0,
             mcuv: NonZeroU32::MIN,
@@ -452,7 +518,7 @@ impl Default for JPegHeader {
     }
 }
 
-impl JPegHeader {
+impl JpegHeader {
     #[inline(always)]
     pub(super) fn get_huff_dc_codes(&self, cmp: usize) -> &HuffCodes {
         &self.h_codes[0][usize::from(self.cmp_info[cmp].huff_dc)]
@@ -510,7 +576,7 @@ impl JPegHeader {
             if (self.cmp_info[cmp].sfv == 0)
                 || (self.cmp_info[cmp].sfh == 0)
                 || (self.q_tables[usize::from(self.cmp_info[cmp].q_table_index)][0] == 0)
-                || (self.jpeg_type == JPegType::Unknown)
+                || (self.jpeg_type == JpegType::Unknown)
             {
                 return err_exit_code(
                     ExitCode::UnsupportedJpeg,
@@ -819,19 +885,19 @@ impl JPegHeader {
             jpeg_code::SOF1| // SOF1 segment, coding process: extended sequential DCT
             jpeg_code::SOF2 =>  // SOF2 segment, coding process: progressive DCT
             {
-                if self.jpeg_type != JPegType::Unknown
+                if self.jpeg_type != JpegType::Unknown
                 {
                     return err_exit_code(ExitCode::UnsupportedJpeg, "image cannot have multiple SOF blocks");
                 }
 
                 // set JPEG coding type
                 if btype == jpeg_code::SOF2
                 {
-                    self.jpeg_type = JPegType::Progressive;
+                    self.jpeg_type = JpegType::Progressive;
                 }
                 else
                 {
-                    self.jpeg_type = JPegType::Sequential;
+                    self.jpeg_type = JpegType::Sequential;
                 }
 
                 ensure_space(segment,hpos, 6).context()?;