diff --git a/arrow-arith/src/bitwise.rs b/arrow-arith/src/bitwise.rs index c829a3c29fff..a3c18136c5eb 100644 --- a/arrow-arith/src/bitwise.rs +++ b/arrow-arith/src/bitwise.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Module contains bitwise operations on arrays + use crate::arity::{binary, unary}; use arrow_array::*; use arrow_buffer::ArrowNativeType; diff --git a/arrow-arith/src/lib.rs b/arrow-arith/src/lib.rs index 2d5451e04dd2..c8b6412e5efc 100644 --- a/arrow-arith/src/lib.rs +++ b/arrow-arith/src/lib.rs @@ -17,6 +17,7 @@ //! Arrow arithmetic and aggregation kernels +#![warn(missing_docs)] pub mod aggregate; #[doc(hidden)] // Kernels to be removed in a future release pub mod arithmetic; diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs index db44ebad1c22..34f0cd7cfc74 100644 --- a/arrow-array/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -83,17 +83,18 @@ const ENOSYS: i32 = 78; /// This was created by bindgen #[repr(C)] #[derive(Debug)] -#[allow(missing_docs)] +#[allow(non_camel_case_types)] pub struct FFI_ArrowArrayStream { - pub get_schema: Option< - unsafe extern "C" fn(arg1: *mut FFI_ArrowArrayStream, out: *mut FFI_ArrowSchema) -> c_int, - >, - pub get_next: Option< - unsafe extern "C" fn(arg1: *mut FFI_ArrowArrayStream, out: *mut FFI_ArrowArray) -> c_int, - >, - pub get_last_error: - Option *const c_char>, - pub release: Option, + /// C function to get schema from the stream + pub get_schema: + Option c_int>, + /// C function to get next array from the stream + pub get_next: Option c_int>, + /// C function to get the error from last operation on the stream + pub get_last_error: Option *const c_char>, + /// C function to release the stream + pub release: Option, + /// Private data used by the stream pub private_data: *mut c_void, } diff --git a/arrow-avro/src/lib.rs b/arrow-avro/src/lib.rs index f74edd7e9260..d01d681b7af0 100644 --- a/arrow-avro/src/lib.rs +++ 
b/arrow-avro/src/lib.rs @@ -20,6 +20,7 @@ //! [Apache Arrow]: https://arrow.apache.org //! [Apache Avro]: https://avro.apache.org/ +#![warn(missing_docs)] #![allow(unused)] // Temporary pub mod reader; diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs index 0bd51e162fca..f5fab75dc5ef 100644 --- a/arrow-buffer/src/bigint/mod.rs +++ b/arrow-buffer/src/bigint/mod.rs @@ -216,6 +216,7 @@ impl i256 { } } + /// Create an `i256` value from a 128-bit value. pub const fn from_i128(v: i128) -> Self { Self::from_parts(v as u128, v >> 127) } diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs index ce5e1dc34aa0..298b479e87df 100644 --- a/arrow-buffer/src/builder/null.rs +++ b/arrow-buffer/src/builder/null.rs @@ -159,6 +159,7 @@ impl NullBufferBuilder { } } + /// Return a mutable reference to the inner bitmap slice. pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> { self.bitmap_builder.as_mut().map(|b| b.as_slice_mut()) } @@ -173,14 +174,12 @@ impl NullBufferBuilder { } impl NullBufferBuilder { + /// Return the number of bits in the buffer. pub fn len(&self) -> usize { - if let Some(b) = &self.bitmap_builder { - b.len() - } else { - self.len - } + self.bitmap_builder.as_ref().map_or(self.len, |b| b.len()) } + /// Check if the builder is empty. 
pub fn is_empty(&self) -> bool { self.len() == 0 } diff --git a/arrow-buffer/src/builder/offset.rs b/arrow-buffer/src/builder/offset.rs index 6a236d2a3e12..1ef0e3170c96 100644 --- a/arrow-buffer/src/builder/offset.rs +++ b/arrow-buffer/src/builder/offset.rs @@ -19,13 +19,13 @@ use std::ops::Deref; use crate::{ArrowNativeType, OffsetBuffer}; +/// Builder of [`OffsetBuffer`] #[derive(Debug)] pub struct OffsetBufferBuilder { offsets: Vec, last_offset: usize, } -/// Builder of [`OffsetBuffer`] impl OffsetBufferBuilder { /// Create a new builder with space for `capacity + 1` offsets pub fn new(capacity: usize) -> Self { diff --git a/arrow-buffer/src/interval.rs b/arrow-buffer/src/interval.rs index 8f3342131656..fa87fec6ea3a 100644 --- a/arrow-buffer/src/interval.rs +++ b/arrow-buffer/src/interval.rs @@ -68,8 +68,11 @@ use std::ops::Neg; #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[repr(C)] pub struct IntervalMonthDayNano { + /// Number of months pub months: i32, + /// Number of days pub days: i32, + /// Number of nanoseconds pub nanoseconds: i64, } @@ -345,7 +348,9 @@ derive_arith!( #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[repr(C)] pub struct IntervalDayTime { + /// Number of days pub days: i32, + /// Number of milliseconds pub milliseconds: i32, } diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs index a7bf93ed0c16..34e432208ada 100644 --- a/arrow-buffer/src/lib.rs +++ b/arrow-buffer/src/lib.rs @@ -19,6 +19,7 @@ // used by [`buffer::mutable::dangling_ptr`] #![cfg_attr(miri, feature(strict_provenance))] +#![warn(missing_docs)] pub mod alloc; pub mod buffer; diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs index 4404509085f3..54995314c49b 100644 --- a/arrow-buffer/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -131,26 +131,32 @@ impl<'a> UnalignedBitChunk<'a> { } } + /// Returns the number of 
leading padding bits pub fn lead_padding(&self) -> usize { self.lead_padding } + /// Returns the number of trailing padding bits pub fn trailing_padding(&self) -> usize { self.trailing_padding } + /// Returns the prefix, if any pub fn prefix(&self) -> Option { self.prefix } + /// Returns the suffix, if any pub fn suffix(&self) -> Option { self.suffix } + /// Returns a reference to the chunks pub fn chunks(&self) -> &'a [u64] { self.chunks } + /// Returns an iterator over the chunks pub fn iter(&self) -> UnalignedBitChunkIterator<'a> { self.prefix .into_iter() @@ -164,6 +170,7 @@ impl<'a> UnalignedBitChunk<'a> { } } +/// Iterator over an [`UnalignedBitChunk`] pub type UnalignedBitChunkIterator<'a> = std::iter::Chain< std::iter::Chain, std::iter::Cloned>>, std::option::IntoIter, @@ -212,6 +219,7 @@ pub struct BitChunks<'a> { } impl<'a> BitChunks<'a> { + /// Create a new [`BitChunks`] from a byte array, and an offset and length in bits pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { assert!(ceil(offset + len, 8) <= buffer.len() * 8); @@ -232,6 +240,7 @@ impl<'a> BitChunks<'a> { } } +/// Iterator over chunks of 64 bits represented as a u64 #[derive(Debug)] pub struct BitChunkIterator<'a> { buffer: &'a [u8], diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 6373cf67840a..df96816ea23a 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -81,6 +81,7 @@ impl<'a> Default for FormatOptions<'a> { } impl<'a> FormatOptions<'a> { + /// Creates a new set of format options pub const fn new() -> Self { Self { safe: true, diff --git a/arrow-cast/src/lib.rs b/arrow-cast/src/lib.rs index 78acf2de5f20..6eac1be37c88 100644 --- a/arrow-cast/src/lib.rs +++ b/arrow-cast/src/lib.rs @@ -16,6 +16,8 @@ // under the License. //! 
Functions for converting from one data type to another in [Apache Arrow](https://docs.rs/arrow) + +#![warn(missing_docs)] pub mod cast; pub use cast::*; pub mod display; diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 20fa882e9e96..e332e5bbaaec 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -432,8 +432,12 @@ fn string_to_time(s: &str) -> Option { /// assert_eq!(ts, 1609459200123456789); /// ``` pub trait Parser: ArrowPrimitiveType { + /// Parse a string to the native type fn parse(string: &str) -> Option; + /// Parse a string to the native type with a format string + /// + /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse) fn parse_formatted(string: &str, _format: &str) -> Option { Self::parse(string) } @@ -966,6 +970,7 @@ pub fn parse_decimal( }) } +/// Parse human-readable interval string to Arrow [IntervalYearMonthType] pub fn parse_interval_year_month( value: &str, ) -> Result<::Native, ArrowError> { @@ -981,6 +986,7 @@ pub fn parse_interval_year_month( Ok(IntervalYearMonthType::make_value(0, months)) } +/// Parse human-readable interval string to Arrow [IntervalDayTimeType] pub fn parse_interval_day_time( value: &str, ) -> Result<::Native, ArrowError> { @@ -994,6 +1000,7 @@ pub fn parse_interval_day_time( Ok(IntervalDayTimeType::make_value(days, millis)) } +/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType] pub fn parse_interval_month_day_nano_config( value: &str, config: IntervalParseConfig, @@ -1005,6 +1012,7 @@ pub fn parse_interval_month_day_nano_config( Ok(IntervalMonthDayNanoType::make_value(months, days, nanos)) } +/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType] pub fn parse_interval_month_day_nano( value: &str, ) -> Result<::Native, ArrowError> { @@ -1018,14 +1026,18 @@ const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE; #[cfg(test)] const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR; +/// Config to 
parse interval strings +/// +/// Currently stores the `default_unit` to use if the string doesn't have one specified #[derive(Debug, Clone)] pub struct IntervalParseConfig { /// The default unit to use if none is specified - /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = IntervalType::Second + /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second] default_unit: IntervalUnit, } impl IntervalParseConfig { + /// Create a new [IntervalParseConfig] with the given default unit pub fn new(default_unit: IntervalUnit) -> Self { Self { default_unit } } @@ -1034,18 +1046,32 @@ impl IntervalParseConfig { #[rustfmt::skip] #[derive(Debug, Clone, Copy)] #[repr(u16)] +/// Represents the units of an interval, with each variant +/// corresponding to a bit in the interval's bitfield representation pub enum IntervalUnit { + /// A Century Century = 0b_0000_0000_0001, + /// A Decade Decade = 0b_0000_0000_0010, + /// A Year Year = 0b_0000_0000_0100, + /// A Month Month = 0b_0000_0000_1000, + /// A Week Week = 0b_0000_0001_0000, + /// A Day Day = 0b_0000_0010_0000, + /// An Hour Hour = 0b_0000_0100_0000, + /// A Minute Minute = 0b_0000_1000_0000, + /// A Second Second = 0b_0001_0000_0000, + /// A Millisecond Millisecond = 0b_0010_0000_0000, + /// A Microsecond Microsecond = 0b_0100_0000_0000, + /// A Nanosecond Nanosecond = 0b_1000_0000_0000, } @@ -1093,6 +1119,7 @@ impl IntervalUnit { } } +/// A tuple representing (months, days, nanoseconds) in an interval pub type MonthDayNano = (i32, i32, i64); /// Chosen based on the number of decimal digits in 1 week in nanoseconds diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index f41471e38d5e..4a3cbda283a5 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -54,7 +54,8 @@ pub fn pretty_format_columns( pretty_format_columns_with_options(col_name, results, &options) } -pub fn pretty_format_columns_with_options( +/// Utility function to create a 
visual representation of columns with options +fn pretty_format_columns_with_options( col_name: &str, results: &[ArrayRef], options: &FormatOptions, diff --git a/arrow-csv/src/lib.rs b/arrow-csv/src/lib.rs index e6dc69935199..28c0d6ebdbb8 100644 --- a/arrow-csv/src/lib.rs +++ b/arrow-csv/src/lib.rs @@ -17,6 +17,8 @@ //! Transfer data between the Arrow memory format and CSV (comma-separated values). +#![warn(missing_docs)] + pub mod reader; pub mod writer; diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index c5057599b822..36f80ec90a95 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -236,31 +236,41 @@ pub struct Format { } impl Format { + /// Specify whether the CSV file has a header, defaults to `true` + /// + /// When `true`, the first row of the CSV file is treated as a header row pub fn with_header(mut self, has_header: bool) -> Self { self.header = has_header; self } + /// Specify a custom delimiter character, defaults to comma `','` pub fn with_delimiter(mut self, delimiter: u8) -> Self { self.delimiter = Some(delimiter); self } + /// Specify an escape character, defaults to `None` pub fn with_escape(mut self, escape: u8) -> Self { self.escape = Some(escape); self } + /// Specify a custom quote character, defaults to double quote `'"'` pub fn with_quote(mut self, quote: u8) -> Self { self.quote = Some(quote); self } + /// Specify a custom terminator character, defaults to CRLF pub fn with_terminator(mut self, terminator: u8) -> Self { self.terminator = Some(terminator); self } + /// Specify a comment character, defaults to `None` + /// + /// Lines starting with this character will be ignored pub fn with_comment(mut self, comment: u8) -> Self { self.comment = Some(comment); self @@ -1100,7 +1110,7 @@ impl ReaderBuilder { self } - /// Overrides the [`Format`] of this [`ReaderBuilder] + /// Overrides the [Format] of this [ReaderBuilder] pub fn with_format(mut self, format: Format) -> Self { self.format = 
format; self @@ -1112,21 +1122,25 @@ impl ReaderBuilder { self } + /// Set the given character as the CSV file's escape character pub fn with_escape(mut self, escape: u8) -> Self { self.format.escape = Some(escape); self } + /// Set the given character as the CSV file's quote character, by default it is double quote pub fn with_quote(mut self, quote: u8) -> Self { self.format.quote = Some(quote); self } + /// Provide a custom terminator character, defaults to CRLF pub fn with_terminator(mut self, terminator: u8) -> Self { self.format.terminator = Some(terminator); self } + /// Provide a comment character, lines starting with this character will be ignored pub fn with_comment(mut self, comment: u8) -> Self { self.format.comment = Some(comment); self diff --git a/arrow/src/error.rs b/arrow/src/error.rs index f7acec0b34d7..82330f88d5bc 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -19,4 +19,5 @@ pub use arrow_schema::ArrowError; +/// A specialized `Result` type for Arrow operations. 
pub type Result = std::result::Result; diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 581f87121432..5002e5bf181a 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -364,6 +364,7 @@ #![deny(clippy::redundant_clone)] #![warn(missing_debug_implementations)] +#![warn(missing_docs)] #![allow(rustdoc::invalid_html_tags)] pub use arrow_array::{downcast_dictionary_array, downcast_primitive_array}; @@ -389,6 +390,7 @@ pub use arrow_json as json; #[cfg(feature = "pyarrow")] pub mod pyarrow; +/// Contains the `RecordBatch` type and associated traits pub mod record_batch { pub use arrow_array::{ RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, RecordBatchWriter, diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 6ff6df01c454..b05c967d7d9b 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -77,23 +77,30 @@ use crate::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}; use crate::record_batch::RecordBatch; import_exception!(pyarrow, ArrowException); +/// Represents an exception raised by PyArrow. pub type PyArrowException = ArrowException; fn to_py_err(err: ArrowError) -> PyErr { PyArrowException::new_err(err.to_string()) } +/// Trait for converting Python objects to arrow-rs types. pub trait FromPyArrow: Sized { + /// Convert a Python object to an arrow-rs type. + /// + /// Takes a GIL-bound value from Python and returns a result with the arrow-rs type. fn from_pyarrow_bound(value: &Bound) -> PyResult; } /// Create a new PyArrow object from a arrow-rs type. pub trait ToPyArrow { + /// Convert the implemented type into a Python object without consuming it. fn to_pyarrow(&self, py: Python) -> PyResult; } /// Convert an arrow-rs type into a PyArrow object. pub trait IntoPyArrow { + /// Convert the implemented type into a Python object while consuming it. 
fn into_pyarrow(self, py: Python) -> PyResult; } diff --git a/arrow/src/tensor.rs b/arrow/src/tensor.rs index f236e6422cdf..cd135a2f04df 100644 --- a/arrow/src/tensor.rs +++ b/arrow/src/tensor.rs @@ -80,36 +80,67 @@ pub struct Tensor<'a, T: ArrowPrimitiveType> { _marker: PhantomData, } +/// [Tensor] of type [BooleanType] pub type BooleanTensor<'a> = Tensor<'a, BooleanType>; +/// [Tensor] of type [Date32Type] pub type Date32Tensor<'a> = Tensor<'a, Date32Type>; +/// [Tensor] of type [Date64Type] pub type Date64Tensor<'a> = Tensor<'a, Date64Type>; +/// [Tensor] of type [Decimal128Type] pub type Decimal128Tensor<'a> = Tensor<'a, Decimal128Type>; +/// [Tensor] of type [Decimal256Type] pub type Decimal256Tensor<'a> = Tensor<'a, Decimal256Type>; +/// [Tensor] of type [DurationMicrosecondType] pub type DurationMicrosecondTensor<'a> = Tensor<'a, DurationMicrosecondType>; +/// [Tensor] of type [DurationMillisecondType] pub type DurationMillisecondTensor<'a> = Tensor<'a, DurationMillisecondType>; +/// [Tensor] of type [DurationNanosecondType] pub type DurationNanosecondTensor<'a> = Tensor<'a, DurationNanosecondType>; +/// [Tensor] of type [DurationSecondType] pub type DurationSecondTensor<'a> = Tensor<'a, DurationSecondType>; +/// [Tensor] of type [Float16Type] pub type Float16Tensor<'a> = Tensor<'a, Float16Type>; +/// [Tensor] of type [Float32Type] pub type Float32Tensor<'a> = Tensor<'a, Float32Type>; +/// [Tensor] of type [Float64Type] pub type Float64Tensor<'a> = Tensor<'a, Float64Type>; +/// [Tensor] of type [Int8Type] pub type Int8Tensor<'a> = Tensor<'a, Int8Type>; +/// [Tensor] of type [Int16Type] pub type Int16Tensor<'a> = Tensor<'a, Int16Type>; +/// [Tensor] of type [Int32Type] pub type Int32Tensor<'a> = Tensor<'a, Int32Type>; +/// [Tensor] of type [Int64Type] pub type Int64Tensor<'a> = Tensor<'a, Int64Type>; +/// [Tensor] of type [IntervalDayTimeType] pub type IntervalDayTimeTensor<'a> = Tensor<'a, IntervalDayTimeType>; +/// [Tensor] of type [IntervalMonthDayNanoType] 
pub type IntervalMonthDayNanoTensor<'a> = Tensor<'a, IntervalMonthDayNanoType>; +/// [Tensor] of type [IntervalYearMonthType] pub type IntervalYearMonthTensor<'a> = Tensor<'a, IntervalYearMonthType>; +/// [Tensor] of type [Time32MillisecondType] pub type Time32MillisecondTensor<'a> = Tensor<'a, Time32MillisecondType>; +/// [Tensor] of type [Time32SecondType] pub type Time32SecondTensor<'a> = Tensor<'a, Time32SecondType>; +/// [Tensor] of type [Time64MicrosecondType] pub type Time64MicrosecondTensor<'a> = Tensor<'a, Time64MicrosecondType>; +/// [Tensor] of type [Time64NanosecondType] pub type Time64NanosecondTensor<'a> = Tensor<'a, Time64NanosecondType>; +/// [Tensor] of type [TimestampMicrosecondType] pub type TimestampMicrosecondTensor<'a> = Tensor<'a, TimestampMicrosecondType>; +/// [Tensor] of type [TimestampMillisecondType] pub type TimestampMillisecondTensor<'a> = Tensor<'a, TimestampMillisecondType>; +/// [Tensor] of type [TimestampNanosecondType] pub type TimestampNanosecondTensor<'a> = Tensor<'a, TimestampNanosecondType>; +/// [Tensor] of type [TimestampSecondType] pub type TimestampSecondTensor<'a> = Tensor<'a, TimestampSecondType>; +/// [Tensor] of type [UInt8Type] pub type UInt8Tensor<'a> = Tensor<'a, UInt8Type>; +/// [Tensor] of type [UInt16Type] pub type UInt16Tensor<'a> = Tensor<'a, UInt16Type>; +/// [Tensor] of type [UInt32Type] pub type UInt32Tensor<'a> = Tensor<'a, UInt32Type>; +/// [Tensor] of type [UInt64Type] pub type UInt64Tensor<'a> = Tensor<'a, UInt64Type>; impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> { diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index cd615aa73383..8eaae36dbe56 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -50,6 +50,8 @@ where .collect() } +/// Creates a [`PrimitiveArray`] of a given `size` and `null_density` +/// filling it with random numbers generated using the provided `seed`. 
pub fn create_primitive_array_with_seed( size: usize, null_density: f32, @@ -72,6 +74,8 @@ where .collect() } +/// Creates a [`PrimitiveArray`] of a given `size` and `null_density` +/// filling it with random [`IntervalMonthDayNano`] generated using the provided `seed`. pub fn create_month_day_nano_array_with_seed( size: usize, null_density: f32, diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index edb406c0f3ca..55cab368afab 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -394,8 +394,10 @@ fn create_random_null_buffer(size: usize, null_density: f32) -> Buffer { /// Useful for testing. The range of values are not likely to be representative of the /// actual bounds. pub trait RandomTemporalValue: ArrowTemporalType { + /// Returns the range of values for `impl`'d type fn value_range() -> impl SampleRange; + /// Generate a random value within the range of the type fn gen_range(rng: &mut R) -> Self::Native where Self::Native: SampleUniform, @@ -403,6 +405,7 @@ pub trait RandomTemporalValue: ArrowTemporalType { rng.gen_range(Self::value_range()) } + /// Generate a random value of the type fn random(rng: &mut R) -> Self::Native where Self::Native: SampleUniform, diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index 7f7257100cd9..2c131669b73e 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +//! Utility functions for working with Arrow data pub use arrow_buffer::{bit_chunk_iterator, bit_util}; pub use arrow_data::bit_iterator; diff --git a/arrow/src/util/string_writer.rs b/arrow/src/util/string_writer.rs index 57a98d6f51d2..5d8725d354dd 100644 --- a/arrow/src/util/string_writer.rs +++ b/arrow/src/util/string_writer.rs @@ -63,27 +63,24 @@ //! } //! 
``` +use core::str; use std::fmt::Formatter; use std::io::{Error, ErrorKind, Result, Write}; -#[derive(Debug)] +/// A writer that allows writing to a `String` +/// like an `std::io::Write` object. +#[derive(Debug, Default)] pub struct StringWriter { data: String, } impl StringWriter { + /// Create a new `StringWriter` pub fn new() -> Self { - StringWriter { - data: String::new(), - } + Self::default() } } -impl Default for StringWriter { - fn default() -> Self { - Self::new() - } -} impl std::fmt::Display for StringWriter { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.data) @@ -92,13 +89,13 @@ impl std::fmt::Display for StringWriter { impl Write for StringWriter { fn write(&mut self, buf: &[u8]) -> Result { - let string = match String::from_utf8(buf.to_vec()) { + let string = match str::from_utf8(buf) { Ok(x) => x, Err(e) => { return Err(Error::new(ErrorKind::InvalidData, e)); } }; - self.data.push_str(&string); + self.data.push_str(string); Ok(string.len()) }