Skip to content

Commit

Permalink
chore(query): reduce unsafe codes in kernels (#16633)
Browse files Browse the repository at this point in the history
* add tests

* fix typo

* remove unsafe codes
  • Loading branch information
sundy-li authored Oct 18, 2024
1 parent ea58350 commit 3b7ef8b
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 486 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion src/common/arrow/src/arrow/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,14 @@ impl Bitmap {

/// Returns the offset of this [`Bitmap`], i.e. the value stored in its `offset` field.
pub(crate) fn offset(&self) -> usize {
pub fn offset(&self) -> usize {
self.offset
}

/// Returns the bytes backing this [`Bitmap`] as a `&[u8]`.
///
/// The slice is obtained by dereferencing `self.bytes`; the bitmap's `offset`
/// is not applied here, so callers that need bit-accurate positions should
/// combine this with [`Bitmap::offset`].
// NOTE(review): presumably the slice covers the whole underlying allocation,
// not just the bits in `offset..offset + len` — confirm against `Bytes`.
pub fn values(&self) -> &[u8] {
self.bytes.deref()
}

/// Converts this [`Bitmap`] to [`MutableBitmap`], returning itself if the conversion
/// is not possible
///
Expand Down
27 changes: 27 additions & 0 deletions src/common/arrow/src/arrow/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

use std::hint::unreachable_unchecked;
use std::iter::FromIterator;
use std::ops::Range;
use std::sync::Arc;

use super::utils::count_zeros;
Expand Down Expand Up @@ -202,6 +203,26 @@ impl MutableBitmap {
}
}

/// Appends the bits of `to_set` selected by `range` to the end of this bitmap.
///
/// `to_set` is a slice of bits packed LSB-first into `[u8]`; `range.start` is
/// handed to `set_bits` as the read offset and `range.end - range.start` as
/// the number of bits to copy.
///
/// # Panics
///
/// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
    // Capture the write position before the bitmap is advanced.
    let dst_bit = self.len();
    let Range {
        start: src_bit,
        end: src_end,
    } = range;
    let bit_count = src_end - src_bit;

    // Make room for the incoming bits, then copy them into place.
    self.advance(bit_count);
    let dst = self.buffer.as_mut_slice();
    arrow_data::bit_mask::set_bits(dst, to_set, dst_bit, src_bit, bit_count);
}

/// Initializes a zeroed [`MutableBitmap`].
#[inline]
pub fn from_len_zeroed(length: usize) -> Self {
Expand All @@ -227,6 +248,12 @@ impl MutableBitmap {
.reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
}

/// Advances this bitmap by `additional` bits.
///
/// This is a thin wrapper that forwards to `extend_unset(additional)`.
#[inline]
pub fn advance(&mut self, additional: usize) {
    self.extend_unset(additional);
}

/// Returns the capacity of [`MutableBitmap`] in number of bits.
#[inline]
pub fn capacity(&self) -> usize {
Expand Down
162 changes: 25 additions & 137 deletions src/query/expression/src/kernels/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use databend_common_arrow::arrow::bitmap::Bitmap;
use databend_common_arrow::arrow::buffer::Buffer;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use ethnum::i256;
use itertools::Itertools;

use crate::copy_continuous_bits;
Expand All @@ -28,6 +29,7 @@ use crate::kernels::utils::set_vec_len_by_ptr;
use crate::store_advance_aligned;
use crate::types::array::ArrayColumnBuilder;
use crate::types::binary::BinaryColumn;
use crate::types::decimal::Decimal;
use crate::types::decimal::DecimalColumn;
use crate::types::geography::GeographyColumn;
use crate::types::geometry::GeometryType;
Expand All @@ -36,20 +38,20 @@ use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::string::StringColumn;
use crate::types::AnyType;
use crate::types::ArgType;
use crate::types::ArrayType;
use crate::types::BinaryType;
use crate::types::BitmapType;
use crate::types::BooleanType;
use crate::types::DateType;
use crate::types::DecimalType;
use crate::types::GeographyType;
use crate::types::MapType;
use crate::types::NumberType;
use crate::types::StringType;
use crate::types::TimestampType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::types::F32;
use crate::types::F64;
use crate::with_decimal_type;
use crate::with_decimal_mapped_type;
use crate::with_number_mapped_type;
use crate::BlockEntry;
use crate::Column;
Expand Down Expand Up @@ -139,121 +141,23 @@ impl Column {
Column::EmptyArray { .. } => Column::EmptyArray { len: capacity },
Column::EmptyMap { .. } => Column::EmptyMap { len: capacity },
Column::Number(col) => with_number_mapped_type!(|NUM_TYPE| match col {
NumberColumn::UInt8(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_u_int8().unwrap()),
NumberColumn::NUM_TYPE(_) => {
type NType = NumberType<NUM_TYPE>;
let buffer = Self::concat_primitive_types(
columns.map(|col| NType::try_downcast_column(&col).unwrap()),
capacity,
);
<NumberType<u8>>::upcast_column(<NumberType<u8>>::column_from_vec(builder, &[]))
}
NumberColumn::UInt16(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_u_int16().unwrap()),
capacity,
);
<NumberType<u16>>::upcast_column(<NumberType<u16>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::UInt32(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_u_int32().unwrap()),
capacity,
);
<NumberType<u32>>::upcast_column(<NumberType<u32>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::UInt64(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_u_int64().unwrap()),
capacity,
);
<NumberType<u64>>::upcast_column(<NumberType<u64>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::Int8(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_int8().unwrap()),
capacity,
);
<NumberType<i8>>::upcast_column(<NumberType<i8>>::column_from_vec(builder, &[]))
}
NumberColumn::Int16(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_int16().unwrap()),
capacity,
);
<NumberType<i16>>::upcast_column(<NumberType<i16>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::Int32(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_int32().unwrap()),
capacity,
);
<NumberType<i32>>::upcast_column(<NumberType<i32>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::Int64(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_int64().unwrap()),
capacity,
);
<NumberType<i64>>::upcast_column(<NumberType<i64>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::Float32(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_float32().unwrap()),
capacity,
);
<NumberType<F32>>::upcast_column(<NumberType<F32>>::column_from_vec(
builder,
&[],
))
}
NumberColumn::Float64(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_number().unwrap().into_float64().unwrap()),
capacity,
);
<NumberType<F64>>::upcast_column(<NumberType<F64>>::column_from_vec(
builder,
&[],
))
NType::upcast_column(buffer)
}
}),
Column::Decimal(col) => with_decimal_type!(|DECIMAL_TYPE| match col {
DecimalColumn::Decimal128(_, size) => {
let builder = Self::concat_primitive_types(
columns.map(|col| match col {
Column::Decimal(DecimalColumn::Decimal128(col, _)) => col,
_ => unreachable!(),
}),
capacity,
);
Column::Decimal(DecimalColumn::Decimal128(builder.into(), size))
}
DecimalColumn::Decimal256(_, size) => {
let builder = Self::concat_primitive_types(
columns.map(|col| match col {
Column::Decimal(DecimalColumn::Decimal256(col, _)) => col,
_ => unreachable!(),
}),
Column::Decimal(col) => with_decimal_mapped_type!(|DECIMAL_TYPE| match col {
DecimalColumn::DECIMAL_TYPE(_, size) => {
type DType = DecimalType<DECIMAL_TYPE>;
let buffer = Self::concat_primitive_types(
columns.map(|col| DType::try_downcast_column(&col).unwrap()),
capacity,
);
Column::Decimal(DecimalColumn::Decimal256(builder.into(), size))
DECIMAL_TYPE::upcast_column(buffer, size)
}
}),
Column::Boolean(_) => Column::Boolean(Self::concat_boolean_types(
Expand All @@ -269,34 +173,18 @@ impl Column {
capacity,
)),
Column::Timestamp(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_timestamp().unwrap()),
let buffer = Self::concat_primitive_types(
columns.map(|col| TimestampType::try_downcast_column(&col).unwrap()),
capacity,
);
let ts = <NumberType<i64>>::upcast_column(<NumberType<i64>>::column_from_vec(
builder,
&[],
))
.into_number()
.unwrap()
.into_int64()
.unwrap();
Column::Timestamp(ts)
Column::Timestamp(buffer)
}
Column::Date(_) => {
let builder = Self::concat_primitive_types(
columns.map(|col| col.into_date().unwrap()),
let buffer = Self::concat_primitive_types(
columns.map(|col| DateType::try_downcast_column(&col).unwrap()),
capacity,
);
let d = <NumberType<i32>>::upcast_column(<NumberType<i32>>::column_from_vec(
builder,
&[],
))
.into_number()
.unwrap()
.into_int32()
.unwrap();
Column::Date(d)
Column::Date(buffer)
}
Column::Array(col) => {
let mut offsets = Vec::with_capacity(capacity + 1);
Expand Down Expand Up @@ -372,15 +260,15 @@ impl Column {
pub fn concat_primitive_types<T>(
cols: impl Iterator<Item = Buffer<T>>,
num_rows: usize,
) -> Vec<T>
) -> Buffer<T>
where
T: Copy,
{
let mut builder: Vec<T> = Vec::with_capacity(num_rows);
for col in cols {
builder.extend(col.iter());
}
builder
builder.into()
}

pub fn concat_binary_types(
Expand Down
Loading

0 comments on commit 3b7ef8b

Please sign in to comment.