From b61d0540d72657875f8517b21f894300193e632b Mon Sep 17 00:00:00 2001 From: irenjj Date: Sun, 2 Feb 2025 20:13:30 +0800 Subject: [PATCH 1/4] fix: first none in `ListArray` panics in `cast_with_options` --- arrow-cast/src/cast/list.rs | 22 +++++++++++++--------- arrow-cast/src/cast/mod.rs | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs index ec7a5c57d504..7263b2da759c 100644 --- a/arrow-cast/src/cast/list.rs +++ b/arrow-cast/src/cast/list.rs @@ -88,11 +88,16 @@ where let mut mutable = MutableArrayData::new(vec![&values], nullable, cap); // The end position in values of the last incorrectly-sized list slice let mut last_pos = 0; + let mut null_first = false; for (idx, w) in array.offsets().windows(2).enumerate() { let start_pos = w[0].as_usize(); let end_pos = w[1].as_usize(); let len = end_pos - start_pos; + if start_pos == 0 && end_pos == 0 { + null_first = true; + } + if len != size as usize { if cast_options.safe || array.is_null(idx) { if last_pos != start_pos { @@ -112,16 +117,15 @@ where } } - let values = match last_pos { - 0 => array.values().slice(0, cap), // All slices were the correct length - _ => { - if mutable.len() != cap { - // Remaining slices were all correct length - let remaining = cap - mutable.len(); - mutable.extend(0, last_pos, last_pos + remaining) - } - make_array(mutable.freeze()) + let values = if last_pos == 0 && !null_first { + array.values().slice(0, cap) // All slices were the correct length + } else { + if mutable.len() != cap { + // Remaining slices were all correct length + let remaining = cap - mutable.len(); + mutable.extend(0, last_pos, last_pos + remaining) } + make_array(mutable.freeze()) }; // Cast the inner values if necessary diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 440d0a8becde..538e9b781a37 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -10033,4 +10033,41 @@ mod tests { assert_eq!(result.unwrap_err().to_string(), "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999"); } + + #[test] + fn test_first_none() { + let array = Arc::new(ListArray::from_iter_primitive::(vec![ + None, + Some(vec![Some(1), Some(2)]), + ])) as ArrayRef; + let data_type = + DataType::FixedSizeList(FieldRef::new(Field::new("item", DataType::Int64, true)), 2); + let opt = CastOptions::default(); + let r = cast_with_options(&array, &data_type, &opt).unwrap(); + + let fixed_array = Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![None, Some(vec![Some(1), Some(2)])], + 2, + )) as ArrayRef; + assert_eq!(*fixed_array, *r); + } + + #[test] + fn test_first_last_none() { + let array = Arc::new(ListArray::from_iter_primitive::(vec![ + None, + Some(vec![Some(1), Some(2)]), + None, + ])) as ArrayRef; + let data_type = + DataType::FixedSizeList(FieldRef::new(Field::new("item", DataType::Int64, true)), 2); + let opt = CastOptions::default(); + let r = cast_with_options(&array, &data_type, &opt).unwrap(); + + let fixed_array = Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![None, Some(vec![Some(1), Some(2)]), None], + 2, + )) as ArrayRef; + assert_eq!(*fixed_array, *r); + } } From 7e79d9bf7f033efaa2f0a13d6dc1fdefe9d466fc Mon Sep 17 00:00:00 2001 From: irenjj Date: Wed, 5 Feb 2025 11:33:03 +0800 Subject: [PATCH 2/4] simplify --- arrow-cast/src/cast/list.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs index 7263b2da759c..be4de2981d13 100644 --- a/arrow-cast/src/cast/list.rs +++ b/arrow-cast/src/cast/list.rs @@ -117,15 +117,16 @@ where } } - let values = if last_pos == 0 && !null_first { - array.values().slice(0, cap) // All slices were the correct length - } else { - if mutable.len() != cap { - // Remaining slices were all correct length - let remaining = cap - mutable.len(); - mutable.extend(0, last_pos, last_pos + remaining) + let values = match last_pos { + 0 if !null_first => array.values().slice(0, cap), // All slices were the correct length + _ => { + if mutable.len() != cap { + // Remaining slices were all correct length + let remaining = cap - mutable.len(); + mutable.extend(0, last_pos, last_pos + remaining) + } + make_array(mutable.freeze()) } - make_array(mutable.freeze()) }; // Cast the inner values if necessary From 085c55377759d9db2015bbee5884def0f484cbd5 Mon Sep 17 00:00:00 2001 From: irenjj Date: Thu, 6 Feb 2025 20:46:50 +0800 Subject: [PATCH 3/4] fix --- arrow-cast/src/cast/list.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs index be4de2981d13..875c3cd41460 100644 --- a/arrow-cast/src/cast/list.rs +++ b/arrow-cast/src/cast/list.rs @@ -88,16 +88,30 @@ where let mut mutable = MutableArrayData::new(vec![&values], nullable, cap); // The end position in values of the last incorrectly-sized list slice let mut last_pos = 0; - let mut null_first = false; + + // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases. + let is_prev_empty = if array.offsets().len() < 2 { + false + } else { + let first_offset = array + .offsets() + .first() + .ok_or_else(|| ArrowError::ComputeError("Failed to get the first offset".into()))? + .as_usize(); + let second_offset = array + .offsets() + .get(1) + .ok_or_else(|| ArrowError::ComputeError("Failed to get the second offset".into()))? + .as_usize(); + + first_offset == 0 && second_offset == 0 + }; + for (idx, w) in array.offsets().windows(2).enumerate() { let start_pos = w[0].as_usize(); let end_pos = w[1].as_usize(); let len = end_pos - start_pos; - if start_pos == 0 && end_pos == 0 { - null_first = true; - } - if len != size as usize { if cast_options.safe || array.is_null(idx) { if last_pos != start_pos { @@ -118,7 +132,7 @@ where } let values = match last_pos { - 0 if !null_first => array.values().slice(0, cap), // All slices were the correct length + 0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length _ => { if mutable.len() != cap { // Remaining slices were all correct length From 7e0093eebfa9a8bba8698a571570a14182ed104a Mon Sep 17 00:00:00 2001 From: irenjj Date: Fri, 7 Feb 2025 22:27:06 +0800 Subject: [PATCH 4/4] Update arrow-cast/src/cast/list.rs Co-authored-by: Jeffrey Vo --- arrow-cast/src/cast/list.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs index 875c3cd41460..ddcbca361bf0 100644 --- a/arrow-cast/src/cast/list.rs +++ b/arrow-cast/src/cast/list.rs @@ -93,16 +93,8 @@ where let is_prev_empty = if array.offsets().len() < 2 { false } else { - let first_offset = array - .offsets() - .first() - .ok_or_else(|| ArrowError::ComputeError("Failed to get the first offset".into()))? - .as_usize(); - let second_offset = array - .offsets() - .get(1) - .ok_or_else(|| ArrowError::ComputeError("Failed to get the second offset".into()))? - .as_usize(); + let first_offset = array.offsets()[0].as_usize(); + let second_offset = array.offsets()[1].as_usize(); first_offset == 0 && second_offset == 0 };