Merge pull request #68 from carlopi/bump_duckdb
Vendor DuckDB sources, bumping to duckdb's version v0.10.1
hannes authored Mar 22, 2024
2 parents 00bb900 + 3d1ea37 commit f9a910d
Showing 699 changed files with 38,049 additions and 29,164 deletions.
5 changes: 4 additions & 1 deletion binding.gyp
@@ -389,6 +389,9 @@
{
"defines": [
"DUCKDB_BUILD_LIBRARY"
],
"libraries": [
"rstrtmgr.lib", "bcrypt.lib"
]
}
]
@@ -411,4 +414,4 @@
]
}
]
}
}
50 changes: 46 additions & 4 deletions src/duckdb/extension/icu/icu-strptime.cpp
@@ -246,7 +246,7 @@ struct ICUStrptime : public ICUDateFunc {
TailPatch(name, db, types);
}

static bool CastFromVarchar(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
static bool VarcharToTimestampTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
auto &cast_data = parameters.cast_data->Cast<CastData>();
auto &info = cast_data.info->Cast<BindData>();
CalendarPtr cal(info.calendar->clone());
@@ -260,7 +260,7 @@ struct ICUStrptime : public ICUDateFunc {
bool has_offset = false;
if (!Timestamp::TryConvertTimestampTZ(str, len, result, has_offset, tz)) {
auto msg = Timestamp::ConversionError(string(str, len));
HandleCastError::AssignError(msg, parameters.error_message);
HandleCastError::AssignError(msg, parameters);
mask.SetInvalid(idx);
} else if (!has_offset) {
// Convert parts to a TZ (default or parsed) if no offset was provided
@@ -280,22 +280,64 @@ struct ICUStrptime : public ICUDateFunc {
return true;
}

static bool VarcharToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
auto &cast_data = parameters.cast_data->Cast<CastData>();
auto &info = cast_data.info->Cast<BindData>();
CalendarPtr cal(info.calendar->clone());

UnaryExecutor::ExecuteWithNulls<string_t, dtime_tz_t>(
source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
dtime_tz_t result;
const auto str = input.GetData();
const auto len = input.GetSize();
bool has_offset = false;
idx_t pos = 0;
if (!Time::TryConvertTimeTZ(str, len, pos, result, has_offset, false)) {
auto msg = Time::ConversionError(string(str, len));
HandleCastError::AssignError(msg, parameters);
mask.SetInvalid(idx);
} else if (!has_offset) {
// Convert parts to a TZ (default or parsed) if no offset was provided
auto calendar = cal.get();

// Extract the offset from the calendar
auto offset = ExtractField(calendar, UCAL_ZONE_OFFSET);
offset += ExtractField(calendar, UCAL_DST_OFFSET);
offset /= Interval::MSECS_PER_SEC;

// Apply it to the offset +00 time we parsed.
result = dtime_tz_t(result.time(), offset);
}

return result;
});
return true;
}

static BoundCastInfo BindCastFromVarchar(BindCastInput &input, const LogicalType &source,
const LogicalType &target) {
if (!input.context) {
throw InternalException("Missing context for VARCHAR to TIMESTAMPTZ cast.");
throw InternalException("Missing context for VARCHAR to TIME/TIMESTAMPTZ cast.");
}

auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));

return BoundCastInfo(CastFromVarchar, std::move(cast_data));
switch (target.id()) {
case LogicalTypeId::TIMESTAMP_TZ:
return BoundCastInfo(VarcharToTimestampTZ, std::move(cast_data));
case LogicalTypeId::TIME_TZ:
return BoundCastInfo(VarcharToTimeTZ, std::move(cast_data));
default:
throw InternalException("Unsupported type for VARCHAR to TIME/TIMESTAMPTZ cast.");
}
}

static void AddCasts(DatabaseInstance &db) {
auto &config = DBConfig::GetConfig(db);
auto &casts = config.GetCastFunctions();

casts.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ, BindCastFromVarchar);
casts.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIME_TZ, BindCastFromVarchar);
}
};

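The new VarcharToTimeTZ path mirrors the existing TIMESTAMPTZ cast: strings without an explicit UTC offset are shifted by the ICU calendar's zone and DST offsets. A minimal sketch of exercising it through the DuckDB C++ API, assuming a build with the ICU extension statically linked (the time zone and literal below are illustrative):

```cpp
#include "duckdb.hpp"
#include <cstdio>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// Offset-less strings are interpreted in the session TimeZone by the ICU cast.
	con.Query("SET TimeZone = 'America/New_York';");
	auto result = con.Query("SELECT '11:30:00'::TIMETZ AS t;");
	if (result->HasError()) {
		std::printf("%s\n", result->GetError().c_str());
	} else {
		result->Print();
	}
	return 0;
}
```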
5 changes: 5 additions & 0 deletions src/duckdb/extension/icu/icu-table-range.cpp
@@ -90,6 +90,11 @@ struct ICUTableRange {

auto &inputs = input.inputs;
D_ASSERT(inputs.size() == 3);
for (const auto &value : inputs) {
if (value.IsNull()) {
throw BinderException("RANGE with NULL bounds is not supported");
}
}
result->start = inputs[0].GetValue<timestamp_t>();
result->end = inputs[1].GetValue<timestamp_t>();
result->increment = inputs[2].GetValue<interval_t>();
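A short sketch of the behavior the new guard enforces: with NULL bounds the ICU range table function now fails at bind time instead of reading an invalid value (the literals below are illustrative):

```cpp
#include "duckdb.hpp"
#include <cstdio>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// NULL bounds are rejected by the added check.
	auto result = con.Query(
	    "SELECT * FROM range(TIMESTAMPTZ '2024-01-01 00:00:00+00', NULL, INTERVAL 1 DAY);");
	if (result->HasError()) {
		std::printf("%s\n", result->GetError().c_str()); // "RANGE with NULL bounds is not supported"
	}
	return 0;
}
```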
32 changes: 28 additions & 4 deletions src/duckdb/extension/icu/icu-timezone.cpp
@@ -8,6 +8,7 @@
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
#include "include/icu-datefunc.hpp"
#include "duckdb/transaction/meta_transaction.hpp"
#include "duckdb/common/operator/cast_operators.hpp"

namespace duckdb {

@@ -138,13 +139,23 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
return GetTime(calendar, micros);
}

struct CastTimestampUsToUs {
template <class SRC, class DST>
static inline DST Operation(SRC input) {
// no-op
return input;
}
};

template <class OP>
static bool CastFromNaive(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
auto &cast_data = parameters.cast_data->Cast<CastData>();
auto &info = cast_data.info->Cast<BindData>();
CalendarPtr calendar(info.calendar->clone());

UnaryExecutor::Execute<timestamp_t, timestamp_t>(
source, result, count, [&](timestamp_t input) { return Operation(calendar.get(), input); });
UnaryExecutor::Execute<timestamp_t, timestamp_t>(source, result, count, [&](timestamp_t input) {
return Operation(calendar.get(), OP::template Operation<timestamp_t, timestamp_t>(input));
});
return true;
}

@@ -154,15 +165,28 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
}

auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));

return BoundCastInfo(CastFromNaive, std::move(cast_data));
switch (source.id()) {
case LogicalTypeId::TIMESTAMP:
return BoundCastInfo(CastFromNaive<CastTimestampUsToUs>, std::move(cast_data));
case LogicalTypeId::TIMESTAMP_MS:
return BoundCastInfo(CastFromNaive<CastTimestampMsToUs>, std::move(cast_data));
case LogicalTypeId::TIMESTAMP_NS:
return BoundCastInfo(CastFromNaive<CastTimestampNsToUs>, std::move(cast_data));
case LogicalTypeId::TIMESTAMP_SEC:
return BoundCastInfo(CastFromNaive<CastTimestampSecToUs>, std::move(cast_data));
default:
throw InternalException("Type %s not handled in BindCastFromNaive", LogicalTypeIdToString(source.id()));
}
}

static void AddCasts(DatabaseInstance &db) {
auto &config = DBConfig::GetConfig(db);
auto &casts = config.GetCastFunctions();

casts.RegisterCastFunction(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
casts.RegisterCastFunction(LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
casts.RegisterCastFunction(LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
casts.RegisterCastFunction(LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
}
};

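The switch dispatches on the source timestamp width, so TIMESTAMP_MS, TIMESTAMP_NS and TIMESTAMP_S values are first normalized to microseconds and then localized. A hedged usage sketch (time zone and literal are illustrative):

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// Casting a millisecond-precision naive timestamp to TIMESTAMPTZ now goes
	// through CastTimestampMsToUs before the ICU localization.
	con.Query("SET TimeZone = 'UTC';");
	auto result = con.Query(
	    "SELECT '2024-03-22 12:00:00'::TIMESTAMP_MS::TIMESTAMPTZ AS ts;");
	result->Print();
	return 0;
}
```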
11 changes: 6 additions & 5 deletions src/duckdb/extension/json/buffered_json_reader.cpp
@@ -304,11 +304,11 @@ idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in
line += buffer_line_or_object_counts[b_idx];
}
}
}
if (can_throw) {
thrown = true;
// SQL uses 1-based indexing so I guess we will do that in our exception here as well
return line + 1;
if (can_throw) {
thrown = true;
// SQL uses 1-based indexing so I guess we will do that in our exception here as well
return line + 1;
}
}
TaskScheduler::YieldThread();
}
@@ -331,6 +331,7 @@ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_obje
}

double BufferedJSONReader::GetProgress() const {
lock_guard<mutex> guard(lock);
if (HasFileHandle()) {
return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
} else {
6 changes: 3 additions & 3 deletions src/duckdb/extension/json/include/buffered_json_reader.hpp
@@ -90,9 +90,9 @@ struct JSONFileHandle {

//! Read properties
idx_t read_position;
idx_t requested_reads;
atomic<idx_t> requested_reads;
atomic<idx_t> actual_reads;
bool last_read_requested;
atomic<bool> last_read_requested;

//! Cached buffers for resetting when reading stream
vector<AllocatedData> cached_buffers;
@@ -161,7 +161,7 @@ class BufferedJSONReader {
bool thrown;

public:
mutex lock;
mutable mutex lock;
MultiFileReaderData reader_data;
};

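The `mutable mutex lock` change exists so that the now-locking const `GetProgress()` above compiles. A standalone sketch of that C++ pattern (the class and fields here are illustrative, not DuckDB's):

```cpp
#include <cstddef>
#include <mutex>

class ProgressTracker {
public:
	// A const member function may lock the mutex only because it is declared mutable.
	double GetProgress() const {
		std::lock_guard<std::mutex> guard(lock);
		return total == 0 ? 0.0 : 100.0 * static_cast<double>(done) / static_cast<double>(total);
	}

	void Advance(std::size_t n) {
		std::lock_guard<std::mutex> guard(lock);
		done += n;
	}

private:
	mutable std::mutex lock;
	std::size_t done = 0;
	std::size_t total = 100;
};
```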
2 changes: 2 additions & 0 deletions src/duckdb/extension/json/include/json_transform.hpp
@@ -40,6 +40,8 @@ struct JSONTransformOptions {
string error_message;
//! Index of the object where the error occurred
idx_t object_index = DConstants::INVALID_INDEX;
//! Cast parameters
CastParameters parameters;

public:
void Serialize(Serializer &serializer) const;
3 changes: 1 addition & 2 deletions src/duckdb/extension/json/json_functions.cpp
@@ -224,8 +224,7 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
if (!doc) {
mask.SetInvalid(idx);
if (success) {
HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
parameters.error_message);
HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error), parameters);
success = false;
}
}
6 changes: 6 additions & 0 deletions src/duckdb/extension/json/json_functions/json_create.cpp
@@ -61,6 +61,9 @@ static LogicalType GetJSONType(StructNames &const_struct_names, const LogicalTyp
// The nested types need to conform as well
case LogicalTypeId::LIST:
return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type)));
case LogicalTypeId::ARRAY:
return LogicalType::ARRAY(GetJSONType(const_struct_names, ArrayType::GetChildType(type)),
ArrayType::GetSize(type));
// Struct and MAP are treated as JSON values
case LogicalTypeId::STRUCT: {
child_list_t<LogicalType> child_types;
@@ -435,6 +438,9 @@ static void CreateValuesList(const StructNames &names, yyjson_mut_doc *doc, yyjs

static void CreateValuesArray(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
idx_t count) {

value_v.Flatten(count);

// Initialize array for the nested values
auto &child_v = ArrayVector::GetEntry(value_v);
auto array_size = ArrayType::GetSize(value_v.GetType());
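With the ARRAY case added to GetJSONType (plus the Flatten call in CreateValuesArray), fixed-size arrays now serialize the same way variable-size LISTs already did. A minimal sketch, assuming the JSON extension is available in the build (the values are illustrative):

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// A fixed-size INTEGER[3] array is converted to a JSON list; the j column
	// should contain [1,2,3].
	auto result = con.Query("SELECT to_json([1, 2, 3]::INTEGER[3]) AS j;");
	result->Print();
	return 0;
}
```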
17 changes: 8 additions & 9 deletions src/duckdb/extension/json/json_functions/json_transform.cpp
@@ -13,13 +13,13 @@

namespace duckdb {

JSONTransformOptions::JSONTransformOptions() {
JSONTransformOptions::JSONTransformOptions() : parameters(false, &error_message) {
}

JSONTransformOptions::JSONTransformOptions(bool strict_cast_p, bool error_duplicate_key_p, bool error_missing_key_p,
bool error_unkown_key_p)
: strict_cast(strict_cast_p), error_duplicate_key(error_duplicate_key_p), error_missing_key(error_missing_key_p),
error_unknown_key(error_unkown_key_p) {
error_unknown_key(error_unkown_key_p), parameters(false, &error_message) {
}

//! Forward declaration for recursion
@@ -135,25 +135,24 @@ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_
bool success;
switch (unsafe_yyjson_get_tag(val)) {
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
success = OP::template Operation<string_t, T>(GetString(val), result, &options.error_message, w, s);
success = OP::template Operation<string_t, T>(GetString(val), result, options.parameters, w, s);
break;
case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
success = false;
break;
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &options.error_message, w, s);
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, options.parameters, w, s);
break;
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
success =
OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, &options.error_message, w, s);
success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, options.parameters, w, s);
break;
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &options.error_message, w, s);
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, options.parameters, w, s);
break;
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &options.error_message, w, s);
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, options.parameters, w, s);
break;
default:
throw InternalException("Unknown yyjson tag in GetValueString");
@@ -982,7 +981,7 @@ static bool JSONToAnyCast(Vector &source, Vector &result, idx_t count, CastParam

auto success = TransformFunctionInternal(source, count, result, alc, options);
if (!success) {
HandleCastError::AssignError(options.error_message, parameters.error_message);
HandleCastError::AssignError(options.error_message, parameters);
}
return success;
}
1 change: 1 addition & 0 deletions src/duckdb/extension/json/json_scan.cpp
@@ -569,6 +569,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
if (file_done) {
lock_guard<mutex> guard(gstate.lock);
TryIncrementFileIndex(gstate);
lock_guard<mutex> reader_guard(current_reader->lock);
current_reader->GetFileHandle().Close();
}

18 changes: 17 additions & 1 deletion src/duckdb/extension/parquet/column_reader.cpp
@@ -1004,7 +1004,23 @@ idx_t CastColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data
}
}
}
VectorOperations::DefaultCast(intermediate_vector, result, amount);
string error_message;
bool all_succeeded = VectorOperations::DefaultTryCast(intermediate_vector, result, amount, &error_message);
if (!all_succeeded) {
string extended_error;
extended_error =
StringUtil::Format("In file \"%s\" the column \"%s\" has type %s, but we are trying to read it as type %s.",
reader.file_name, schema.name, intermediate_vector.GetType(), result.GetType());
extended_error += "\nThis can happen when reading multiple Parquet files. The schema information is taken from "
"the first Parquet file by default. Possible solutions:\n";
extended_error += "* Enable the union_by_name=True option to combine the schema of all Parquet files "
"(duckdb.org/docs/data/multiple_files/combining_schemas)\n";
extended_error += "* Use a COPY statement to automatically derive types from an existing table.";
throw ConversionException(
"In Parquet reader of file \"%s\": failed to cast column \"%s\" from type %s to %s: %s\n\n%s",
reader.file_name, schema.name, intermediate_vector.GetType(), result.GetType(), error_message,
extended_error);
}
return amount;
}

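Replacing DefaultCast with DefaultTryCast turns silent or cryptic cast failures into the extended ConversionException built above. A hedged sketch of how a caller would hit it and apply the suggested union_by_name workaround (the file names are placeholders):

```cpp
#include "duckdb.hpp"
#include <cstdio>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// If a.parquet and b.parquet disagree on a column's type, this now fails
	// with the detailed message assembled in CastColumnReader::Read.
	auto bad = con.Query("SELECT * FROM read_parquet(['a.parquet', 'b.parquet']);");
	if (bad->HasError()) {
		std::printf("%s\n", bad->GetError().c_str());
	}

	// One of the suggested fixes: merge the schemas of all files by column name.
	auto ok = con.Query(
	    "SELECT * FROM read_parquet(['a.parquet', 'b.parquet'], union_by_name = true);");
	if (ok->HasError()) {
		std::printf("%s\n", ok->GetError().c_str());
	}
	return 0;
}
```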