Skip to content

Commit

Permalink
bug: empty logical type container is created if there is no matching logical type (#314)
Browse files Browse the repository at this point in the history

* do not create a logical type container if there is no corresponding logical type

* fix test asserts and bump version to 4.9.2
  • Loading branch information
aloneguid authored Apr 25, 2023
1 parent 3c0c0f3 commit 4991b9e
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/full.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: 'Full Workflow'

env:
VERSION: 4.9.1
VERSION: 4.9.2
ASM_VERSION: 4.0.0

on:
Expand Down
2 changes: 1 addition & 1 deletion src/Parquet.PerfRunner/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@
//var c = new Classes();
//c.SetUp();
//c.Serialise();
await ParquetReader.ReadTableFromFileAsync("C:\\Users\\alone\\Downloads\\wide_parquet\\wide_parquet.parquet");
//await ParquetReader.ReadTableFromFileAsync("C:\\Users\\alone\\Downloads\\wide_parquet\\wide_parquet.parquet");
}
49 changes: 49 additions & 0 deletions src/Parquet.Test/Integration/WriteQuestionableTypesTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Parquet.Data;
using Parquet.Schema;
using Xunit;
using F = System.IO.File;
using Path = System.IO.Path;

namespace Parquet.Test.Integration {
    /// <summary>
    /// Integration tests that write a single value of a given CLR type using a plain
    /// <c>DataField&lt;T&gt;</c> and compare the text returned by <c>ExecMrCat</c>
    /// against an expected JSON line.
    /// </summary>
    public class WriteQuestionableTypesTest : IntegrationBase {

        /// <summary>
        /// Writes <paramref name="dc"/> as the only column of a single row group into a
        /// temporary parquet file and returns whatever <c>ExecMrCat</c> reports for that file.
        /// </summary>
        /// <param name="schema">Schema the writer is created with.</param>
        /// <param name="dc">Column to write into the row group.</param>
        /// <returns>The <c>ExecMrCat</c> output, or an empty string when it yields <c>null</c>.</returns>
        private async Task<string> ReadWithPQT(ParquetSchema schema, DataColumn dc) {
            string path = Path.GetFullPath($"temp.{nameof(WriteQuestionableTypesTest)}.parquet");

            // File.OpenWrite does not truncate, so drop any file left over from a previous run.
            if(F.Exists(path)) {
                F.Delete(path);
            }

            // Disposed innermost-first: row group, then writer, then the file stream.
            using(Stream output = F.OpenWrite(path))
            using(ParquetWriter writer = await ParquetWriter.CreateAsync(schema, output))
            using(ParquetRowGroupWriter rowGroup = writer.CreateRowGroup()) {
                await rowGroup.WriteColumnAsync(dc);
            }

            // NOTE(review): ExecMrCat comes from the base class; presumably it dumps the file
            // through an external parquet tool - confirm against IntegrationBase.
            return ExecMrCat(path) ?? string.Empty;
        }

        /// <summary>
        /// Writes one <see cref="DateTime"/> value through a default <c>DataField&lt;DateTime&gt;</c>
        /// and checks the exact text reported for it.
        /// </summary>
        [Fact]
        public async Task DateTime_Default() {
            ParquetSchema schema = new ParquetSchema(new DataField<DateTime>("qtype"));
            DateTime[] values = { new DateTime(2023, 4, 25, 1, 2, 3) };
            DataColumn column = new DataColumn(schema.DataFields.First(), values);

            string actual = await ReadWithPQT(schema, column);

            Assert.Equal("{\"qtype\":\"AK4X1GIDAACciSUA\"}", actual);
        }

        /// <summary>
        /// Writes one <see cref="TimeSpan"/> value through a default <c>DataField&lt;TimeSpan&gt;</c>
        /// and checks that it is reported as 25200000 (7 hours expressed in milliseconds).
        /// </summary>
        // NOTE(review): despite the "Timestamp" in its name, this test exercises TimeSpan.
        [Fact]
        public async Task Timestamp_Default() {
            ParquetSchema schema = new ParquetSchema(new DataField<TimeSpan>("qtype"));
            TimeSpan[] values = { TimeSpan.FromHours(7) };
            DataColumn column = new DataColumn(schema.DataFields.First(), values);

            string actual = await ReadWithPQT(schema, column);

            Assert.Equal("{\"qtype\":25200000}", actual);
        }
    }
}
54 changes: 35 additions & 19 deletions src/Parquet/Encodings/SchemaEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ private static void Encode(StructField structField, Thrift.SchemaElement parent,
public static Thrift.SchemaElement Encode(DataField field) {
SType st = field.ClrType;
var tse = new Thrift.SchemaElement(field.Name);
tse.LogicalType = new Thrift.LogicalType();

if(st == typeof(bool)) { // boolean
tse.Type = Thrift.Type.BOOLEAN;
Expand All @@ -348,9 +347,11 @@ public static Thrift.SchemaElement Encode(DataField field) {
bw = 32;
bool signed = st == typeof(sbyte) || st == typeof(short) || st == typeof(int);

tse.LogicalType.INTEGER = new Thrift.IntType {
BitWidth = bw,
IsSigned = signed
tse.LogicalType = new LogicalType {
INTEGER = new Thrift.IntType {
BitWidth = bw,
IsSigned = signed
}
};
tse.Converted_type = bw switch {
8 => signed ? Thrift.ConvertedType.INT_8 : Thrift.ConvertedType.UINT_8,
Expand All @@ -360,7 +361,12 @@ public static Thrift.SchemaElement Encode(DataField field) {
};
} else if(st == typeof(long) || st == typeof(ulong)) { // 64-bit number
tse.Type = Thrift.Type.INT64;
tse.LogicalType.INTEGER = new Thrift.IntType { BitWidth = 64, IsSigned = st == typeof(long) };
tse.LogicalType = new LogicalType {
INTEGER = new Thrift.IntType {
BitWidth = 64,
IsSigned = st == typeof(long)
}
};
tse.Converted_type = st == typeof(long) ? Thrift.ConvertedType.INT_64 : Thrift.ConvertedType.UINT_64;
} else if(st == typeof(float)) { // float
tse.Type = Thrift.Type.FLOAT;
Expand All @@ -370,7 +376,9 @@ public static Thrift.SchemaElement Encode(DataField field) {
tse.Type = Thrift.Type.INT96;
} else if(st == typeof(string)) { // string
tse.Type = Thrift.Type.BYTE_ARRAY;
tse.LogicalType.STRING = new Thrift.StringType();
tse.LogicalType = new LogicalType {
STRING = new Thrift.StringType()
};
tse.Converted_type = Thrift.ConvertedType.UTF8;
} else if(st == typeof(decimal)) { // decimal

Expand Down Expand Up @@ -398,9 +406,11 @@ public static Thrift.SchemaElement Encode(DataField field) {
tse.Type_length = 16;
}

tse.LogicalType.DECIMAL = new Thrift.DecimalType {
Precision = precision,
Scale = scale
tse.LogicalType = new LogicalType {
DECIMAL = new Thrift.DecimalType {
Precision = precision,
Scale = scale
}
};
tse.Precision = precision;
tse.Scale = scale;
Expand All @@ -427,25 +437,29 @@ public static Thrift.SchemaElement Encode(DataField field) {
#if NET6_0_OR_GREATER
} else if(st == typeof(DateOnly)) { // DateOnly
tse.Type = Thrift.Type.INT32;
tse.LogicalType.DATE = new Thrift.DateType();
tse.LogicalType = new LogicalType { DATE = new Thrift.DateType() };
tse.Converted_type = Thrift.ConvertedType.DATE;
#endif
} else if(st == typeof(TimeSpan)) { // TimeSpan
if(field is TimeSpanDataField dfTime) {
switch(dfTime.TimeSpanFormat) {
case TimeSpanFormat.MilliSeconds:
tse.Type = Thrift.Type.INT32;
tse.LogicalType.TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MILLIS = new Thrift.MilliSeconds() }
tse.LogicalType = new LogicalType {
TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MILLIS = new Thrift.MilliSeconds() }
}
};
tse.Converted_type = Thrift.ConvertedType.TIME_MILLIS;
break;
case TimeSpanFormat.MicroSeconds:
tse.Type = Thrift.Type.INT64;
tse.LogicalType.TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MICROS = new Thrift.MicroSeconds() }
tse.LogicalType = new LogicalType {
TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MICROS = new Thrift.MicroSeconds() }
}
};
tse.Converted_type = Thrift.ConvertedType.TIME_MICROS;
break;
Expand All @@ -454,9 +468,11 @@ public static Thrift.SchemaElement Encode(DataField field) {
}
} else {
tse.Type = Thrift.Type.INT32;
tse.LogicalType.TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MILLIS = new Thrift.MilliSeconds() }
tse.LogicalType = new LogicalType {
TIME = new Thrift.TimeType {
IsAdjustedToUTC = true,
Unit = new Thrift.TimeUnit { MILLIS = new Thrift.MilliSeconds() }
}
};
tse.Converted_type = Thrift.ConvertedType.TIME_MILLIS;
}
Expand Down

0 comments on commit 4991b9e

Please sign in to comment.