Skip to content

Commit

Permalink
nullable guid
Browse files Browse the repository at this point in the history
  • Loading branch information
aloneguid committed May 16, 2023
1 parent db44a56 commit 1bd8eb5
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Apache Parquet for .NET [![NuGet](https://img.shields.io/nuget/v/Parquet.Net.svg)](https://www.nuget.org/packages/Parquet.Net) [![Nuget](https://img.shields.io/nuget/dt/Parquet.Net)](https://www.nuget.org/packages/Parquet.Net)

![Icon](https://www.aloneguid.uk/parquet/banner.png)
![Icon](https://camo.githubusercontent.com/88a2d331de69292268893ec310b8b46216c9c8f7f43c34edd918e42ce74ac51a/68747470733a2f2f7777772e616c6f6e65677569642e756b2f706172717565742f62616e6e65722e706e67)

**Fully managed, safe, extremely fast** .NET library to 📖read and ✍️write [Apache Parquet](https://parquet.apache.org/) files designed for .NET world (not a wrapper). Targets `.NET 7`, `.NET 6.0`, `.NET Core 3.1`, `.NET Standard 2.1` and `.NET Standard 2.0`.

Expand Down
6 changes: 5 additions & 1 deletion src/Parquet.Test/Types/EndToEndTypeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ public class EndToEndTypeTest : TestBase {
["bool"] = (new DataField<bool>("bool"), true),
["nullable bool"] = (new DataField<bool?>("bool?"), new bool?(true)),

["guid"] = (new DataField<Guid>("uuid"), Guid.NewGuid())
["guid"] = (new DataField<Guid>("uuid"), Guid.NewGuid()),
["nullable guid (not null)"] = (new DataField<Guid?>("uuid"), Guid.NewGuid()),
["nullable guid (null)"] = (new DataField<Guid?>("uuid"), null)

};

Expand Down Expand Up @@ -145,6 +147,8 @@ public class EndToEndTypeTest : TestBase {
[InlineData("bool")]
[InlineData("nullable bool")]
[InlineData("guid")]
[InlineData("nullable guid (null)")]
[InlineData("nullable guid (not null)")]

public async Task Type_writes_and_reads_end_to_end(string name) {
(DataField field, object? expectedValue) input = _nameToData[name];
Expand Down
77 changes: 77 additions & 0 deletions src/Parquet/Extensions/UntypedArrayExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ public static int CalculateNullCountFast(this Array array, int offset, int count
if(t == typeof(byte[])) {
return CalculateNullCount((byte[][])array, offset, count);
}
if(t == typeof(Guid?)) {
return CalculateNullCount((Guid?[])array, offset, count);
}

throw new NotSupportedException($"cannot count nulls in type {t}");
}
Expand Down Expand Up @@ -236,6 +239,15 @@ private static int CalculateNullCount(byte[][] array, int offset, int count) {
}
return r;
}
/// <summary>
/// Counts the null entries inside a window of a nullable <see cref="Guid"/> array.
/// </summary>
/// <param name="array">Array to inspect.</param>
/// <param name="offset">Index of the first element of the window.</param>
/// <param name="count">Number of elements in the window, starting at <paramref name="offset"/>.</param>
/// <returns>How many elements in the window have no value.</returns>
private static int CalculateNullCount(Guid?[] array, int offset, int count) {
    // "count" is a length, not an end index: the window is [offset, offset + count).
    // Bounding the loop by "count" alone undercounts whenever offset > 0.
    int end = offset + count;
    int r = 0;
    for(int i = offset; i < end; i++) {
        if(!array[i].HasValue) {
            r++;
        }
    }
    return r;
}
#endregion

#region [ Null Packing ]
Expand Down Expand Up @@ -382,6 +394,13 @@ public static void PackNullsFast(this Array array,
dest, fillerValue);
return;
}
if(t == typeof(Guid?)) {
PackNullsTypeFast((Guid?[])array,
offset, count,
(Guid[])packedData,
dest, fillerValue);
return;
}

throw new NotSupportedException($"cannot pack type {t}");
}
Expand Down Expand Up @@ -728,6 +747,25 @@ private static void PackNullsTypeFast(byte[][] array,
}
}

/// <summary>
/// Splits a window of a nullable <see cref="Guid"/> array into a dense value array
/// and a per-element flag span.
/// </summary>
/// <param name="array">Source array that may contain nulls.</param>
/// <param name="offset">Index of the first source element to process.</param>
/// <param name="count">Number of source elements to process.</param>
/// <param name="packedArray">Receives the non-null values, written contiguously from index 0.</param>
/// <param name="dest">Receives one flag per processed element, written from index 0.</param>
/// <param name="fillerValue">Flag stored for an element that has a value; null elements get 0.</param>
private static void PackNullsTypeFast(Guid?[] array,
    int offset, int count,
    Guid[] packedArray,
    Span<int> dest,
    int fillerValue) {

    int end = offset + count;
    int packedPos = 0;
    int flagPos = 0;

    for(int srcPos = offset; srcPos < end; srcPos++, flagPos++) {
        Guid? element = array[srcPos];

        if(element.HasValue) {
            // flag first, then the value - same write order as before
            dest[flagPos] = fillerValue;
            packedArray[packedPos++] = element.Value;
        }
        else {
            dest[flagPos] = 0;
        }
    }
}


#endregion

Expand Down Expand Up @@ -850,6 +888,12 @@ public static void UnpackNullsFast(this Array array,
(byte[][])result);
return;
}
if(t == typeof(Guid)) {
UnpackNullsTypeFast((Guid[])array,
flags, fillFlag,
(Guid?[])result);
return;
}

throw new NotSupportedException($"cannot pack type {t}");

Expand Down Expand Up @@ -1107,6 +1151,20 @@ private static void UnpackNullsTypeFast(byte[][] array,
}
}

/// <summary>
/// Expands a dense <see cref="Guid"/> array into a nullable array, guided by a flag span.
/// </summary>
/// <param name="array">Dense values, consumed in order starting at index 0.</param>
/// <param name="flags">One flag per result position.</param>
/// <param name="fillFlag">Flag value marking a position that takes the next dense value.</param>
/// <param name="result">Destination array; positions whose flag differs from <paramref name="fillFlag"/> are left untouched.</param>
private static void UnpackNullsTypeFast(Guid[] array,
    Span<int> flags, int fillFlag,
    Guid?[] result) {

    int nextValue = 0;
    for(int pos = 0; pos < flags.Length; pos++) {
        if(flags[pos] != fillFlag) {
            continue;
        }

        result[pos] = array[nextValue++];
    }
}


#endregion

Expand Down Expand Up @@ -1209,6 +1267,11 @@ public static void ExplodeFast(this Array dictionary,
indexes, (byte[][])result, resultOffset, resultCount);
return;
}
if(t == typeof(Guid)) {
ExplodeTypeFast((Guid[])dictionary,
indexes, (Guid[])result, resultOffset, resultCount);
return;
}

throw new NotSupportedException($"cannot pack type {t}");
}
Expand Down Expand Up @@ -1465,6 +1528,20 @@ private static void ExplodeTypeFast(byte[][] dictionary,
}
}

/// <summary>
/// Resolves dictionary indexes into values: for each of the first
/// <paramref name="resultCount"/> indexes, copies the referenced dictionary entry
/// into <paramref name="result"/>.
/// </summary>
/// <param name="dictionary">Lookup table of values.</param>
/// <param name="indexes">Positions into <paramref name="dictionary"/>.</param>
/// <param name="result">Destination array.</param>
/// <param name="resultOffset">Position in <paramref name="result"/> that receives the first value.</param>
/// <param name="resultCount">Number of indexes to process.</param>
private static void ExplodeTypeFast(Guid[] dictionary,
    Span<int> indexes,
    Guid[] result, int resultOffset, int resultCount) {

    int target = resultOffset;
    for(int n = 0; n < resultCount; n++, target++) {
        int dictIndex = indexes[n];

        // indexes at or beyond the end of the dictionary are skipped; the target slot keeps its value
        if(dictIndex >= dictionary.Length) {
            continue;
        }

        // single-element copy between typed arrays, so no boxing is involved
        Array.Copy(dictionary, dictIndex, result, target, 1);
    }
}


#endregion

Expand Down
2 changes: 1 addition & 1 deletion src/Parquet/Extensions/UntypedArrayExtensions.tt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<#@ import namespace="System.Collections.Generic" #>
<#@ output extension=".cs" #>
<#
var types = new[] { "bool?", "byte?", "sbyte?", "short?", "ushort?", "int?", "uint?", "long?", "ulong?", "BigInteger?", "float?", "double?", "decimal?", "DateTime?", "TimeSpan?", "Interval?", "string", "byte[]" };
var types = new[] { "bool?", "byte?", "sbyte?", "short?", "ushort?", "int?", "uint?", "long?", "ulong?", "BigInteger?", "float?", "double?", "decimal?", "DateTime?", "TimeSpan?", "Interval?", "string", "byte[]", "Guid?" };

string nn(string nt) => nt.EndsWith("?") ? nt.Substring(0, nt.Length - 1) : nt;
#>
Expand Down
4 changes: 4 additions & 0 deletions src/Parquet/Parquet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>BitPackedEncoder.cs</LastGenOutput>
</None>
<None Update="Extensions\UntypedArrayExtensions.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>UntypedArrayExtensions.cs</LastGenOutput>
</None>
</ItemGroup>

<ItemGroup>
Expand Down

0 comments on commit 1bd8eb5

Please sign in to comment.