diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml index cb0223e3..aa39777c 100644 --- a/.github/workflows/full.yml +++ b/.github/workflows/full.yml @@ -1,7 +1,7 @@ name: 'Full Workflow' env: - VERSION: 4.10.0 + VERSION: 4.10.1 ASM_VERSION: 4.0.0 on: diff --git a/src/Parquet/ParquetExtensions.cs b/src/Parquet/ParquetExtensions.cs index 136d4066..66e118a6 100644 --- a/src/Parquet/ParquetExtensions.cs +++ b/src/Parquet/ParquetExtensions.cs @@ -94,19 +94,28 @@ public static async Task WriteAsync(this Table table, string path, /// /// Open reader /// + /// When specified, only row group at index will be read. /// public static async Task ReadAsTableAsync(this ParquetReader reader, - TableReaderProgressCallback? progressCallback = null) { + TableReaderProgressCallback? progressCallback = null, + int? rowGroupIndex = null) { Table? result = null; DataField[] dataFields = reader.Schema!.GetDataFields(); - int stepsTotal = dataFields.Length * reader.RowGroupCount; + var rowGroupIndexes = new List(); + if(rowGroupIndex != null) { + rowGroupIndexes.Add(rowGroupIndex.Value); + } else { + rowGroupIndexes.AddRange(Enumerable.Range(0, reader.RowGroupCount)); + } + + int stepsTotal = dataFields.Length * rowGroupIndexes.Count; int currentStep = 0; if(reader.RowGroupCount == 0) { result = new Table(reader.Schema, null, 0); } else { - for(int i = 0; i < reader.RowGroupCount; i++) { + foreach(int i in rowGroupIndexes) { using(ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(i)) { DataColumn[] allData = new DataColumn[dataFields.Length];