This is an automated email from the ASF dual-hosted git repository.

ptaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new bd03ba8  ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
bd03ba8 is described below

commit bd03ba8345f3c53b27050337166f7ead25e87a54
Author: ptaylor <paul.e.tay...@me.com>
AuthorDate: Sat Mar 2 12:34:16 2019 -0800

    ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches

    Closes https://issues.apache.org/jira/browse/ARROW-4728

    Author: ptaylor <paul.e.tay...@me.com>

    Closes #3789 from trxcllnt/js/fix-assign-zero-length-batches and squashes the following commits:

    4c6ab56f <ptaylor> fix Table#assign when passed zero-length RecordBatches
---
 js/src/util/recordbatch.ts            | 40 +++++++++++++++++------------------
 js/test/unit/table/serialize-tests.ts |  3 ++-
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index 40ced20..4551d0c 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -68,21 +68,23 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
     const fields = [...schema.fields];
     const batchArgs = [] as [number, Data<T[keyof T]>[]][];
     const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
-    let sameLength = false, numBatches = 0, batchLength = 0, batchData: Data<T[keyof T]>[];
+
+    let numBatches = 0, batchLength = 0;
+    let i: number = -1, numColumns = columns.length;
+    let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
 
     while (memo.numBatches-- > 0) {
-        [sameLength, batchLength] = columns.reduce((memo, [chunk]) => {
-            const [same, batchLength] = memo;
-            const chunkLength = chunk ? chunk.length : batchLength;
-            isFinite(batchLength) && same && (memo[0] = chunkLength === batchLength);
-            memo[1] = Math.min(batchLength, chunkLength);
-            return memo;
-        }, [true, Number.POSITIVE_INFINITY] as [boolean, number]);
+        for (batchLength = Number.POSITIVE_INFINITY, i = -1; ++i < numColumns;) {
+            childData[i] = child = columns[i].shift()!;
+            batchLength = Math.min(batchLength, child ? child.length : batchLength);
+        }
 
-        if (isFinite(batchLength) && !(sameLength && batchLength <= 0)) {
-            batchData = distributeChildData(fields, batchLength, columns, memo);
-            batchLength > 0 && (batchArgs[numBatches++] = [batchLength, batchData]);
+        if (isFinite(batchLength)) {
+            childData = distributeChildData(fields, batchLength, childData, columns, memo);
+            if (batchLength > 0) {
+                batchArgs[numBatches++] = [batchLength, childData.slice()];
+            }
         }
     }
 
     return [
@@ -92,27 +94,25 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
 }
 
 /** @ignore */
-function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
+function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, childData: Data<T[keyof T]>[], columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
     let data: Data<T[keyof T]>;
     let field: Field<T[keyof T]>;
-    let chunks: Data<T[keyof T]>[];
     let length = 0, i = -1, n = columns.length;
-    const batchData = [] as Data<T[keyof T]>[];
     const bitmapLength = ((batchLength + 63) & ~63) >> 3;
     while (++i < n) {
-        if ((data = (chunks = columns[i]).shift()!) && ((length = data.length) >= batchLength)) {
+        if ((data = childData[i]) && ((length = data.length) >= batchLength)) {
             if (length === batchLength) {
-                batchData[i] = data;
+                childData[i] = data;
             } else {
-                batchData[i] = data.slice(0, batchLength);
+                childData[i] = data.slice(0, batchLength);
                 data = data.slice(batchLength, length - batchLength);
-                memo.numBatches = Math.max(memo.numBatches, chunks.push(data));
+                memo.numBatches = Math.max(memo.numBatches, columns[i].unshift(data));
             }
         } else {
             (field = fields[i]).nullable || (fields[i] = field.clone({ nullable: true }) as Field<T[keyof T]>);
-            batchData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
+            childData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
                 : new Data(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data<T[keyof T]>;
         }
     }
-    return batchData;
+    return childData;
 }
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index a4a2df2..f337913 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -65,7 +65,8 @@ describe('Table#serialize()', () => {
     const chunkLengths = [] as number[];
     for (let i = -1; ++i < 3;) {
-        chunkLengths[i] = (Math.random() * 100) | 0;
+        chunkLengths[i * 2] = (Math.random() * 100) | 0;
+        chunkLengths[i * 2 + 1] = 0;
         const table = <T extends { [key: string]: DataType } = any>(schema: Schema<T>) => createTable(schema, chunkLengths);
         test(`Table#select round-trips through serialization`, () => {
             const source = table(schema1).select('a', 'c');
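
For context, here is a rough sketch of the user-facing scenario this patch addresses: calling Table#assign when one of the tables carries a zero-length chunk, which the updated serialize test now exercises by interleaving zero-length chunkLengths. The sketch is not part of the commit; the Table.new, Column.new, and IntVector/FloatVector.from names are assumptions about the 0.13-era Arrow JS API and may differ in other releases.

import { Table, Column, IntVector, FloatVector } from 'apache-arrow';

// Illustrative only; API names assumed from the 0.13-era library.
// Column "a" is built from two chunks, one of them zero-length --
// analogous to the zero-length chunkLengths the updated test generates.
const a = Column.new('a',
    IntVector.from(new Int32Array([1, 2, 3])),
    IntVector.from(new Int32Array([])));                 // zero-length chunk
const b = Column.new('b',
    FloatVector.from(new Float32Array([0.1, 0.2, 0.3])));

// Table#assign redistributes both tables' chunks across shared RecordBatches
// (uniformlyDistributeChunksAcrossRecordBatches above); before bd03ba8 that
// redistribution mishandled zero-length chunks.
const assigned = Table.new(a).assign(Table.new(b));
console.log(assigned.count(), assigned.schema.fields.map((f) => f.name));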