This is an automated email from the ASF dual-hosted git repository. ptaylor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new ddcb8f5  ARROW-2789: [JS] Add iterator to DataFrame
ddcb8f5 is described below

commit ddcb8f58301ae7376bac91f17c2c1ee37fd8ea95
Author: Brian Hulette <hulet...@gmail.com>
AuthorDate: Sun Jul 8 18:59:49 2018 -0700

    ARROW-2789: [JS] Add iterator to DataFrame

    Add an iterator to `FilteredDataFrame` and the `DataFrame` interface

    Author: Brian Hulette <hulet...@gmail.com>

    Closes #2215 from TheNeuralBit/dataframe-improvements and squashes the following commits:

    dba0e618 <Brian Hulette> rename length getter back to count()
    a43269f5 <Brian Hulette> fix tests
    664a053c <Brian Hulette> Add iterator and readonly length member
---
 js/src/table.ts | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/js/src/table.ts b/js/src/table.ts
index de06dd7..8144c98 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -29,10 +29,11 @@ export type NextFunc = (idx: number, batch: RecordBatch) => void;
 export type BindFunc = (batch: RecordBatch) => void;

 export interface DataFrame {
+    count(): number;
     filter(predicate: Predicate): DataFrame;
     scan(next: NextFunc, bind?: BindFunc): void;
-    count(): number;
     countBy(col: (Col|string)): CountByResult;
+    [Symbol.iterator](): IterableIterator<Struct['TValue']>;
 }

 export class Table implements DataFrame {
@@ -143,7 +144,6 @@ export class Table implements DataFrame {
             }
         }
     }
-    public count(): number { return this.length; }
     public countBy(name: Col | string): CountByResult {
         const batches = this.batches, numBatches = batches.length;
         const count_by = typeof name === 'string' ? new Col(name) : name;
@@ -171,6 +171,9 @@ export class Table implements DataFrame {
         }
         return new CountByResult(vector.dictionary, IntVector.from(counts));
     }
+    public count(): number {
+        return this.length;
+    }
     public select(...columnNames: string[]) {
         return new Table(this.batches.map((batch) => batch.select(...columnNames)));
     }
@@ -239,6 +242,26 @@ class FilteredDataFrame implements DataFrame {
         }
         return sum;
     }
+    public *[Symbol.iterator](): IterableIterator<Struct['TValue']> {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        const batches = this.batches;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            // TODO: bind batches lazily
+            // If predicate doesn't match anything in the batch we don't need
+            // to bind the callback
+            const predicate = this.predicate.bind(batch);
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) { yield batch.get(index) as any; }
+            }
+        }
+    }
     public filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(
             this.batches,