// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { BN } from '../util/bn.js';
import { Data } from '../data.js';
import { Field } from '../schema.js';
import { Vector } from '../vector.js';
import { Visitor } from '../visitor.js';
import { BufferType } from '../enum.js';
import { RecordBatch } from '../recordbatch.js';
import { UnionMode, DateUnit, TimeUnit } from '../enum.js';
import { BitIterator, getBit, getBool } from '../util/bit.js';
import {
    DataType,
    Float, Int, Date_, Interval, Time, Timestamp, Union, Duration,
    Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
    IntArray,
} from '../type.js';

/**
 * Typing surface for the assembler: one `visitXxx` signature per data type,
 * each describing the JSON fragment (`DATA`, `OFFSET`, `TYPE_ID`, `children`)
 * that the matching method on the class below contributes.
 * @ignore
 */
export interface JSONVectorAssembler extends Visitor {

    visit(field: Field, node: Data): Record;
    visitMany(fields: Field[], nodes: readonly Data[]): Record[];
    getVisitFn(node: Vector | Data): (data: Data) => { name: string; count: number; VALIDITY: (0 | 1)[]; DATA?: any[]; OFFSET?: number[]; TYPE_ID?: number[]; children?: any[] };

    visitNull(data: Data): Record;
    visitBool(data: Data): { DATA: boolean[] };
    visitInt(data: Data): { DATA: number[] | string[] };
    visitFloat(data: Data): { DATA: number[] };
    visitUtf8(data: Data): { DATA: string[]; OFFSET: number[] };
    visitLargeUtf8(data: Data): { DATA: string[]; OFFSET: string[] };
    visitBinary(data: Data): { DATA: string[]; OFFSET: number[] };
    visitLargeBinary(data: Data): { DATA: string[]; OFFSET: string[] };
    visitFixedSizeBinary(data: Data): { DATA: string[] };
    visitDate(data: Data): { DATA: number[] };
    visitTimestamp(data: Data): { DATA: string[] };
    visitTime(data: Data): { DATA: number[] };
    visitDecimal(data: Data): { DATA: string[] };
    visitList(data: Data): { children: any[]; OFFSET: number[] };
    visitStruct(data: Data): { children: any[] };
    visitUnion(data: Data): { children: any[]; TYPE_ID: number[] };
    visitInterval(data: Data): { DATA: number[] };
    visitDuration(data: Data): { DATA: string[] };
    visitFixedSizeList(data: Data): { children: any[] };
    visitMap(data: Data): { children: any[] };
}

/**
 * Visitor that turns the columns of record batches into plain JSON-style
 * objects: every column becomes `{ name, count, VALIDITY, ... }`, where the
 * trailing keys are supplied by the type-specific `visitXxx` method.
 * @ignore
 */
export class JSONVectorAssembler extends Visitor {

    /**
     * Assemble each batch into an array of per-field JSON objects.
     *
     * A single assembler instance is shared across all batches.
     *
     * @param batches Batches whose `schema.fields` are paired with `data.children`.
     * @returns One array of assembled columns per input batch, in input order.
     * @nocollapse
     */
    public static assemble(...batches: T[]) {
        const assembler = new JSONVectorAssembler();
        return batches.map(({ schema, data }) => {
            return assembler.visitMany(schema.fields, data.children);
        });
    }

    /**
     * Assemble one column.
     *
     * @param field Only its `name` is read.
     * @param data The column data to serialise.
     * @returns `name`, `count` and `VALIDITY`, merged with whatever the
     * type-specific visit method returns.
     */
    public visit({ name }: Field, data: Data) {
        const { length } = data;
        const { offset, nullCount, nullBitmap } = data;
        // Dictionary-encoded data is dispatched on the type of its indices.
        const type = DataType.isDictionary(data.type) ? data.type.indices : data.type;
        // Copy the buffer list with the validity slot cleared; validity is emitted
        // separately below as a 0/1 array.
        const buffers = Object.assign([], data.buffers, { [BufferType.VALIDITY]: undefined });
        return {
            'name': name,
            'count': length,
            // Null and Union columns emit no VALIDITY; columns without nulls emit all 1s;
            // otherwise the bits are read out of the null bitmap.
            'VALIDITY': (DataType.isNull(type) || DataType.isUnion(type))
                ? undefined
                : nullCount <= 0 ? Array.from({ length }, () => 1)
                    : [...new BitIterator(nullBitmap, offset, length, null, getBit)],
            // NOTE(review): the literal 0 is presumably the clone's null count -- confirm against Data.clone.
            ...super.visit(data.clone(type, offset, length, 0, buffers))
        };
    }

    /** Null columns contribute no extra keys. */
    public visitNull() { return {}; }

    /** Booleans are unpacked from the bit-packed values buffer. */
    public visitBool({ values, offset, length }: Data) {
        return { 'DATA': [...new BitIterator(values, offset, length, null, getBool)] };
    }

    /** Integers narrower than 64 bits are emitted as-is; otherwise as strings built from two 32-bit words each. */
    public visitInt(data: Data) {
        return {
            'DATA': data.type.bitWidth < 64
                ? [...data.values]
                : [...bigNumsToStrings(data.values, 2)]
        };
    }

    /** Floats are emitted as the raw contents of the values buffer. */
    public visitFloat(data: Data) {
        return { 'DATA': [...data.values] };
    }

    /** Strings are read through a Vector; offsets are emitted as numbers. */
    public visitUtf8(data: Data) {
        return { 'DATA': [...new Vector([data])], 'OFFSET': [...data.valueOffsets] };
    }

    /** Same as `visitUtf8`, but offsets are stringified from two 32-bit words each. */
    public visitLargeUtf8(data: Data) {
        return { 'DATA': [...new Vector([data])], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] };
    }

    /** Binary values are hex-encoded; offsets are emitted as numbers. */
    public visitBinary(data: Data) {
        return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...data.valueOffsets] };
    }

    /** Same as `visitBinary`, but offsets are stringified from two 32-bit words each. */
    public visitLargeBinary(data: Data) {
        return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] };
    }

    /** Fixed-size binary values are hex-encoded; no offsets are emitted. */
    public visitFixedSizeBinary(data: Data) {
        return { 'DATA': [...binaryToString(new Vector([data]))] };
    }

    /** `DateUnit.DAY` values are emitted as-is; other units are stringified from two 32-bit words each. */
    public visitDate(data: Data) {
        return {
            'DATA': data.type.unit === DateUnit.DAY
                ? [...data.values]
                : [...bigNumsToStrings(data.values, 2)]
        };
    }

    /** Timestamps are always stringified from two 32-bit words each. */
    public visitTimestamp(data: Data) {
        return { 'DATA': [...bigNumsToStrings(data.values, 2)] };
    }

    /** Units ordered before `TimeUnit.MICROSECOND` are emitted as-is; the rest are stringified from two 32-bit words each. */
    public visitTime(data: Data) {
        return {
            'DATA': data.type.unit < TimeUnit.MICROSECOND
                ? [...data.values]
                : [...bigNumsToStrings(data.values, 2)]
        };
    }

    /** Decimals are stringified from four 32-bit words each. */
    public visitDecimal(data: Data) {
        return { 'DATA': [...bigNumsToStrings(data.values, 4)] };
    }

    /** Lists emit their offsets and recurse into their children. */
    public visitList(data: Data) {
        return {
            'OFFSET': [...data.valueOffsets],
            'children': this.visitMany(data.type.children, data.children)
        };
    }

    /** Structs only recurse into their children. */
    public visitStruct(data: Data) {
        return {
            'children': this.visitMany(data.type.children, data.children)
        };
    }

    /** Unions emit type ids, offsets for dense unions only, and their children. */
    public visitUnion(data: Data) {
        return {
            'TYPE_ID': [...data.typeIds],
            'OFFSET': data.type.mode === UnionMode.Dense ? [...data.valueOffsets] : undefined,
            'children': this.visitMany(data.type.children, data.children)
        };
    }

    /** Intervals are emitted as the raw contents of the values buffer. */
    public visitInterval(data: Data) {
        return { 'DATA': [...data.values] };
    }

    /** Durations are always stringified from two 32-bit words each. */
    public visitDuration(data: Data) {
        return { 'DATA': [...bigNumsToStrings(data.values, 2)] };
    }

    /** Fixed-size lists only recurse into their children (no offsets). */
    public visitFixedSizeList(data: Data) {
        return {
            'children': this.visitMany(data.type.children, data.children)
        };
    }

    /** Maps emit their offsets and recurse into their children. */
    public visitMap(data: Data) {
        return {
            'OFFSET': [...data.valueOffsets],
            'children': this.visitMany(data.type.children, data.children)
        };
    }
}

/**
 * Yield each element of `vector` as an upper-case hex string, two digits per byte.
 * @ignore
 */
function* binaryToString(vector: Vector | Vector | Vector) {
    for (const octets of vector as Iterable) {
        yield octets.reduce((str, byte) => {
            // Left-pad with '0' so every byte contributes exactly two hex digits.
            return `${str}${('0' + (byte & 0xFF).toString(16)).slice(-2)}`;
        }, '').toUpperCase();
    }
}

/**
 * Yield the string form of each wide number stored in `values`.
 *
 * The bytes of `values` are reinterpreted as 32-bit words and grouped
 * `stride` words at a time; each group is handed to `BN.new(..., false)` and
 * stringified through a template literal.
 *
 * @param values Typed array holding the numbers. It may be a view over part of
 * a larger `ArrayBuffer`; only the bytes covered by the view are read.
 * @param stride Number of 32-bit words per value (callers in this file pass 2 or 4).
 * @ignore
 */
function* bigNumsToStrings(values: BigUint64Array | BigInt64Array | Uint32Array | Int32Array | IntArray, stride: number) {
    const { buffer, byteOffset, byteLength } = values;
    const wordBytes = Uint32Array.BYTES_PER_ELEMENT;
    // Restrict the 32-bit window to the bytes `values` actually covers. Wrapping
    // `values.buffer` alone would start at byte 0 of the backing buffer and run
    // to its end, producing wrong and surplus entries for a sliced view.
    const u32s = byteOffset % wordBytes === 0
        ? new Uint32Array(buffer, byteOffset, Math.floor(byteLength / wordBytes))
        // A typed-array view cannot start at an unaligned offset, so copy the bytes instead.
        : new Uint32Array(buffer.slice(byteOffset, byteOffset + byteLength - (byteLength % wordBytes)));
    for (let i = -1, n = u32s.length / stride; ++i < n;) {
        yield `${BN.new(u32s.subarray((i + 0) * stride, (i + 1) * stride), false)}`;
    }
}