// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. import { Data } from '../data.js'; import { BN } from '../util/bn.js'; import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; import { MapRow } from '../row/map.js'; import { StructRow, StructRowProxy } from '../row/struct.js'; import { bigIntToNumber, divideBigInts } from '../util/bigint.js'; import { decodeUtf8 } from '../util/utf8.js'; import { TypeToDataType } from '../interfaces.js'; import { uint16ToFloat64 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, Interval, IntervalDayTime, IntervalYearMonth, Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond, Union, DenseUnion, SparseUnion, } from '../type.js'; /** @ignore */ export 
interface GetVisitor extends Visitor {
    // NOTE(review): every signature in this interface refers to `T`, yet no
    // type-parameter list (`<T extends ...>`) is declared anywhere in view --
    // they appear to have been stripped from this copy of the file. The second
    // `getVisitFn` overload additionally carried a dangling `>` right after
    // `Data` (a syntax error); it is removed here so the interface parses.
    // TODO: confirm against the canonical source and restore the generics.

    // Generic entry points.
    visit(node: Data, index: number): T['TValue'] | null;
    visitMany(nodes: Data[], indices: number[]): (T['TValue'] | null)[];
    getVisitFn(node: Vector | Data | T): (data: Data, index: number) => T['TValue'] | null;
    getVisitFn(node: T): (data: Data, index: number) => TypeToDataType['TValue'];

    // Null / Bool.
    visitNull(data: Data, index: number): T['TValue'] | null;
    visitBool(data: Data, index: number): T['TValue'] | null;

    // Integers.
    visitInt(data: Data, index: number): T['TValue'] | null;
    visitInt8(data: Data, index: number): T['TValue'] | null;
    visitInt16(data: Data, index: number): T['TValue'] | null;
    visitInt32(data: Data, index: number): T['TValue'] | null;
    visitInt64(data: Data, index: number): T['TValue'] | null;
    visitUint8(data: Data, index: number): T['TValue'] | null;
    visitUint16(data: Data, index: number): T['TValue'] | null;
    visitUint32(data: Data, index: number): T['TValue'] | null;
    visitUint64(data: Data, index: number): T['TValue'] | null;

    // Floats.
    visitFloat(data: Data, index: number): T['TValue'] | null;
    visitFloat16(data: Data, index: number): T['TValue'] | null;
    visitFloat32(data: Data, index: number): T['TValue'] | null;
    visitFloat64(data: Data, index: number): T['TValue'] | null;

    // Strings and binary.
    visitUtf8(data: Data, index: number): T['TValue'] | null;
    visitLargeUtf8(data: Data, index: number): T['TValue'] | null;
    visitBinary(data: Data, index: number): T['TValue'] | null;
    visitLargeBinary(data: Data, index: number): T['TValue'] | null;
    visitFixedSizeBinary(data: Data, index: number): T['TValue'] | null;

    // Dates.
    visitDate(data: Data, index: number): T['TValue'] | null;
    visitDateDay(data: Data, index: number): T['TValue'] | null;
    visitDateMillisecond(data: Data, index: number): T['TValue'] | null;

    // Timestamps.
    visitTimestamp(data: Data, index: number): T['TValue'] | null;
    visitTimestampSecond(data: Data, index: number): T['TValue'] | null;
    visitTimestampMillisecond(data: Data, index: number): T['TValue'] | null;
    visitTimestampMicrosecond(data: Data, index: number): T['TValue'] | null;
    visitTimestampNanosecond(data: Data, index: number): T['TValue'] | null;

    // Times.
    visitTime(data: Data, index: number): T['TValue'] | null;
    visitTimeSecond(data: Data, index: number): T['TValue'] | null;
    visitTimeMillisecond(data: Data, index: number): T['TValue'] | null;
    visitTimeMicrosecond(data: Data, index: number): T['TValue'] | null;
    visitTimeNanosecond(data: Data, index: number): T['TValue'] | null;

    // Decimal and nested types.
    visitDecimal(data: Data, index: number): T['TValue'] | null;
    visitList(data: Data, index: number): T['TValue'] | null;
    visitStruct(data: Data, index: number): T['TValue'] | null;
    visitUnion(data: Data, index: number): T['TValue'] | null;
    visitDenseUnion(data: Data, index: number): T['TValue'] | null;
    visitSparseUnion(data: Data, index: number): T['TValue'] | null;
    visitDictionary(data: Data, index: number): T['TValue'] | null;

    // Intervals.
    visitInterval(data: Data, index: number): T['TValue'] | null;
    visitIntervalDayTime(data: Data, index: number): T['TValue'] | null;
    visitIntervalYearMonth(data: Data, index: number): T['TValue'] | null;

    // Durations.
    visitDuration(data: Data, index: number): T['TValue'] | null;
    visitDurationSecond(data: Data, index: number): T['TValue'] | null;
    visitDurationMillisecond(data: Data, index: number): T['TValue'] | null;
    visitDurationMicrosecond(data: Data, index: number): T['TValue'] | null;
    visitDurationNanosecond(data: Data, index: number): T['TValue'] | null;

    // Fixed-size list and map.
    visitFixedSizeList(data: Data, index: number): T['TValue'] | null;
    visitMap(data: Data, index: number): T['TValue'] | null;
}

/**
 * Visitor whose `visit*` methods are attached to the prototype further down in
 * this file; the class body itself is intentionally empty.
 * @ignore
 */
export class GetVisitor extends Visitor { }

/**
 * Wraps a raw getter so that it first consults `data.getValid(index)`.
 * The wrapped function returns `null` when `getValid` is falsy and otherwise
 * forwards both arguments to `fn` and returns its result.
 * @ignore
 */
function wrapGet(fn: (data: Data, _1: any) => any) {
    return (data: Data, _1: any) => data.getValid(_1) ? fn(data, _1) : null;
}

/**
 * Multiplies `data[index]` by 86400000 (the number of milliseconds in a day).
 * @ignore
 */
const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index];

/**
 * Getter for the Null type: ignores its arguments and always yields `null`.
 * @ignore
 */
const getNull = (_data: Data, _index: number): T['TValue'] => null;

/**
 * Returns the view of `values` delimited by `valueOffsets[index]` and
 * `valueOffsets[index + 1]`, or `null` when `index + 1` is not a valid
 * position in `valueOffsets`. Offsets are passed through `bigIntToNumber`
 * before being used as `subarray` bounds (the offsets array may be a
 * `BigInt64Array`).
 * @ignore
 */
const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array | BigInt64Array, index: number) => {
    if (index + 1 >= valueOffsets.length) {
        return null as any;
    }
    const x = bigIntToNumber(valueOffsets[index]);
    const y = bigIntToNumber(valueOffsets[index + 1]);
    return values.subarray(x, y);
};

/**
 * Reads one bit from a packed bitmap: the absolute position is
 * `offset + index`, the byte is `position >> 3` and the bit inside that byte
 * is `position % 8` (counted from the least-significant bit).
 * @ignore
 */
const getBool = ({ offset, values }: Data, index: number): T['TValue'] => {
    const idx = offset + index;
    const byte = values[idx >> 3];
    return (byte & 1 << (idx % 8)) !== 0;
};

// NOTE(review): the two aliases below are not referenced anywhere in the code
// visible here -- presumably they were the constraints of `getNumeric` and
// `getBigInts` before the type-parameter lists were lost. Confirm.
/** @ignore */
type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Float64;
/** @ignore */
type Numeric2X = Int64 | Uint64;

/**
 * DateDay getter: converts the stored day count to milliseconds via
 * `epochDaysToMs`.
 * @ignore
 */
const getDateDay = ({ values }: Data, index: number): T['TValue'] => epochDaysToMs(values, index);

/**
 * DateMillisecond getter: returns `values[index]` run through `bigIntToNumber`.
 * @ignore
 */
const getDateMillisecond = ({ values }: Data, index: number): T['TValue'] => bigIntToNumber(values[index]);

/**
 * Strided numeric getter: returns `values[stride * index]` unchanged.
 * @ignore
 */
const getNumeric = ({ stride, values }: Data, index: number): T['TValue'] => values[stride * index];

/**
 * Float16 getter: reads `values[stride * index]` and converts it with
 * `uint16ToFloat64`.
 * @ignore
 */
const getFloat16 = ({ stride, values }: Data, index: number): T['TValue'] => uint16ToFloat64(values[stride * index]);

/**
 * Getter used for the 64-bit integer visitors: returns `values[index]` as-is.
 * @ignore
 */
const getBigInts = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * FixedSizeBinary getter: returns the `stride`-wide view of `values` that
 * starts at `stride * index`.
 * @ignore
 */
const getFixedSizeBinary = ({ stride, values }: Data, index: number): T['TValue'] => values.subarray(stride * index, stride * (index + 1));

/**
 * Binary / LargeBinary getter: delegates to `getVariableWidthBytes`.
 * @ignore
 */
const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index);

/**
 * Utf8 / LargeUtf8 getter: fetches the byte range with
 * `getVariableWidthBytes` and decodes it with `decodeUtf8`; yields `null`
 * when no byte range is available.
 * @ignore
 */
const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => {
    const bytes = getVariableWidthBytes(values, valueOffsets, index);
    return bytes !== null ? decodeUtf8(bytes) : null as any;
};

/* istanbul ignore next */
/**
 * Generic Int getter: returns `values[index]`.
 * @ignore
 */
const getInt = ({ values }: Data, index: number): T['TValue'] => values[index];

/* istanbul ignore next */
/**
 * Generic Float getter: returns `values[index]` directly unless the type's
 * precision is `Precision.HALF`, in which case the stored value is converted
 * with `uint16ToFloat64`.
 * @ignore
 */
const getFloat = ({ type, values }: Data, index: number): T['TValue'] => (
    type.precision !== Precision.HALF ? values[index] : uint16ToFloat64(values[index])
);

/* istanbul ignore next */
/**
 * Generic Date getter: dispatches on `data.type.unit` to `getDateDay`
 * (`DateUnit.DAY`) or `getDateMillisecond` (any other unit).
 * @ignore
 */
const getDate = (data: Data, index: number): T['TValue'] => (
    data.type.unit === DateUnit.DAY
        ? getDateDay(data as Data, index)
        : getDateMillisecond(data as Data, index)
);

/**
 * TimestampSecond getter: `1000 * bigIntToNumber(values[index])`.
 * @ignore
 */
const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000 * bigIntToNumber(values[index]);

/**
 * TimestampMillisecond getter: `bigIntToNumber(values[index])`.
 * @ignore
 */
const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => bigIntToNumber(values[index]);

/**
 * TimestampMicrosecond getter: divides the stored value by 1000 using
 * `divideBigInts`.
 * @ignore
 */
const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000));

/**
 * TimestampNanosecond getter: divides the stored value by 1000000 using
 * `divideBigInts`.
 * @ignore
 */
const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000000));

/* istanbul ignore next */
/**
 * Generic Timestamp getter: dispatches on `data.type.unit` to the matching
 * unit-specific getter. There is no `default` branch, so a unit outside the
 * four listed `TimeUnit` members falls through and yields `undefined`.
 * @ignore
 */
const getTimestamp = (data: Data, index: number): T['TValue'] => {
    switch (data.type.unit) {
        case TimeUnit.SECOND: return getTimestampSecond(data as Data, index);
        case TimeUnit.MILLISECOND: return getTimestampMillisecond(data as Data, index);
        case TimeUnit.MICROSECOND: return getTimestampMicrosecond(data as Data, index);
        case TimeUnit.NANOSECOND: return getTimestampNanosecond(data as Data, index);
    }
};

/**
 * TimeSecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getTimeSecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * TimeMillisecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getTimeMillisecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * TimeMicrosecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getTimeMicrosecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * TimeNanosecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getTimeNanosecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/* istanbul ignore next */
/**
 * Generic Time getter: dispatches on `data.type.unit` to the matching
 * unit-specific getter (no `default` branch).
 * @ignore
 */
const getTime = (data: Data, index: number): T['TValue'] => {
    switch (data.type.unit) {
        case TimeUnit.SECOND: return getTimeSecond(data as Data, index);
        case TimeUnit.MILLISECOND: return getTimeMillisecond(data as Data, index);
        case TimeUnit.MICROSECOND: return getTimeMicrosecond(data as Data, index);
        case TimeUnit.NANOSECOND: return getTimeNanosecond(data as Data, index);
    }
};

/**
 * Decimal getter: hands the `stride`-wide view of `values` starting at
 * `stride * index` to `BN.decimal`.
 * @ignore
 */
const getDecimal = ({ values, stride }: Data, index: number): T['TValue'] => BN.decimal(values.subarray(stride * index, stride * (index + 1)));

/**
 * List getter: reads the begin/end offsets for `index` from `valueOffsets`,
 * slices the first child accordingly and returns the slice wrapped in a
 * `Vector`.
 * @ignore
 */
const getList = (data: Data, index: number): T['TValue'] => {
    const { valueOffsets, stride, children } = data;
    const { [index * stride]: begin, [index * stride + 1]: end } = valueOffsets;
    const child: Data = children[0];
    // `slice` is called with (start, length), hence `end - begin`.
    const slice = child.slice(begin, end - begin);
    return new Vector([slice]) as T['TValue'];
};

/**
 * Map getter: reads the begin/end offsets for `index` from `valueOffsets`,
 * slices the first child accordingly and returns the slice wrapped in a
 * `MapRow`.
 * @ignore
 */
const getMap = (data: Data, index: number): T['TValue'] => {
    const { valueOffsets, children } = data;
    const { [index]: begin, [index + 1]: end } = valueOffsets;
    const child = children[0] as Data;
    return new MapRow(child.slice(begin, end - begin));
};

/**
 * Struct getter: returns a new `StructRow` bound to `data` and `index`.
 * @ignore
 */
const getStruct = (data: Data, index: number): T['TValue'] => {
    return new StructRow(data, index) as StructRowProxy;
};

/* istanbul ignore next */
/**
 * Generic Union getter: dispatches on `data.type.mode` to `getDenseUnion`
 * (`UnionMode.Dense`) or `getSparseUnion` (any other mode).
 * @ignore
 */
const getUnion = <
    D extends Data | Data | Data
>(data: D, index: number): D['TValue'] => {
    return data.type.mode === UnionMode.Dense ?
        getDenseUnion(data as Data, index) :
        getSparseUnion(data as Data, index);
};

/**
 * DenseUnion getter: maps `typeIds[index]` to a child through
 * `type.typeIdToChildIndex`, then visits that child at the position stored in
 * `valueOffsets[index]` using the module-level `instance`.
 * @ignore
 */
const getDenseUnion = (data: Data, index: number): T['TValue'] => {
    const childIndex = data.type.typeIdToChildIndex[data.typeIds[index]];
    const child = data.children[childIndex];
    return instance.visit(child, data.valueOffsets[index]);
};

/**
 * SparseUnion getter: maps `typeIds[index]` to a child through
 * `type.typeIdToChildIndex`, then visits that child at the same `index`
 * using the module-level `instance`.
 * @ignore
 */
const getSparseUnion = (data: Data, index: number): T['TValue'] => {
    const childIndex = data.type.typeIdToChildIndex[data.typeIds[index]];
    const child = data.children[childIndex];
    return instance.visit(child, index);
};

/**
 * Dictionary getter: looks up `values[index]` in `data.dictionary`. Because of
 * the optional chaining the result is `undefined` when no dictionary is set.
 * @ignore
 */
const getDictionary = (data: Data, index: number): T['TValue'] => {
    return data.dictionary?.get(data.values[index]);
};

/* istanbul ignore next */
/**
 * Generic Interval getter: dispatches on `data.type.unit` to
 * `getIntervalDayTime` (`IntervalUnit.DAY_TIME`) or `getIntervalYearMonth`
 * (any other unit).
 * @ignore
 */
const getInterval = (data: Data, index: number): T['TValue'] =>
    (data.type.unit === IntervalUnit.DAY_TIME)
        ? getIntervalDayTime(data as Data, index)
        : getIntervalYearMonth(data as Data, index);

/**
 * IntervalDayTime getter: returns the two-element view of `values` starting at
 * `2 * index`.
 * @ignore
 */
const getIntervalDayTime = ({ values }: Data, index: number): T['TValue'] => values.subarray(2 * index, 2 * (index + 1));

/**
 * IntervalYearMonth getter: splits the stored value into a fresh two-element
 * `Int32Array` of `[years, months]`, i.e. the truncated quotient and the
 * remainder of dividing by 12.
 * @ignore
 */
const getIntervalYearMonth = ({ values }: Data, index: number): T['TValue'] => {
    const interval = values[index];
    const int32s = new Int32Array(2);
    int32s[0] = Math.trunc(interval / 12); /* years */
    int32s[1] = Math.trunc(interval % 12); /* months */
    return int32s;
};

/**
 * DurationSecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getDurationSecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * DurationMillisecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getDurationMillisecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * DurationMicrosecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getDurationMicrosecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/**
 * DurationNanosecond getter: returns `values[index]` unchanged.
 * @ignore
 */
const getDurationNanosecond = ({ values }: Data, index: number): T['TValue'] => values[index];

/* istanbul ignore next */
/**
 * Generic Duration getter: dispatches on `data.type.unit` to the matching
 * unit-specific getter (no `default` branch).
 * @ignore
 */
const getDuration = (data: Data, index: number): T['TValue'] => {
    switch (data.type.unit) {
        case TimeUnit.SECOND: return getDurationSecond(data as Data, index);
        case TimeUnit.MILLISECOND: return getDurationMillisecond(data as Data, index);
        case TimeUnit.MICROSECOND: return getDurationMicrosecond(data as Data, index);
        case TimeUnit.NANOSECOND: return getDurationNanosecond(data as Data, index);
    }
};

/**
 * FixedSizeList getter: slices `stride` entries of the first child starting at
 * `index * stride` and returns the slice wrapped in a `Vector`.
 * @ignore
 */
const getFixedSizeList = (data: Data, index: number): T['TValue'] => {
    const { stride, children } = data;
    const child: Data = children[0];
    const slice = child.slice(index * stride, stride);
    return new Vector([slice]);
};

// Attach the getters to the visitor prototype. Each one goes through `wrapGet`,
// so a slot that `getValid` reports as not valid yields `null` without the raw
// getter being called.
GetVisitor.prototype.visitNull = wrapGet(getNull);
GetVisitor.prototype.visitBool = wrapGet(getBool);
GetVisitor.prototype.visitInt = wrapGet(getInt);
GetVisitor.prototype.visitInt8 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt16 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt32 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt64 = wrapGet(getBigInts);
GetVisitor.prototype.visitUint8 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint16 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint32 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint64 = wrapGet(getBigInts);
GetVisitor.prototype.visitFloat = wrapGet(getFloat);
GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16);
GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric);
GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitBinary = wrapGet(getBinary);
GetVisitor.prototype.visitLargeBinary = wrapGet(getBinary);
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
GetVisitor.prototype.visitDate = wrapGet(getDate);
GetVisitor.prototype.visitDateDay = wrapGet(getDateDay);
GetVisitor.prototype.visitDateMillisecond = wrapGet(getDateMillisecond);
GetVisitor.prototype.visitTimestamp = wrapGet(getTimestamp);
GetVisitor.prototype.visitTimestampSecond = wrapGet(getTimestampSecond);
GetVisitor.prototype.visitTimestampMillisecond = wrapGet(getTimestampMillisecond);
// Timestamp getters for the sub-millisecond units.
GetVisitor.prototype.visitTimestampMicrosecond = wrapGet(getTimestampMicrosecond);
GetVisitor.prototype.visitTimestampNanosecond = wrapGet(getTimestampNanosecond);

// Time getters.
GetVisitor.prototype.visitTime = wrapGet(getTime);
GetVisitor.prototype.visitTimeSecond = wrapGet(getTimeSecond);
GetVisitor.prototype.visitTimeMillisecond = wrapGet(getTimeMillisecond);
GetVisitor.prototype.visitTimeMicrosecond = wrapGet(getTimeMicrosecond);
GetVisitor.prototype.visitTimeNanosecond = wrapGet(getTimeNanosecond);

// Decimal and nested getters.
GetVisitor.prototype.visitDecimal = wrapGet(getDecimal);
GetVisitor.prototype.visitList = wrapGet(getList);
GetVisitor.prototype.visitStruct = wrapGet(getStruct);
GetVisitor.prototype.visitUnion = wrapGet(getUnion);
GetVisitor.prototype.visitDenseUnion = wrapGet(getDenseUnion);
GetVisitor.prototype.visitSparseUnion = wrapGet(getSparseUnion);
GetVisitor.prototype.visitDictionary = wrapGet(getDictionary);

// Interval getters.
GetVisitor.prototype.visitInterval = wrapGet(getInterval);
GetVisitor.prototype.visitIntervalDayTime = wrapGet(getIntervalDayTime);
GetVisitor.prototype.visitIntervalYearMonth = wrapGet(getIntervalYearMonth);

// Duration getters.
GetVisitor.prototype.visitDuration = wrapGet(getDuration);
GetVisitor.prototype.visitDurationSecond = wrapGet(getDurationSecond);
GetVisitor.prototype.visitDurationMillisecond = wrapGet(getDurationMillisecond);
GetVisitor.prototype.visitDurationMicrosecond = wrapGet(getDurationMicrosecond);
GetVisitor.prototype.visitDurationNanosecond = wrapGet(getDurationNanosecond);

// Fixed-size list and map getters.
GetVisitor.prototype.visitFixedSizeList = wrapGet(getFixedSizeList);
GetVisitor.prototype.visitMap = wrapGet(getMap);

/**
 * Module-level `GetVisitor` instance; the union getters in this file call
 * `instance.visit(...)` to read from the selected child.
 * @ignore
 */
export const instance = new GetVisitor();