package org.apache.hadoop.hive.ql.io.parquet.vector;

import java.io.IOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DataCache;
import org.apache.hadoop.hive.common.io.FileMetadataCache;
import org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapCacheAwareFs;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.parquet.ParquetRuntimeException;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.filter2.compat.RowGroupFilter;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.ParquetInputSplit;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopStreams;
import org.apache.parquet.io.InputFile;
import org.apache.parquet.io.SeekableInputStream;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.InvalidSchemaException;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.class */
public class VectorizedParquetRecordReader extends ParquetRecordReaderBase implements RecordReader<NullWritable, VectorizedRowBatch> {
    /** Class-wide logger. */
    public static final Logger LOG = LoggerFactory.getLogger((Class<?>) VectorizedParquetRecordReader.class);
    /** Read-column ids obtained from {@code ColumnProjectionUtils.getReadColumnIDs(jobConf)}. */
    private List<Integer> colsToInclude;
    /** Schema taken from the footer of the file being read (set in {@code initialize}). */
    protected MessageType fileSchema;
    /** Schema produced by {@code DataWritableReadSupport.getRequestedSchema} (set in {@code initialize}). */
    protected MessageType requestedSchema;
    /** Column names parsed from the {@code "columns"} job property. */
    private List<String> columnNamesList;
    /** Column types parsed from the {@code "columns.types"} job property. */
    private List<TypeInfo> columnTypesList;
    /** Row batch context obtained from {@code Utilities.getVectorizedRowBatchCtx}. */
    private VectorizedRowBatchCtx rbCtx;
    /** Partition column values for the split, or {@code null} when the context reports no partition columns. */
    private Object[] partitionValues;
    /** Path returned by {@code LlapCacheAwareFs.registerFile}; unregistered again in {@code close()}. */
    private Path cacheFsPath;
    // NOTE(review): presumably the nesting bound used when unwrapping MAP group types — confirm against buildVectorizedParquetReader.
    private static final int MAP_DEFINITION_LEVEL_MAX = 3;
    /** Per-column readers, rebuilt by {@code checkEndOfRowGroup} each time a new row group is loaded. */
    private VectorizedColumnReader[] columnReaders;
    /** Number of rows already handed out through {@code nextBatch}. */
    private long rowsReturned;
    /** Sum of the row counts of all row groups loaded so far. */
    private long totalCountLoadedSoFar;
    /** Sum of the row counts of all row groups selected for this split. */
    protected long totalRowCount;
    /** Writer time zone derived from the footer key/value metadata. */
    private ZoneId writerTimezone;
    /** Optional footer cache; {@code null} disables metadata caching. */
    private FileMetadataCache metadataCache;
    /** Optional data cache; {@code null} disables the cache-aware path wrapping. */
    private DataCache cache;
    /** Configuration consulted for the LLAP cache settings. */
    private Configuration cacheConf;

    /**
     * Creates a reader without any metadata cache, data cache or cache configuration.
     *
     * @param inputSplit split to read, forwarded unchanged to the five-argument constructor
     * @param jobConf job configuration, forwarded unchanged to the five-argument constructor
     */
    public VectorizedParquetRecordReader(InputSplit inputSplit, JobConf jobConf) {
        this(inputSplit, jobConf, (FileMetadataCache) null, (DataCache) null, (Configuration) null);
    }

    /**
     * Creates a reader for the given split, optionally backed by a footer cache and a data cache.
     *
     * <p>Any failure during set-up is logged with its stack trace, resources that were already
     * acquired are released through {@link #close()}, and the failure is rethrown wrapped in a
     * {@link RuntimeException}.
     *
     * @param inputSplit split to read; it is also cast to {@code FileSplit} to resolve partition values
     * @param jobConf job configuration
     * @param fileMetadataCache footer cache, or {@code null} to read footers directly from the file
     * @param dataCache data cache, or {@code null} to read column data directly from the file
     * @param configuration configuration consulted for the cache-related settings
     * @throws RuntimeException wrapping whatever went wrong while initializing the reader
     */
    public VectorizedParquetRecordReader(InputSplit inputSplit, JobConf jobConf, FileMetadataCache fileMetadataCache, DataCache dataCache, Configuration configuration) {
        this.rowsReturned = 0L;
        this.totalCountLoadedSoFar = 0L;
        this.totalRowCount = 0L;
        try {
            this.metadataCache = fileMetadataCache;
            this.cache = dataCache;
            this.cacheConf = configuration;
            this.serDeStats = new SerDeStats();
            this.projectionPusher = new ProjectionPusher();
            this.colsToInclude = ColumnProjectionUtils.getReadColumnIDs(jobConf);
            this.jobConf = jobConf;
            this.rbCtx = Utilities.getVectorizedRowBatchCtx(this.jobConf);
            ParquetInputSplit split = getSplit(inputSplit, jobConf);
            if (split != null) {
                initialize(split, jobConf);
            }
            initPartitionValues((FileSplit) inputSplit, jobConf);
        } catch (Throwable th) {
            // Pass the throwable itself so the stack trace reaches the log.
            LOG.error("Failed to create the vectorized reader due to exception", th);
            // The object never escapes the constructor, so nobody else can release what
            // initialize() may already have opened or registered.
            try {
                close();
            } catch (Throwable closeFailure) {
                th.addSuppressed(closeFailure);
            }
            throw new RuntimeException(th);
        }
    }

    /**
     * Fills {@code partitionValues} for the given split, or clears it when the row batch context
     * reports no partition columns.
     *
     * @param fileSplit split whose partition values are looked up
     * @param jobConf job configuration handed to {@code VectorizedRowBatchCtx.getPartitionValues}
     * @throws IOException declared by the method signature
     */
    private void initPartitionValues(FileSplit fileSplit, JobConf jobConf) throws IOException {
        final int partitionCount = this.rbCtx.getPartitionColumnCount();
        if (partitionCount > 0) {
            // The field is assigned before the lookup so it is set even if the lookup throws.
            this.partitionValues = new Object[partitionCount];
            VectorizedRowBatchCtx.getPartitionValues(this.rbCtx, (Configuration) jobConf, fileSplit, this.partitionValues);
            return;
        }
        this.partitionValues = null;
    }

    /**
     * Reads the footer for the split, selects the row groups to scan, derives the requested
     * schema and opens the underlying {@code ParquetFileReader}.
     *
     * <p>When a metadata cache is configured, a file id is resolved and used as the cache key; the
     * file path may then be replaced by a file-id based path, depending on configuration.
     *
     * @param inputSplit split to read; a {@code null} split makes this method a no-op
     * @param jobConf job configuration
     * @throws IOException if the footer or the file cannot be read
     * @throws InterruptedException declared by the method signature
     * @throws IllegalStateException if the split lists row group offsets that are not all present in the footer
     */
    public void initialize(org.apache.hadoop.mapreduce.InputSplit inputSplit, JobConf jobConf) throws IOException, InterruptedException {
        if (inputSplit == null) {
            return;
        }
        ParquetInputSplit parquetInputSplit = (ParquetInputSplit) inputSplit;
        boolean indexAccess = jobConf.getBoolean(DataWritableReadSupport.PARQUET_COLUMN_INDEX_ACCESS, false);
        this.file = parquetInputSplit.getPath();
        long[] rowGroupOffsets = parquetInputSplit.getRowGroupOffsets();
        this.columnNamesList = DataWritableReadSupport.getColumnNames(jobConf.get("columns"));
        this.columnTypesList = DataWritableReadSupport.getColumnTypes(jobConf.get("columns.types"));

        // Cache key and metrics tag stay null when no metadata cache is configured.
        Object fileId = null;
        String cacheTag = null;
        if (this.metadataCache != null) {
            fileId = HdfsUtils.getFileId(this.file.getFileSystem(jobConf), this.file, HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID), HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
        }
        if (fileId != null) {
            if (HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE)) {
                cacheTag = LlapUtil.getDbAndTableNameForMetrics(this.file, true);
            }
            FileSystem fileSystem = this.file.getFileSystem(jobConf);
            if ((fileId instanceof Long) && HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_IO_USE_FILEID_PATH)) {
                this.file = HdfsUtils.getFileIdPath(fileSystem, this.file, ((Long) fileId).longValue());
            }
        }

        ParquetMetadata footer;
        List<BlockMetaData> selectedBlocks;
        if (rowGroupOffsets == null) {
            // No explicit offsets: restrict the footer to the split's byte range and apply the filter.
            footer = readSplitFooter(jobConf, this.file, fileId, ParquetMetadataConverter.range(parquetInputSplit.getStart(), parquetInputSplit.getEnd()), cacheTag);
            selectedBlocks = RowGroupFilter.filterRowGroups(ParquetInputFormat.getFilter(jobConf), footer.getBlocks(), footer.getFileMetaData().getSchema());
        } else {
            // Explicit offsets: read the whole footer and keep exactly the listed row groups.
            footer = readSplitFooter(jobConf, this.file, fileId, ParquetMetadataConverter.NO_FILTER, cacheTag);
            HashSet<Long> wantedOffsets = new HashSet<>();
            for (long offset : rowGroupOffsets) {
                wantedOffsets.add(Long.valueOf(offset));
            }
            selectedBlocks = new ArrayList<>();
            for (BlockMetaData block : footer.getBlocks()) {
                if (wantedOffsets.contains(Long.valueOf(block.getStartingPos()))) {
                    selectedBlocks.add(block);
                }
            }
            if (selectedBlocks.size() != rowGroupOffsets.length) {
                long[] foundOffsets = new long[footer.getBlocks().size()];
                for (int i = 0; i < foundOffsets.length; i++) {
                    foundOffsets[i] = footer.getBlocks().get(i).getStartingPos();
                }
                throw new IllegalStateException("All the offsets listed in the split should be found in the file. expected: " + Arrays.toString(rowGroupOffsets) + " found: " + selectedBlocks + " out of: " + Arrays.toString(foundOffsets) + " in range " + parquetInputSplit.getStart() + ", " + parquetInputSplit.getEnd());
            }
        }

        for (BlockMetaData block : selectedBlocks) {
            this.totalRowCount += block.getRowCount();
        }
        this.fileSchema = footer.getFileMetaData().getSchema();
        this.writerTimezone = DataWritableReadSupport.getWriterTimeZoneId(footer.getFileMetaData().getKeyValueMetaData());
        this.colsToInclude = ColumnProjectionUtils.getReadColumnIDs(jobConf);
        this.requestedSchema = DataWritableReadSupport.getRequestedSchema(indexAccess, this.columnNamesList, this.columnTypesList, this.fileSchema, jobConf);
        // wrapPathForCache reads requestedSchema, so it must run after the assignment above.
        Path readPath = wrapPathForCache(this.file, fileId, jobConf, selectedBlocks, cacheTag);
        this.reader = new ParquetFileReader(jobConf, footer.getFileMetaData(), readPath, selectedBlocks, this.requestedSchema.getColumns());
    }

    /**
     * Registers the file with the cache-aware file system and returns the path to read through.
     *
     * <p>Without a cache key or without a data cache the original path is returned untouched.
     * Otherwise a start-to-end offset index is built for every column chunk that belongs to a
     * requested column, and the file is registered together with that index.
     *
     * @param path path of the file to read
     * @param obj cache key of the file, or {@code null} when caching is not used
     * @param jobConf job configuration; its {@code fs.llapcache.impl} entry is set by this method
     * @param list row groups selected for this split
     * @param str tag forwarded to {@code LlapCacheAwareFs.registerFile}
     * @return the original path, or the path returned by the registration
     * @throws IOException declared by the method signature
     */
    private Path wrapPathForCache(Path path, Object obj, JobConf jobConf, List<BlockMetaData> list, String str) throws IOException {
        if (obj == null || this.cache == null) {
            return path;
        }

        HashSet<ColumnPath> requestedPaths = new HashSet<>();
        for (ColumnDescriptor descriptor : this.requestedSchema.getColumns()) {
            requestedPaths.add(ColumnPath.get(descriptor.getPath()));
        }

        // Maps each requested chunk's starting offset to the offset just past its end.
        TreeMap<Long, Long> chunkRanges = new TreeMap<>();
        for (BlockMetaData block : list) {
            for (ColumnChunkMetaData chunk : block.getColumns()) {
                if (!requestedPaths.contains(chunk.getPath())) {
                    continue;
                }
                Long start = Long.valueOf(chunk.getStartingPos());
                Long end = Long.valueOf(chunk.getStartingPos() + chunk.getTotalSize());
                chunkRanges.put(start, end);
            }
        }

        jobConf.set("fs.llapcache.impl", LlapCacheAwareFs.class.getCanonicalName());
        this.cacheFsPath = LlapCacheAwareFs.registerFile(this.cache, path, obj, chunkRanges, jobConf, str);
        return this.cacheFsPath;
    }

    /**
     * Reads the Parquet footer of {@code path}, going through the metadata cache when possible.
     *
     * <ul>
     *   <li>Footer already cached: it is parsed from the cached buffer.</li>
     *   <li>No cache key or no metadata cache: the footer is read straight from the file.</li>
     *   <li>Otherwise: the raw footer bytes are located in the file, stored in the cache and then
     *       parsed from the cached buffer.</li>
     * </ul>
     * Every buffer obtained from the cache is released with {@code decRefBuffer} once parsing
     * finishes, whether or not it succeeded.
     *
     * @param jobConf job configuration used to resolve the file system
     * @param path file whose footer is read
     * @param obj cache key of the file, or {@code null} when caching is not used
     * @param metadataFilter filter applied while parsing the footer
     * @param str tag forwarded to {@code putFileMetadata}
     * @return the parsed footer
     * @throws IOException if the file or the footer cannot be read
     */
    private ParquetMetadata readSplitFooter(JobConf jobConf, Path path, Object obj, ParquetMetadataConverter.MetadataFilter metadataFilter, String str) throws IOException {
        final boolean cacheUsable = obj != null && this.metadataCache != null;

        MemoryBufferOrBuffers cachedFooter = cacheUsable ? this.metadataCache.getFileMetadata(obj) : null;
        if (cachedFooter != null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Found the footer in cache for " + obj);
            }
            try {
                return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(cachedFooter), metadataFilter);
            } finally {
                this.metadataCache.decRefBuffer(cachedFooter);
            }
        }

        FileSystem fileSystem = path.getFileSystem(jobConf);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        if (!cacheUsable) {
            return readFooterFromFile(path, fileSystem, fileStatus, metadataFilter);
        }

        // try-with-resources closes the stream on every path and keeps the primary exception
        // when close() itself fails.
        try (SeekableInputStream stream = HadoopStreams.wrap(fileSystem.open(path))) {
            // The file ends with <footer bytes><4-byte little-endian footer length><magic>.
            long footerLengthIndex = (fileStatus.getLen() - 4) - ParquetFileWriter.MAGIC.length;
            stream.seek(footerLengthIndex);
            int footerLength = BytesUtils.readIntLittleEndian(stream);
            stream.seek(footerLengthIndex - footerLength);
            if (LOG.isInfoEnabled()) {
                LOG.info("Caching the footer of length " + footerLength + " for " + obj);
            }
            MemoryBufferOrBuffers storedFooter = this.metadataCache.putFileMetadata(obj, footerLength, stream, str, null);
            try {
                return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(storedFooter), metadataFilter);
            } finally {
                this.metadataCache.decRefBuffer(storedFooter);
            }
        }
    }

    /**
     * Parses the footer directly from the file, bypassing any cache.
     *
     * @param path file to read
     * @param fileSystem file system used to open {@code path}
     * @param fileStatus status of the file; supplies the reported length
     * @param metadataFilter filter applied while parsing the footer
     * @return the parsed footer
     * @throws IOException if reading the footer fails
     */
    private ParquetMetadata readFooterFromFile(final Path path, final FileSystem fileSystem, final FileStatus fileStatus, ParquetMetadataConverter.MetadataFilter metadataFilter) throws IOException {
        // Minimal InputFile view over the Hadoop file: a length plus a way to open fresh streams.
        InputFile footerSource = new InputFile() {
            @Override // org.apache.parquet.io.InputFile
            public long getLength() throws IOException {
                return fileStatus.getLen();
            }

            @Override // org.apache.parquet.io.InputFile
            public SeekableInputStream newStream() throws IOException {
                return HadoopStreams.wrap(fileSystem.open(path));
            }
        };
        return ParquetFileReader.readFooter(footerSource, metadataFilter);
    }

    /**
     * Fills the supplied batch with the next rows of the split.
     *
     * @param nullWritable unused key
     * @param vectorizedRowBatch batch to fill
     * @return the result of {@code nextBatch}: {@code false} once all rows have been returned
     * @throws IOException if reading the next rows fails
     */
    public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch) throws IOException {
        final boolean batchFilled = nextBatch(vectorizedRowBatch);
        return batchFilled;
    }

    /**
     * Returns the key object for this reader, which is always the shared {@code NullWritable}
     * instance.
     *
     * <p>This restores the {@code createKey} method of the {@code RecordReader} interface the
     * class declares; the decompiler had renamed it to {@link #m4591createKey()}.
     *
     * @return {@code NullWritable.get()}
     */
    public NullWritable createKey() {
        return NullWritable.get();
    }

    /* renamed from: createKey, reason: merged with bridge method [inline-methods] */
    /**
     * Decompiler-generated alias of {@link #createKey()}, kept so existing references still resolve.
     *
     * @return the same value as {@link #createKey()}
     */
    public NullWritable m4591createKey() {
        return createKey();
    }

    /**
     * Creates a new row batch from the row batch context.
     *
     * <p>This restores the {@code createValue} method of the {@code RecordReader} interface the
     * class declares; the decompiler had renamed it to {@link #m4590createValue()}.
     *
     * @return the batch returned by {@code rbCtx.createVectorizedRowBatch()}
     */
    public VectorizedRowBatch createValue() {
        return this.rbCtx.createVectorizedRowBatch();
    }

    /* renamed from: createValue, reason: merged with bridge method [inline-methods] */
    /**
     * Decompiler-generated alias of {@link #createValue()}, kept so existing references still resolve.
     *
     * @return the same value as {@link #createValue()}
     */
    public VectorizedRowBatch m4590createValue() {
        return createValue();
    }

    /**
     * Reports the current position; this implementation always reports zero.
     *
     * @return {@code 0}
     * @throws IOException declared by the method signature, never thrown here
     */
    public long getPos() throws IOException {
        final long position = 0L;
        return position;
    }

    /**
     * Releases the cache registration (if any) and closes the underlying file reader (if any).
     *
     * <p>The reader is closed even when unregistering the cache path fails, and the cache path is
     * forgotten before it is unregistered so that a repeated call does not unregister it twice.
     *
     * @throws IOException if closing the underlying reader fails
     */
    public void close() throws IOException {
        try {
            if (this.cacheFsPath != null) {
                Path registeredPath = this.cacheFsPath;
                this.cacheFsPath = null;
                LlapCacheAwareFs.unregisterFile(registeredPath);
            }
        } finally {
            if (this.reader != null) {
                this.reader.close();
            }
        }
    }

    /**
     * Reports the read progress; this implementation always reports zero.
     *
     * @return {@code 0.0f}
     * @throws IOException declared by the method signature, never thrown here
     */
    public float getProgress() throws IOException {
        final float progress = 0.0f;
        return progress;
    }

    /**
     * Resets the batch and fills it with up to 1024 rows from the currently loaded row group.
     *
     * @param vectorizedRowBatch batch to reset and fill
     * @return {@code false} when every row of the split has already been returned, {@code true} otherwise
     * @throws IOException if loading the next row group or reading a column fails
     */
    private boolean nextBatch(VectorizedRowBatch vectorizedRowBatch) throws IOException {
        vectorizedRowBatch.reset();
        if (this.rowsReturned >= this.totalRowCount) {
            return false;
        }
        if (this.partitionValues != null) {
            this.rbCtx.addPartitionColsToBatch(vectorizedRowBatch, this.partitionValues);
        }
        // Loads the next row group when the current one is exhausted.
        checkEndOfRowGroup();

        final long remainingInRowGroup = this.totalCountLoadedSoFar - this.rowsReturned;
        final int rowsToRead = (int) Math.min(1024L, remainingInRowGroup);
        if (this.colsToInclude.size() > 0) {
            for (int readerIndex = 0; readerIndex < this.columnReaders.length; readerIndex++) {
                VectorizedColumnReader columnReader = this.columnReaders[readerIndex];
                if (columnReader == null) {
                    continue;
                }
                // The reader index is mapped to the batch column and the column type through colsToInclude.
                int columnId = this.colsToInclude.get(readerIndex).intValue();
                vectorizedRowBatch.cols[columnId].isRepeating = true;
                columnReader.readBatch(rowsToRead, vectorizedRowBatch.cols[columnId], this.columnTypesList.get(columnId));
            }
        }
        this.rowsReturned += rowsToRead;
        vectorizedRowBatch.size = rowsToRead;
        return true;
    }

    /**
     * Loads the next row group and rebuilds the column readers once every row loaded so far has
     * been returned; does nothing otherwise.
     *
     * @throws IOException if the file reader has no further row group, or if reading it fails
     */
    private void checkEndOfRowGroup() throws IOException {
        if (this.rowsReturned != this.totalCountLoadedSoFar) {
            return;
        }
        PageReadStore rowGroup = this.reader.readNextRowGroup();
        if (rowGroup == null) {
            throw new IOException("expecting more rows but reached last block. Read " + this.rowsReturned + " out of " + this.totalRowCount);
        }

        List<ColumnDescriptor> columns = this.requestedSchema.getColumns();
        List<Type> fields = this.requestedSchema.getFields();
        this.columnReaders = new VectorizedColumnReader[columns.size()];

        final boolean readAll = ColumnProjectionUtils.isReadAllColumns(this.jobConf);
        if (readAll || !this.colsToInclude.isEmpty()) {
            for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) {
                // Reading all columns: field position equals type position; otherwise map through colsToInclude.
                int typeIndex = readAll ? fieldIndex : this.colsToInclude.get(fieldIndex).intValue();
                this.columnReaders[fieldIndex] = buildVectorizedParquetReader(this.columnTypesList.get(typeIndex), fields.get(fieldIndex), rowGroup, this.requestedSchema.getColumns(), this.skipTimestampConversion, this.writerTimezone, 0);
            }
        }
        this.totalCountLoadedSoFar += rowGroup.getRowCount();
    }

    /**
     * Selects the column descriptors whose path element at depth {@code i} equals the name of
     * {@code type}.
     *
     * @param i depth in the column path to compare
     * @param type Parquet type whose name is matched
     * @param list candidate column descriptors
     * @return the matching descriptors, in their original order
     * @throws ParquetRuntimeException ({@code InvalidSchemaException}) if a candidate's path has no element at depth {@code i}
     */
    private List<ColumnDescriptor> getAllColumnDescriptorByType(int i, Type type, List<ColumnDescriptor> list) throws ParquetRuntimeException {
        List<ColumnDescriptor> matches = new ArrayList<>();
        for (ColumnDescriptor candidate : list) {
            if (candidate.getPath().length <= i) {
                throw new InvalidSchemaException("Corrupted Parquet schema");
            }
            boolean sameName = type.getName().equals(candidate.getPath()[i]);
            if (sameName) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /**
     * Resolves the primitive element type of a list column.
     *
     * <p>A primitive type is returned as is. For a group, the first field of its single child
     * group is returned as a primitive type.
     *
     * @param type Parquet type of the list column
     * @return the primitive element type
     * @throws RuntimeException if the group has more than one field
     */
    private PrimitiveType getElementType(Type type) {
        if (type.isPrimitive()) {
            return type.asPrimitiveType();
        }
        List<Type> children = type.asGroupType().getFields();
        if (children.size() > 1) {
            throw new RuntimeException("Current Parquet Vectorization reader doesn't support nested type");
        }
        // Step through the single child group down to its first field.
        GroupType childGroup = type.asGroupType().getFields().get(0).asGroupType();
        return childGroup.getFields().get(0).asPrimitiveType();
    }

    /**
     * Builds the vectorized reader for one column, recursing into struct fields.
     *
     * <p>The descriptors in {@code list} are first narrowed to those whose path element at depth
     * {@code i} matches the name of {@code type}; the readers are created from that narrowed list.
     *
     * @param typeInfo Hive type of the column
     * @param type Parquet type of the column
     * @param pageReadStore row group supplying the page readers
     * @param list candidate column descriptors at this depth
     * @param z timestamp-conversion flag forwarded to the created readers
     * @param zoneId writer time zone forwarded to the created readers
     * @param i depth of {@code type} within the column path
     * @return the reader for the column
     * @throws IOException if a page reader cannot be obtained
     * @throws RuntimeException if no matching column descriptor exists, or the Hive category or
     *     the Parquet layout is not supported
     */
    private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pageReadStore, List<ColumnDescriptor> list, boolean z, ZoneId zoneId, int i) throws IOException {
        // A null candidate list is treated as "nothing matches" instead of failing with an NPE.
        List<ColumnDescriptor> descriptors = list == null ? new ArrayList<ColumnDescriptor>() : getAllColumnDescriptorByType(i, type, list);
        switch (typeInfo.getCategory()) {
            case PRIMITIVE: {
                requireDescriptors(descriptors, 1, type);
                ColumnDescriptor descriptor = descriptors.get(0);
                if (this.fileSchema.getColumns().contains(descriptor)) {
                    return new VectorizedPrimitiveColumnReader(descriptor, pageReadStore.getPageReader(descriptor), z, zoneId, type, typeInfo);
                }
                // The column is requested but absent from the file schema.
                return new VectorizedDummyColumnReader();
            }
            case STRUCT: {
                List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
                List<TypeInfo> fieldTypeInfos = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
                List<Type> fields = type.asGroupType().getFields();
                if (fields.size() < fieldTypeInfos.size()) {
                    throw new RuntimeException("Fail to build Parquet vectorized reader: Hive struct declares " + fieldTypeInfos.size() + " fields but Parquet type has only " + fields.size() + ": " + type);
                }
                for (int fieldIndex = 0; fieldIndex < fieldTypeInfos.size(); fieldIndex++) {
                    VectorizedColumnReader fieldReader = buildVectorizedParquetReader(fieldTypeInfos.get(fieldIndex), fields.get(fieldIndex), pageReadStore, descriptors, z, zoneId, i + 1);
                    if (fieldReader == null) {
                        throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypeInfos.get(fieldIndex).getTypeName() + " and Parquet type" + fields.get(fieldIndex).toString());
                    }
                    fieldReaders.add(fieldReader);
                }
                return new VectorizedStructColumnReader(fieldReaders);
            }
            case LIST: {
                checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
                requireDescriptors(descriptors, 1, type);
                ColumnDescriptor descriptor = descriptors.get(0);
                return new VectorizedListColumnReader(descriptor, pageReadStore.getPageReader(descriptor), z, zoneId, getElementType(type), typeInfo);
            }
            case MAP: {
                // One descriptor for the key column and one for the value column.
                requireDescriptors(descriptors, 2, type);
                int depth = 0;
                GroupType keyValueGroup = type.asGroupType();
                // Descend through single-field wrapper groups until the group holding key and value.
                while (keyValueGroup.getFieldCount() < 2) {
                    if (depth > MAP_DEFINITION_LEVEL_MAX) {
                        throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, Failed to get the field types for Map with type " + type);
                    }
                    keyValueGroup = keyValueGroup.getFields().get(0).asGroupType();
                    depth++;
                }
                List<Type> keyValueTypes = keyValueGroup.getFields();
                ColumnDescriptor keyDescriptor = descriptors.get(0);
                ColumnDescriptor valueDescriptor = descriptors.get(1);
                VectorizedListColumnReader keyReader = new VectorizedListColumnReader(keyDescriptor, pageReadStore.getPageReader(keyDescriptor), z, zoneId, keyValueTypes.get(0), typeInfo);
                VectorizedListColumnReader valueReader = new VectorizedListColumnReader(valueDescriptor, pageReadStore.getPageReader(valueDescriptor), z, zoneId, keyValueTypes.get(1), typeInfo);
                return new VectorizedMapColumnReader(keyReader, valueReader);
            }
            case UNION:
            default:
                throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
        }
    }

    /** Fails with the descriptor-lookup error unless at least {@code required} descriptors matched {@code type}. */
    private static void requireDescriptors(List<ColumnDescriptor> descriptors, int required, Type type) {
        if (descriptors == null || descriptors.size() < required) {
            throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
        }
    }

    /**
     * Rejects list element types this reader cannot handle.
     *
     * @param typeInfo Hive type of the list elements
     * @throws RuntimeException if the element type is not primitive, or is {@code INTERVAL_DAY_TIME} or {@code TIMESTAMP}
     */
    private void checkListColumnSupport(TypeInfo typeInfo) {
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo elementType = (PrimitiveTypeInfo) typeInfo;
            switch (elementType.getPrimitiveCategory()) {
                case INTERVAL_DAY_TIME:
                case TIMESTAMP:
                    throw new RuntimeException("Unsupported primitive type used in list:: " + typeInfo);
                default:
                    // Every other primitive category is accepted.
                    break;
            }
            return;
        }
        throw new RuntimeException("Unsupported type used in list:" + typeInfo);
    }
}
