package org.apache.iceberg.shaded.org.apache.parquet.hadoop.rewrite;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.shaded.org.apache.parquet.ParquetReadOptions;
import org.apache.iceberg.shaded.org.apache.parquet.Preconditions;
import org.apache.iceberg.shaded.org.apache.parquet.bytes.BytesInput;
import org.apache.iceberg.shaded.org.apache.parquet.column.ColumnDescriptor;
import org.apache.iceberg.shaded.org.apache.parquet.column.ColumnReader;
import org.apache.iceberg.shaded.org.apache.parquet.column.ColumnWriteStore;
import org.apache.iceberg.shaded.org.apache.parquet.column.ColumnWriter;
import org.apache.iceberg.shaded.org.apache.parquet.column.ParquetProperties;
import org.apache.iceberg.shaded.org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.iceberg.shaded.org.apache.parquet.column.page.DictionaryPage;
import org.apache.iceberg.shaded.org.apache.parquet.column.page.PageReadStore;
import org.apache.iceberg.shaded.org.apache.parquet.column.statistics.Statistics;
import org.apache.iceberg.shaded.org.apache.parquet.column.values.bloomfilter.BloomFilter;
import org.apache.iceberg.shaded.org.apache.parquet.compression.CompressionCodecFactory;
import org.apache.iceberg.shaded.org.apache.parquet.conf.ParquetConfiguration;
import org.apache.iceberg.shaded.org.apache.parquet.crypto.AesCipher;
import org.apache.iceberg.shaded.org.apache.parquet.crypto.InternalColumnEncryptionSetup;
import org.apache.iceberg.shaded.org.apache.parquet.crypto.InternalFileEncryptor;
import org.apache.iceberg.shaded.org.apache.parquet.crypto.ModuleCipherFactory;
import org.apache.iceberg.shaded.org.apache.parquet.format.BlockCipher;
import org.apache.iceberg.shaded.org.apache.parquet.format.DataPageHeader;
import org.apache.iceberg.shaded.org.apache.parquet.format.DataPageHeaderV2;
import org.apache.iceberg.shaded.org.apache.parquet.format.DictionaryPageHeader;
import org.apache.iceberg.shaded.org.apache.parquet.format.PageHeader;
import org.apache.iceberg.shaded.org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.CodecFactory;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.ColumnChunkPageWriteStore;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.IndexCache;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetWriter;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.CompressionConverter;
import org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.HadoopCodecs;
import org.apache.iceberg.shaded.org.apache.parquet.internal.column.columnindex.ColumnIndex;
import org.apache.iceberg.shaded.org.apache.parquet.internal.column.columnindex.OffsetIndex;
import org.apache.iceberg.shaded.org.apache.parquet.io.InputFile;
import org.apache.iceberg.shaded.org.apache.parquet.io.OutputFile;
import org.apache.iceberg.shaded.org.apache.parquet.io.ParquetEncodingException;
import org.apache.iceberg.shaded.org.apache.parquet.io.api.Converter;
import org.apache.iceberg.shaded.org.apache.parquet.io.api.GroupConverter;
import org.apache.iceberg.shaded.org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.iceberg.shaded.org.apache.parquet.schema.GroupType;
import org.apache.iceberg.shaded.org.apache.parquet.schema.InvalidSchemaException;
import org.apache.iceberg.shaded.org.apache.parquet.schema.MessageType;
import org.apache.iceberg.shaded.org.apache.parquet.schema.PrimitiveType;
import org.apache.iceberg.shaded.org.apache.parquet.schema.Type;
import org.projectnessie.model.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/iceberg/shaded/org/apache/parquet/hadoop/rewrite/ParquetRewriter.class */
public class ParquetRewriter implements Closeable {
    /** Key-value metadata key under which the created-by string(s) of the input file(s) are stored. */
    public static final String ORIGINAL_CREATED_BY_KEY = "original.created.by";
    private static final Logger LOG = LoggerFactory.getLogger(ParquetRewriter.class);
    // 2097152 bytes = 2 MiB; presumably the intended length of pageBuffer.
    private final int pageBufferSize = 2097152;
    // Scratch buffer allocated once per rewriter by the constructors.
    private final byte[] pageBuffer;
    // Target codec for rewritten chunks; null means each chunk keeps its own codec.
    private final CompressionCodecName newCodecName;
    // Columns to mask and how; stays null when no masking was requested.
    private Map<ColumnPath, MaskMode> maskColumns;
    // Columns to encrypt; stays null unless encrypt columns and file encryption properties are both given.
    private Set<ColumnPath> encryptColumns;
    // True only when encryptColumns has been populated.
    private boolean encryptMode;
    // Key-value metadata handed to the writer when the rewriter is closed.
    private final Map<String, String> extraMetaData;
    private final ParquetFileWriter writer;
    // Count of blocks written so far; also passed as the block ordinal when building encryption AADs.
    private int numBlocksRewritten;
    // Readers for the primary input files, consumed in order by processBlocks().
    private final Queue<CompressionConverter.TransParquetFileReader> inputFiles;
    // Readers for the files whose columns are joined onto the primary input files.
    private final Queue<CompressionConverter.TransParquetFileReader> inputFilesToJoin;
    // Schema of the output before column renaming is applied.
    private final MessageType outSchema;
    private final IndexCache.CacheStrategy indexCacheStrategy;
    // When true, a join-side column replaces an input-side column of the same name.
    private final boolean overwriteInputWithJoinColumns;
    // Created only when file encryption properties are present; null otherwise.
    private final InternalFileEncryptor nullColumnEncryptor;
    // Maps a top-level column name to its new name; empty when nothing is renamed.
    private final Map<String, String> renamedColumns;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/shaded/org/apache/parquet/hadoop/rewrite/ParquetRewriter$ColumnChunkEncryptorRunTime.class */
    /**
     * Bundles everything needed to encrypt the pages of one column chunk: the data and
     * metadata encryptors plus the four module AADs (data page, data page header,
     * dictionary page, dictionary page header).
     */
    public static class ColumnChunkEncryptorRunTime {
        private final InternalColumnEncryptionSetup colEncrSetup;
        private final BlockCipher.Encryptor dataEncryptor;
        private final BlockCipher.Encryptor metaDataEncryptor;
        private final byte[] fileAAD;
        private final byte[] dataPageHeaderAAD;
        private final byte[] dataPageAAD;
        private final byte[] dictPageHeaderAAD;
        private final byte[] dictPageAAD;

        /**
         * @param internalFileEncryptor file-level encryptor; must not be null
         * @param columnChunkMetaData chunk whose path selects the column encryption setup
         * @param i block ordinal used when deriving the module AADs
         * @param i2 column ordinal, used both for the setup lookup and for the module AADs
         * @throws IllegalArgumentException if {@code internalFileEncryptor} is null
         * @throws IOException declared by the signature; propagated from the encryptor calls
         */
        public ColumnChunkEncryptorRunTime(InternalFileEncryptor internalFileEncryptor, ColumnChunkMetaData columnChunkMetaData, int i, int i2) throws IOException {
            Preconditions.checkArgument(internalFileEncryptor != null, "FileEncryptor is required to create ColumnChunkEncryptorRunTime");
            InternalColumnEncryptionSetup setup = internalFileEncryptor.getColumnSetup(columnChunkMetaData.getPath(), true, i2);
            this.colEncrSetup = setup;
            this.dataEncryptor = setup.getDataEncryptor();
            this.metaDataEncryptor = setup.getMetaDataEncryptor();
            this.fileAAD = internalFileEncryptor.getFileAAD();
            // NOTE(review): the null test below can never be the deciding factor, because
            // the setup has already been dereferenced above; it is kept to preserve behaviour.
            boolean columnIsEncrypted = setup != null && setup.isEncrypted();
            if (columnIsEncrypted) {
                this.dataPageHeaderAAD = createAAD(ModuleCipherFactory.ModuleType.DataPageHeader, i, i2);
                this.dataPageAAD = createAAD(ModuleCipherFactory.ModuleType.DataPage, i, i2);
                this.dictPageHeaderAAD = createAAD(ModuleCipherFactory.ModuleType.DictionaryPageHeader, i, i2);
                this.dictPageAAD = createAAD(ModuleCipherFactory.ModuleType.DictionaryPage, i, i2);
            } else {
                // Unencrypted column: no AADs are needed.
                this.dataPageHeaderAAD = null;
                this.dataPageAAD = null;
                this.dictPageHeaderAAD = null;
                this.dictPageAAD = null;
            }
        }

        /** Builds the AAD of one module type for the given ordinals; the trailing ordinal is always 0. */
        private byte[] createAAD(ModuleCipherFactory.ModuleType moduleType, int i, int i2) {
            return AesCipher.createModuleAAD(this.fileAAD, moduleType, i, i2, 0);
        }

        /** @return encryptor for page payloads, as supplied by the column setup */
        public BlockCipher.Encryptor getDataEncryptor() {
            return this.dataEncryptor;
        }

        /** @return encryptor for page headers, as supplied by the column setup */
        public BlockCipher.Encryptor getMetaDataEncryptor() {
            return this.metaDataEncryptor;
        }

        /** @return AAD for data page headers, or null when the column is not encrypted */
        public byte[] getDataPageHeaderAAD() {
            return this.dataPageHeaderAAD;
        }

        /** @return AAD for data pages, or null when the column is not encrypted */
        public byte[] getDataPageAAD() {
            return this.dataPageAAD;
        }

        /** @return AAD for dictionary page headers, or null when the column is not encrypted */
        public byte[] getDictPageHeaderAAD() {
            return this.dictPageHeaderAAD;
        }

        /** @return AAD for dictionary pages, or null when the column is not encrypted */
        public byte[] getDictPageAAD() {
            return this.dictPageAAD;
        }
    }

    /* loaded from: input_file:org/apache/iceberg/shaded/org/apache/parquet/hadoop/rewrite/ParquetRewriter$DummyConverter.class */
    /** Primitive converter with no behaviour of its own beyond handing out a group converter. */
    private static final class DummyConverter extends PrimitiveConverter {
        private DummyConverter() {}

        /** Returns a fresh {@link DummyGroupConverter} on every call. */
        @Override
        public GroupConverter asGroupConverter() {
            GroupConverter groupConverter = new DummyGroupConverter();
            return groupConverter;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/shaded/org/apache/parquet/hadoop/rewrite/ParquetRewriter$DummyGroupConverter.class */
    /** Group converter whose start/end callbacks do nothing and whose children are dummy converters. */
    public static final class DummyGroupConverter extends GroupConverter {
        private DummyGroupConverter() {}

        /** Returns a new {@link DummyConverter} regardless of the field index. */
        @Override
        public Converter getConverter(int i) {
            Converter child = new DummyConverter();
            return child;
        }

        @Override
        public void start() {
            // Intentionally empty.
        }

        @Override
        public void end() {
            // Intentionally empty.
        }
    }

    /**
     * Creates a rewriter from a set of rewrite options.
     *
     * <p>Opens every input file and every file to join, derives the output schema (with the
     * requested columns pruned), validates that each group of files shares one schema, that
     * block row counts line up between both groups and that the requested renames are
     * applicable, then creates and starts the output writer.
     *
     * @param rewriteOptions source of input/output files, codec, masking, encryption, renaming
     *     and join settings
     * @throws IOException propagated from creating or starting the output writer
     * @throws IllegalArgumentException if an input file cannot be opened, block row counts of
     *     the two file groups differ, or a rename is not applicable
     * @throws InvalidSchemaException if files within one group have different schemas
     */
    public ParquetRewriter(RewriteOptions rewriteOptions) throws IOException {
        // pageBufferSize already gets its value from its field initializer; assigning a final
        // field a second time does not compile, so only the buffer itself is created here.
        this.pageBuffer = new byte[this.pageBufferSize];
        this.maskColumns = null;
        this.encryptColumns = null;
        this.encryptMode = false;
        this.numBlocksRewritten = 0;
        this.inputFiles = new LinkedList<>();
        this.inputFilesToJoin = new LinkedList<>();
        this.newCodecName = rewriteOptions.getNewCodecName();
        this.indexCacheStrategy = rewriteOptions.getIndexCacheStrategy();
        this.overwriteInputWithJoinColumns = rewriteOptions.getOverwriteInputWithJoinColumns();
        this.renamedColumns = rewriteOptions.getRenameColumns();
        ParquetConfiguration parquetConfiguration = rewriteOptions.getParquetConfiguration();
        this.inputFiles.addAll(getFileReaders(rewriteOptions.getParquetInputFiles(), parquetConfiguration));
        this.inputFilesToJoin.addAll(getFileReaders(rewriteOptions.getParquetInputFilesToJoin(), parquetConfiguration));
        this.outSchema = pruneColumnsInSchema(getSchema(), rewriteOptions.getPruneColumns());
        this.extraMetaData = getExtraMetadata(rewriteOptions);
        ensureSameSchema(this.inputFiles);
        ensureSameSchema(this.inputFilesToJoin);
        ensureRowCount();
        ensureRenamingCorrectness(this.outSchema, this.renamedColumns);
        OutputFile parquetOutputFile = rewriteOptions.getParquetOutputFile();
        List<InputFile> allInputs = Stream.concat(rewriteOptions.getParquetInputFiles().stream(), rewriteOptions.getParquetInputFilesToJoin().stream()).collect(Collectors.toList());
        LOG.info("Start rewriting {} input file(s) {} to {}", new Object[]{Integer.valueOf(this.inputFiles.size() + this.inputFilesToJoin.size()), allInputs, parquetOutputFile});
        if (rewriteOptions.getMaskColumns() != null) {
            this.maskColumns = new HashMap<>();
            for (Map.Entry<String, MaskMode> entry : rewriteOptions.getMaskColumns().entrySet()) {
                this.maskColumns.put(ColumnPath.fromDotString(entry.getKey()), entry.getValue());
            }
        }
        // Encryption is only switched on when both the column list and the file-level
        // encryption properties are supplied.
        if (rewriteOptions.getEncryptColumns() != null && rewriteOptions.getFileEncryptionProperties() != null) {
            this.encryptColumns = convertToColumnPaths(rewriteOptions.getEncryptColumns());
            this.encryptMode = true;
        }
        MessageType writerSchema = this.renamedColumns.isEmpty() ? this.outSchema : getSchemaWithRenamedColumns(this.outSchema);
        this.writer = new ParquetFileWriter(parquetOutputFile, writerSchema, ParquetFileWriter.Mode.CREATE, 134217728L, ParquetWriter.MAX_PADDING_SIZE_DEFAULT, 64, Integer.MAX_VALUE, true, rewriteOptions.getFileEncryptionProperties());
        this.writer.start();
        if (rewriteOptions.getFileEncryptionProperties() == null) {
            this.nullColumnEncryptor = null;
        } else {
            this.nullColumnEncryptor = new InternalFileEncryptor(rewriteOptions.getFileEncryptionProperties());
            // Request a column setup from the writer's encryptor for every output column,
            // in schema order and under the (possibly renamed) output path.
            List<ColumnDescriptor> columns = getSchemaWithRenamedColumns(this.outSchema).getColumns();
            for (int i = 0; i < columns.size(); i++) {
                this.writer.getEncryptor().getColumnSetup(ColumnPath.get(columns.get(i).getPath()), true, i);
            }
        }
    }

    /**
     * Creates a rewriter around an already opened reader and an externally managed writer.
     *
     * <p>No join files, renames, encryption or index caching are configured by this
     * constructor.
     *
     * @param transParquetFileReader the single input file to rewrite
     * @param parquetFileWriter writer that receives the rewritten blocks
     * @param parquetMetadata footer whose key-value metadata seeds the output metadata
     * @param messageType output schema
     * @param str created-by value to record; when null the footer's created-by is used
     * @param compressionCodecName target codec, or null to keep each chunk's codec
     * @param list dot-separated paths of columns to mask; ignored unless {@code maskMode} is
     *     also non-null
     * @param maskMode mask mode applied to every column in {@code list}
     */
    public ParquetRewriter(CompressionConverter.TransParquetFileReader transParquetFileReader, ParquetFileWriter parquetFileWriter, ParquetMetadata parquetMetadata, MessageType messageType, String str, CompressionCodecName compressionCodecName, List<String> list, MaskMode maskMode) {
        // pageBufferSize already gets its value from its field initializer; assigning a final
        // field a second time does not compile, so only the buffer itself is created here.
        this.pageBuffer = new byte[this.pageBufferSize];
        this.maskColumns = null;
        this.encryptColumns = null;
        this.encryptMode = false;
        this.numBlocksRewritten = 0;
        this.inputFiles = new LinkedList<>();
        this.inputFilesToJoin = new LinkedList<>();
        this.writer = parquetFileWriter;
        this.outSchema = messageType;
        this.newCodecName = compressionCodecName;
        this.extraMetaData = new HashMap<>(parquetMetadata.getFileMetaData().getKeyValueMetaData());
        this.extraMetaData.put(ORIGINAL_CREATED_BY_KEY, str != null ? str : parquetMetadata.getFileMetaData().getCreatedBy());
        if (list != null && maskMode != null) {
            this.maskColumns = new HashMap<>();
            for (String dotPath : list) {
                this.maskColumns.put(ColumnPath.fromDotString(dotPath), maskMode);
            }
        }
        this.inputFiles.add(transParquetFileReader);
        this.indexCacheStrategy = IndexCache.CacheStrategy.NONE;
        this.overwriteInputWithJoinColumns = false;
        this.nullColumnEncryptor = null;
        this.renamedColumns = new HashMap<>();
    }

    /**
     * Determines the schema of the rewritten data before pruning.
     *
     * <p>Without join files this is the schema of the first input file. With join files the
     * top-level fields of the first input file come first, followed by the fields of the
     * first join file that are not already present; a join-side field with a name that is
     * already present replaces the input-side one only when
     * {@code overwriteInputWithJoinColumns} is set (it keeps the original position).
     *
     * @return the combined schema, named after the first input file's schema
     */
    private MessageType getSchema() {
        MessageType schema = this.inputFiles.peek().getFooter().getFileMetaData().getSchema();
        if (this.inputFilesToJoin.isEmpty()) {
            return schema;
        }
        // Insertion-ordered so the output keeps the input-side field order.
        Map<String, Type> fieldsByName = new LinkedHashMap<>();
        // Seed with the input-side fields; without this the merged schema would contain only
        // join-side columns and the "already present" branch below could never trigger.
        for (Type inputField : schema.getFields()) {
            fieldsByName.put(inputField.getName(), inputField);
        }
        MessageType joinSchema = this.inputFilesToJoin.peek().getFooter().getFileMetaData().getSchema();
        for (Type joinField : joinSchema.getFields()) {
            if (!fieldsByName.containsKey(joinField.getName())) {
                fieldsByName.put(joinField.getName(), joinField);
            } else if (this.overwriteInputWithJoinColumns) {
                LOG.info("Column {} in inputFiles is overwritten by inputFilesToJoin side", joinField.getName());
                fieldsByName.put(joinField.getName(), joinField);
            }
        }
        return new MessageType(schema.getName(), new ArrayList<>(fieldsByName.values()));
    }

    /**
     * Returns a copy of the schema in which every top-level field listed in
     * {@code renamedColumns} carries its new name; other fields are reused as they are.
     *
     * @param messageType schema to rename
     * @return schema with the same name and the renamed top-level fields
     */
    private MessageType getSchemaWithRenamedColumns(MessageType messageType) {
        List<Type> renamedFields = new ArrayList<>();
        for (Type field : messageType.getFields()) {
            if (!this.renamedColumns.containsKey(field.getName())) {
                renamedFields.add(field);
            } else if (field.isPrimitive()) {
                // NOTE(review): only repetition, primitive type name and name are passed on;
                // confirm nothing else on the original type needs to be carried over.
                renamedFields.add(new PrimitiveType(field.getRepetition(), field.asPrimitiveType().getPrimitiveTypeName(), this.renamedColumns.get(field.getName())));
            } else {
                renamedFields.add(new GroupType(field.getRepetition(), this.renamedColumns.get(field.getName()), field.asGroupType().getFields()));
            }
        }
        return new MessageType(messageType.getName(), renamedFields);
    }

    /**
     * Builds the key-value metadata for the output file.
     *
     * <p>The distinct created-by strings of the contributing files are joined with a newline
     * and stored under {@link #ORIGINAL_CREATED_BY_KEY}; afterwards the key-value metadata of
     * every contributing file is merged in file order, so later files win on duplicate keys
     * (including {@link #ORIGINAL_CREATED_BY_KEY} itself, should a file carry it).
     *
     * @param rewriteOptions decides whether the join files contribute metadata
     * @return a new mutable metadata map
     */
    private Map<String, String> getExtraMetadata(RewriteOptions rewriteOptions) {
        // Explicit element types are required here: with raw collections the lambda
        // parameters degrade to Object and the reader accessors no longer resolve.
        List<CompressionConverter.TransParquetFileReader> contributors = new ArrayList<>(this.inputFiles);
        if (!rewriteOptions.getIgnoreJoinFilesMetadata()) {
            contributors.addAll(this.inputFilesToJoin);
        }
        Set<String> createdBy = contributors.stream()
                .map(reader -> reader.getFooter().getFileMetaData().getCreatedBy())
                .collect(Collectors.toSet());
        Map<String, String> metadata = new HashMap<>();
        metadata.put(ORIGINAL_CREATED_BY_KEY, String.join("\n", createdBy));
        for (CompressionConverter.TransParquetFileReader reader : contributors) {
            metadata.putAll(reader.getFileMetaData().getKeyValueMetaData());
        }
        return metadata;
    }

    /**
     * Verifies that the input files and the files to join can be paired block by block.
     *
     * <p>The row counts of all blocks, taken in file order and then block order, must be
     * identical on both sides. Nothing is checked when there are no files to join.
     *
     * @throws IllegalArgumentException if the two row-count sequences differ
     */
    private void ensureRowCount() {
        if (this.inputFilesToJoin.isEmpty()) {
            return;
        }
        List<Long> leftRowCounts = blockRowCounts(this.inputFiles);
        List<Long> rightRowCounts = blockRowCounts(this.inputFilesToJoin);
        if (!leftRowCounts.equals(rightRowCounts)) {
            // Plain "." literal: an error message should not pull a constant from an
            // unrelated project just for its punctuation.
            throw new IllegalArgumentException("The number of rows in each block must match! Left blocks row counts: " + leftRowCounts + ", right blocks row counts: " + rightRowCounts + ".");
        }
    }

    /** Collects the row count of every block of every reader, in queue order then block order. */
    private static List<Long> blockRowCounts(Queue<CompressionConverter.TransParquetFileReader> readers) {
        return readers.stream()
                .flatMap(reader -> reader.getFooter().getBlocks().stream().map(BlockMetaData::getRowCount))
                .collect(Collectors.toList());
    }

    /**
     * Opens a reader for every given input file, preserving the list order.
     *
     * <p>If one file cannot be opened, the readers opened so far are closed again before the
     * failure is reported, so a partially successful call does not leak open files.
     *
     * @param list files to open
     * @param parquetConfiguration configuration used to build the read options
     * @return queue of opened readers in input order
     * @throws IllegalArgumentException if any file cannot be opened; the cause is the
     *     underlying {@link IOException} and failures while closing the already opened readers
     *     are attached as suppressed exceptions
     */
    private Queue<CompressionConverter.TransParquetFileReader> getFileReaders(List<InputFile> list, ParquetConfiguration parquetConfiguration) {
        LinkedList<CompressionConverter.TransParquetFileReader> readers = new LinkedList<>();
        for (InputFile inputFile : list) {
            try {
                readers.add(new CompressionConverter.TransParquetFileReader(inputFile, ParquetReadOptions.builder(parquetConfiguration).build()));
            } catch (IOException e) {
                IllegalArgumentException failure = new IllegalArgumentException("Failed to open input file: " + inputFile, e);
                // Best-effort cleanup: keep closing the remaining readers even if one close fails.
                for (CompressionConverter.TransParquetFileReader opened : readers) {
                    try {
                        opened.close();
                    } catch (IOException | RuntimeException closeFailure) {
                        failure.addSuppressed(closeFailure);
                    }
                }
                throw failure;
            }
        }
        return readers;
    }

    /**
     * Checks that every reader in the queue exposes the same schema as the first one.
     *
     * @param queue readers to compare; an empty queue passes trivially
     * @throws InvalidSchemaException on the first reader whose schema differs
     */
    private void ensureSameSchema(Queue<CompressionConverter.TransParquetFileReader> queue) {
        MessageType expected = null;
        for (CompressionConverter.TransParquetFileReader reader : queue) {
            MessageType actual = reader.getFooter().getFileMetaData().getSchema();
            if (expected == null) {
                // First schema seen becomes the reference for the rest.
                expected = actual;
                continue;
            }
            if (expected.equals((Object) actual)) {
                continue;
            }
            String file = reader.getFile();
            LOG.error("Input files have different schemas, expect: {}, input: {}, current file: {}", new Object[]{expected, actual, file});
            throw new InvalidSchemaException("Input files have different schemas, current file: " + file);
        }
    }

    /**
     * Validates the requested renames against the top-level fields of the schema.
     *
     * @param messageType schema whose top-level field names are checked
     * @param map rename mapping from existing column name to new column name
     * @throws IllegalArgumentException if a source name is not a top-level field, or a target
     *     name is already a top-level field
     */
    private void ensureRenamingCorrectness(MessageType messageType, Map<String, String> map) {
        Set<String> existingNames = new HashSet<>();
        for (Type field : messageType.getFields()) {
            existingNames.add(field.getName());
        }
        for (Map.Entry<String, String> rename : map.entrySet()) {
            String source = rename.getKey();
            String target = rename.getValue();
            if (!existingNames.contains(source)) {
                String message = String.format("Column to rename '%s' is not found in input files schema", source);
                LOG.error(message);
                throw new IllegalArgumentException(message);
            }
            if (existingNames.contains(target)) {
                String message = String.format("Renamed column target name '%s' is already present in a schema", target);
                LOG.error(message);
                throw new IllegalArgumentException(message);
            }
        }
    }

    /**
     * Ends the output writer, handing it the collected key-value metadata.
     *
     * @throws IOException propagated from the writer
     */
    @Override
    public void close() throws IOException {
        Map<String, String> keyValueMetadata = this.extraMetaData;
        this.writer.end(keyValueMetadata);
    }

    /**
     * Rewrites every block of every input file into the output writer.
     *
     * <p>Input files are consumed in queue order. When files to join are configured, each
     * input block is paired with the next join-side block: the join reader moves forward by
     * exactly one block per input block and the next join file is opened once the current
     * one has no blocks left. For every output column the chunk is taken from the join side
     * when the join block has that column and either the input file lacks it or
     * {@code overwriteInputWithJoinColumns} is set; otherwise it is taken from the input side.
     *
     * @throws IOException propagated from reading the inputs or writing the output
     * @throws IllegalStateException if the join files run out before the input blocks do
     */
    public void processBlocks() throws IOException {
        CompressionConverter.TransParquetFileReader readerToJoin = null;
        IndexCache indexCacheToJoin = null;
        int blockIdxToJoin = 0;
        // Decided once up front: the join queue drains as files are opened, yet the blocks of
        // the last opened join file still have to be paired with the remaining input blocks.
        boolean joinEnabled = !this.inputFilesToJoin.isEmpty();
        List<ColumnDescriptor> outColumns = this.outSchema.getColumns();
        while (!this.inputFiles.isEmpty()) {
            CompressionConverter.TransParquetFileReader reader = this.inputFiles.poll();
            LOG.info("Rewriting input file: {}, remaining files: {}", reader.getFile(), Integer.valueOf(this.inputFiles.size()));
            ParquetMetadata footer = reader.getFooter();
            Set<ColumnPath> columnPaths = footer.getFileMetaData().getSchema().getColumns().stream()
                    .map(descriptor -> ColumnPath.get(descriptor.getPath()))
                    .collect(Collectors.toSet());
            IndexCache indexCache = IndexCache.create(reader, columnPaths, this.indexCacheStrategy, true);
            for (int blockIdx = 0; blockIdx < footer.getBlocks().size(); blockIdx++) {
                BlockMetaData blockMetaData = footer.getBlocks().get(blockIdx);
                this.writer.startBlock(blockMetaData.getRowCount());
                indexCache.setBlockMetadata(blockMetaData);
                Map<ColumnPath, ColumnChunkMetaData> pathToChunk = blockMetaData.getColumns().stream()
                        .collect(Collectors.toMap(chunk -> chunk.getPath(), chunk -> chunk));
                if (joinEnabled) {
                    if (readerToJoin != null && blockIdxToJoin + 1 < readerToJoin.getFooter().getBlocks().size()) {
                        // Current join file still has blocks: step forward by exactly one.
                        blockIdxToJoin++;
                    } else {
                        // First block overall, or current join file exhausted: open the next one.
                        if (readerToJoin != null) {
                            readerToJoin.close();
                        }
                        CompressionConverter.TransParquetFileReader nextToJoin = this.inputFilesToJoin.poll();
                        if (nextToJoin == null) {
                            throw new IllegalStateException("No file left to join for block " + blockIdx + " of input file: " + reader.getFile());
                        }
                        readerToJoin = nextToJoin;
                        blockIdxToJoin = 0;
                        Set<ColumnPath> columnPathsToJoin = readerToJoin.getFileMetaData().getSchema().getColumns().stream()
                                .map(descriptor -> ColumnPath.get(descriptor.getPath()))
                                .collect(Collectors.toSet());
                        if (indexCacheToJoin != null) {
                            indexCacheToJoin.clean();
                        }
                        indexCacheToJoin = IndexCache.create(readerToJoin, columnPathsToJoin, this.indexCacheStrategy, true);
                    }
                    indexCacheToJoin.setBlockMetadata(readerToJoin.getFooter().getBlocks().get(blockIdxToJoin));
                }
                for (int outColumnIdx = 0; outColumnIdx < outColumns.size(); outColumnIdx++) {
                    ColumnPath colPath = ColumnPath.get(outColumns.get(outColumnIdx).getPath());
                    if (readerToJoin != null) {
                        Optional<ColumnChunkMetaData> chunkToJoin = readerToJoin.getFooter().getBlocks().get(blockIdxToJoin).getColumns().stream()
                                .filter(chunk -> chunk.getPath().equals(colPath))
                                .findFirst();
                        boolean takeFromJoinSide = chunkToJoin.isPresent()
                                && (this.overwriteInputWithJoinColumns || !columnPaths.contains(colPath));
                        if (takeFromJoinSide) {
                            processBlock(readerToJoin, blockIdxToJoin, outColumnIdx, indexCacheToJoin, chunkToJoin.get());
                        } else {
                            processBlock(reader, blockIdx, outColumnIdx, indexCache, pathToChunk.get(colPath));
                        }
                    } else {
                        processBlock(reader, blockIdx, outColumnIdx, indexCache, pathToChunk.get(colPath));
                    }
                }
                this.writer.endBlock();
                indexCache.clean();
                this.numBlocksRewritten++;
            }
            indexCache.clean();
            LOG.info("Finish rewriting input file: {}", reader.getFile());
            reader.close();
        }
        if (readerToJoin != null) {
            readerToJoin.close();
        }
    }

    /**
     * Applies the configured rename to the first segment of a column path.
     *
     * @param columnPath path to normalize
     * @return the same path when no renames are configured, otherwise a path whose first
     *     segment is replaced by its new name (or left alone when it is not renamed)
     */
    private ColumnPath normalizeFieldsInPath(ColumnPath columnPath) {
        if (!this.renamedColumns.isEmpty()) {
            // NOTE(review): the array returned by toArray() is written to in place; if it
            // exposes the path's own storage, the argument is modified as well — confirm.
            String[] segments = columnPath.toArray();
            String topLevelName = segments[0];
            segments[0] = this.renamedColumns.getOrDefault(topLevelName, topLevelName);
            return ColumnPath.get(segments);
        }
        return columnPath;
    }

    /**
     * Applies the configured rename to a primitive type's name.
     *
     * @param primitiveType type to normalize
     * @return the same instance when no renames are configured, otherwise a new primitive
     *     type built from the original repetition and primitive type name and the mapped
     *     name (the original name when it is not renamed)
     */
    private PrimitiveType normalizeNameInType(PrimitiveType primitiveType) {
        if (this.renamedColumns.isEmpty()) {
            return primitiveType;
        }
        String currentName = primitiveType.getName();
        String targetName = this.renamedColumns.getOrDefault(currentName, currentName);
        return new PrimitiveType(primitiveType.getRepetition(), primitiveType.asPrimitiveType().getPrimitiveTypeName(), targetName);
    }

    /**
     * Writes one column chunk of one block to the output, choosing between three paths:
     * nullifying a masked column, copying the chunk unchanged, or re-processing its pages
     * when a new codec or encryption is requested.
     *
     * @param transParquetFileReader reader that owns the chunk
     * @param i index of the block within that reader's footer
     * @param i2 index of the column within the output schema
     * @param indexCache source of the chunk's bloom filter, column index and offset index
     * @param columnChunkMetaData metadata of the chunk to write
     * @throws IOException if the chunk is already encrypted, a required column is to be
     *     nullified, or reading/writing fails
     * @throws UnsupportedOperationException if the column's mask mode is not NULLIFY
     */
    private void processBlock(CompressionConverter.TransParquetFileReader transParquetFileReader, int i, int i2, IndexCache indexCache, ColumnChunkMetaData columnChunkMetaData) throws IOException {
        if (columnChunkMetaData.isEncrypted()) {
            throw new IOException("Column " + columnChunkMetaData.getPath().toDotString() + " is already encrypted");
        }
        // With renames configured, the chunk metadata is rebuilt under the renamed path/type.
        ColumnChunkMetaData renamedChunk;
        if (this.renamedColumns.isEmpty()) {
            renamedChunk = columnChunkMetaData;
        } else {
            renamedChunk = ColumnChunkMetaData.get(normalizeFieldsInPath(columnChunkMetaData.getPath()), normalizeNameInType(columnChunkMetaData.getPrimitiveType()), columnChunkMetaData.getCodec(), columnChunkMetaData.getEncodingStats(), columnChunkMetaData.getEncodings(), columnChunkMetaData.getStatistics(), columnChunkMetaData.getFirstDataPageOffset(), columnChunkMetaData.getDictionaryPageOffset(), columnChunkMetaData.getValueCount(), columnChunkMetaData.getTotalSize(), columnChunkMetaData.getTotalUncompressedSize(), columnChunkMetaData.getSizeStatistics());
        }
        ColumnDescriptor originalDescriptor = this.outSchema.getColumns().get(i2);
        ColumnDescriptor renamedDescriptor = getSchemaWithRenamedColumns(this.outSchema).getColumns().get(i2);
        BlockMetaData sourceBlock = transParquetFileReader.getFooter().getBlocks().get(i);
        String originalCreatedBy = transParquetFileReader.getFileMetaData().getCreatedBy();
        transParquetFileReader.setStreamPosition(columnChunkMetaData.getStartingPos());
        CompressionCodecName targetCodec = this.newCodecName != null ? this.newCodecName : columnChunkMetaData.getCodec();
        boolean encryptColumn = this.encryptMode && this.encryptColumns != null && this.encryptColumns.contains(columnChunkMetaData.getPath());
        boolean masked = this.maskColumns != null && this.maskColumns.containsKey(columnChunkMetaData.getPath());
        if (masked) {
            MaskMode mode = this.maskColumns.get(columnChunkMetaData.getPath());
            if (!mode.equals(MaskMode.NULLIFY)) {
                throw new UnsupportedOperationException("Only nullify is supported for now");
            }
            PrimitiveType originalType = originalDescriptor.getPrimitiveType();
            if (originalType.getRepetition().equals(Type.Repetition.REQUIRED)) {
                throw new IOException("Required column [" + originalDescriptor.getPrimitiveType().getName() + "] cannot be nullified");
            }
            nullifyColumn(transParquetFileReader, i, originalDescriptor, columnChunkMetaData, this.writer, targetCodec, encryptColumn, originalCreatedBy);
        } else if (!this.encryptMode && this.newCodecName == null) {
            // Neither re-compression nor encryption requested: append the chunk as it is.
            this.writer.appendColumnChunk(renamedDescriptor, transParquetFileReader.getStream(), renamedChunk, indexCache.getBloomFilter(columnChunkMetaData), indexCache.getColumnIndex(columnChunkMetaData), indexCache.getOffsetIndex(columnChunkMetaData));
        } else {
            ColumnChunkEncryptorRunTime encryptorRunTime = null;
            if (this.encryptMode) {
                encryptorRunTime = new ColumnChunkEncryptorRunTime(this.writer.getEncryptor(), columnChunkMetaData, this.numBlocksRewritten, i2);
            }
            this.writer.startColumn(renamedDescriptor, columnChunkMetaData.getValueCount(), targetCodec);
            processChunk(transParquetFileReader, sourceBlock.getRowCount(), columnChunkMetaData, targetCodec, encryptorRunTime, encryptColumn, indexCache.getBloomFilter(columnChunkMetaData), indexCache.getColumnIndex(columnChunkMetaData), indexCache.getOffsetIndex(columnChunkMetaData), originalCreatedBy);
            this.writer.endColumn();
        }
    }

    private void processChunk(CompressionConverter.TransParquetFileReader transParquetFileReader, long j, ColumnChunkMetaData columnChunkMetaData, CompressionCodecName compressionCodecName, ColumnChunkEncryptorRunTime columnChunkEncryptorRunTime, boolean z, BloomFilter bloomFilter, ColumnIndex columnIndex, OffsetIndex offsetIndex, String str) throws IOException {
        CompressionCodecFactory newFactory = HadoopCodecs.newFactory(0);
        CompressionCodecFactory.BytesInputDecompressor bytesInputDecompressor = null;
        CompressionCodecFactory.BytesInputCompressor bytesInputCompressor = null;
        if (!compressionCodecName.equals(columnChunkMetaData.getCodec())) {
            bytesInputDecompressor = newFactory.getDecompressor(columnChunkMetaData.getCodec());
            bytesInputCompressor = newFactory.getCompressor(compressionCodecName);
        }
        BlockCipher.Encryptor encryptor = null;
        BlockCipher.Encryptor encryptor2 = null;
        byte[] bArr = null;
        byte[] bArr2 = null;
        byte[] bArr3 = null;
        byte[] bArr4 = null;
        if (columnChunkEncryptorRunTime != null) {
            encryptor = columnChunkEncryptorRunTime.getMetaDataEncryptor();
            encryptor2 = columnChunkEncryptorRunTime.getDataEncryptor();
            bArr = columnChunkEncryptorRunTime.getDictPageAAD();
            bArr2 = columnChunkEncryptorRunTime.getDataPageAAD();
            bArr3 = columnChunkEncryptorRunTime.getDictPageHeaderAAD();
            bArr4 = columnChunkEncryptorRunTime.getDataPageHeaderAAD();
        }
        if (bloomFilter != null) {
            this.writer.addBloomFilter(normalizeFieldsInPath(columnChunkMetaData.getPath()).toDotString(), bloomFilter);
        }
        transParquetFileReader.setStreamPosition(columnChunkMetaData.getStartingPos());
        DictionaryPage dictionaryPage = null;
        long j2 = 0;
        long j3 = 0;
        boolean z2 = false;
        ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
        int i = 0;
        long valueCount = columnChunkMetaData.getValueCount();
        while (j2 < valueCount) {
            PageHeader readPageHeader = transParquetFileReader.readPageHeader();
            int compressed_page_size = readPageHeader.getCompressed_page_size();
            switch (readPageHeader.type) {
                case DICTIONARY_PAGE:
                    if (dictionaryPage == null) {
                        DictionaryPageHeader dictionaryPageHeader = readPageHeader.dictionary_page_header;
                        dictionaryPage = new DictionaryPage(BytesInput.from(processPageLoad(transParquetFileReader, true, bytesInputCompressor, bytesInputDecompressor, readPageHeader.getCompressed_page_size(), readPageHeader.getUncompressed_page_size(), z, encryptor2, bArr)), readPageHeader.getUncompressed_page_size(), dictionaryPageHeader.getNum_values(), parquetMetadataConverter.getEncoding(dictionaryPageHeader.getEncoding()));
                        this.writer.writeDictionaryPage(dictionaryPage, encryptor, bArr3);
                        break;
                    } else {
                        throw new IOException("has more than one dictionary page in column chunk: " + columnChunkMetaData);
                    }
                case DATA_PAGE:
                    if (z) {
                        AesCipher.quickUpdatePageAAD(bArr4, i);
                        AesCipher.quickUpdatePageAAD(bArr2, i);
                    }
                    DataPageHeader dataPageHeader = readPageHeader.data_page_header;
                    byte[] processPageLoad = processPageLoad(transParquetFileReader, true, bytesInputCompressor, bytesInputDecompressor, readPageHeader.getCompressed_page_size(), readPageHeader.getUncompressed_page_size(), z, encryptor2, bArr2);
                    Statistics<?> convertStatistics = convertStatistics(str, normalizeNameInType(columnChunkMetaData.getPrimitiveType()), dataPageHeader.getStatistics(), columnIndex, i, parquetMetadataConverter);
                    if (convertStatistics == null) {
                        z2 = true;
                    } else {
                        Preconditions.checkState(!z2, "Detected mixed null page statistics and non-null page statistics");
                    }
                    j2 += dataPageHeader.getNum_values();
                    if (offsetIndex != null) {
                        j3 += (1 + offsetIndex.getLastRowIndex(i, j)) - offsetIndex.getFirstRowIndex(i);
                        this.writer.writeDataPage(toIntWithCheck(dataPageHeader.getNum_values()), readPageHeader.getUncompressed_page_size(), BytesInput.from(processPageLoad), convertStatistics, toIntWithCheck(r0), parquetMetadataConverter.getEncoding(dataPageHeader.getRepetition_level_encoding()), parquetMetadataConverter.getEncoding(dataPageHeader.getDefinition_level_encoding()), parquetMetadataConverter.getEncoding(dataPageHeader.getEncoding()), encryptor, bArr4);
                    } else {
                        this.writer.writeDataPage(toIntWithCheck(dataPageHeader.getNum_values()), readPageHeader.getUncompressed_page_size(), BytesInput.from(processPageLoad), convertStatistics, parquetMetadataConverter.getEncoding(dataPageHeader.getRepetition_level_encoding()), parquetMetadataConverter.getEncoding(dataPageHeader.getDefinition_level_encoding()), parquetMetadataConverter.getEncoding(dataPageHeader.getEncoding()), encryptor, bArr4);
                    }
                    i++;
                    break;
                case DATA_PAGE_V2:
                    if (z) {
                        AesCipher.quickUpdatePageAAD(bArr4, i);
                        AesCipher.quickUpdatePageAAD(bArr2, i);
                    }
                    DataPageHeaderV2 dataPageHeaderV2 = readPageHeader.data_page_header_v2;
                    int repetition_levels_byte_length = dataPageHeaderV2.getRepetition_levels_byte_length();
                    BytesInput readBlockAllocate = readBlockAllocate(repetition_levels_byte_length, transParquetFileReader);
                    int definition_levels_byte_length = dataPageHeaderV2.getDefinition_levels_byte_length();
                    BytesInput readBlockAllocate2 = readBlockAllocate(definition_levels_byte_length, transParquetFileReader);
                    int compressed_page_size2 = (readPageHeader.getCompressed_page_size() - repetition_levels_byte_length) - definition_levels_byte_length;
                    int uncompressed_page_size = (readPageHeader.getUncompressed_page_size() - repetition_levels_byte_length) - definition_levels_byte_length;
                    byte[] processPageLoad2 = processPageLoad(transParquetFileReader, dataPageHeaderV2.is_compressed, bytesInputCompressor, bytesInputDecompressor, compressed_page_size2, uncompressed_page_size, z, encryptor2, bArr2);
                    Statistics<?> convertStatistics2 = convertStatistics(str, normalizeNameInType(columnChunkMetaData.getPrimitiveType()), dataPageHeaderV2.getStatistics(), columnIndex, i, parquetMetadataConverter);
                    if (convertStatistics2 == null) {
                        z2 = true;
                    } else {
                        Preconditions.checkState(!z2, "Detected mixed null page statistics and non-null page statistics");
                    }
                    j2 += dataPageHeaderV2.getNum_values();
                    j3 += dataPageHeaderV2.getNum_rows();
                    this.writer.writeDataPageV2(dataPageHeaderV2.getNum_rows(), dataPageHeaderV2.getNum_nulls(), dataPageHeaderV2.getNum_values(), readBlockAllocate, readBlockAllocate2, parquetMetadataConverter.getEncoding(dataPageHeaderV2.getEncoding()), BytesInput.from(processPageLoad2), uncompressed_page_size, convertStatistics2, encryptor, bArr4);
                    i++;
                    break;
                default:
                    LOG.debug("skipping page of type {} of size {}", readPageHeader.getType(), Integer.valueOf(compressed_page_size));
                    break;
            }
        }
        Preconditions.checkState(j3 == 0 || j3 == j, "Read row count: %s not match with block total row count: %s", Long.valueOf(j3), Long.valueOf(j));
        if (z2) {
            this.writer.invalidateStatistics(columnChunkMetaData.getStatistics());
        }
    }

    /**
     * Builds the statistics to attach to a rewritten data page.
     *
     * <p>When {@code columnIndex} is present, its entry for page {@code i} is used and the
     * page-header {@code statistics} argument is ignored. Otherwise the page-header statistics
     * are converted, or {@code null} is returned when the header carries none.
     *
     * @param str created-by string handed to the metadata converter
     * @param primitiveType primitive type of the column the page belongs to
     * @param statistics statistics read from the page header, may be {@code null}
     * @param columnIndex column index of the chunk, may be {@code null}
     * @param i zero-based ordinal of the data page inside the chunk
     * @param parquetMetadataConverter converter used for page-header statistics
     * @return the page statistics, or {@code null} when neither source provides any
     * @throws IOException if the column index has no null-page list, or has no entry for page {@code i}
     */
    private Statistics<?> convertStatistics(String str, PrimitiveType primitiveType, org.apache.iceberg.shaded.org.apache.parquet.format.Statistics statistics, ColumnIndex columnIndex, int i, ParquetMetadataConverter parquetMetadataConverter) throws IOException {
        if (columnIndex == null) {
            if (statistics != null) {
                return parquetMetadataConverter.fromParquetStatistics(str, statistics, primitiveType);
            }
            return null;
        }
        List<Boolean> nullPages = columnIndex.getNullPages();
        if (nullPages == null) {
            throw new IOException("columnIndex has null variable 'nullPages' which indicates corrupted data for type: " + primitiveType.getName());
        }
        // i is a zero-based index, so i == size() is already out of range. Rejecting it here
        // reports the corruption as an IOException instead of an IndexOutOfBoundsException below.
        if (i >= nullPages.size()) {
            throw new IOException("There are more pages " + i + " found in the column than in the columnIndex " + nullPages.size());
        }
        Statistics.Builder builderForReading = Statistics.getBuilderForReading(primitiveType);
        builderForReading.withNumNulls(columnIndex.getNullCounts().get(i).longValue());
        // Min/max are only taken from the index for pages that are not flagged as null pages.
        if (!nullPages.get(i).booleanValue()) {
            builderForReading.withMin(columnIndex.getMinValues().get(i).array().clone());
            builderForReading.withMax(columnIndex.getMaxValues().get(i).array().clone());
        }
        return builderForReading.build();
    }

    /**
     * Reads one page payload from the input file and prepares it for the output file.
     *
     * <p>If a compressor is supplied the payload is re-compressed, being decompressed first
     * when {@code z} says the stored payload is compressed. If {@code z2} is set the resulting
     * bytes are encrypted with {@code encryptor} and {@code bArr}.
     *
     * @param transParquetFileReader reader positioned at the start of the payload
     * @param z whether the stored payload is compressed
     * @param bytesInputCompressor compressor for the output, or {@code null} to keep the bytes as read
     * @param bytesInputDecompressor decompressor for the input; only used when re-compressing a compressed payload
     * @param i number of payload bytes to read
     * @param i2 uncompressed size passed to the decompressor
     * @param z2 whether the payload must be encrypted
     * @param encryptor encryptor used when {@code z2} is true
     * @param bArr AAD passed to the encryptor
     * @return the payload bytes to write to the output file
     * @throws IOException if reading, decompressing or compressing fails
     */
    private byte[] processPageLoad(CompressionConverter.TransParquetFileReader transParquetFileReader, boolean z, CompressionCodecFactory.BytesInputCompressor bytesInputCompressor, CompressionCodecFactory.BytesInputDecompressor bytesInputDecompressor, int i, int i2, boolean z2, BlockCipher.Encryptor encryptor, byte[] bArr) throws IOException {
        BytesInput payload = readBlock(i, transParquetFileReader);

        // Transcode only when a target compressor was requested.
        if (bytesInputCompressor != null) {
            BytesInput plain = payload;
            if (z) {
                plain = bytesInputDecompressor.decompress(payload, i2);
            }
            payload = bytesInputCompressor.compress(plain);
        }

        byte[] bytes = payload.toByteArray();
        if (z2) {
            return encryptor.encrypt(bytes, bArr);
        }
        return bytes;
    }

    /**
     * Reads {@code i} bytes from the reader's current position.
     *
     * <p>Reads of up to 2097152 bytes (2 MiB) go into the shared {@code pageBuffer} field; larger
     * reads get a freshly allocated array. NOTE(review): the returned {@link BytesInput}
     * presumably wraps the array without copying, in which case a result backed by
     * {@code pageBuffer} is only valid until the next call; confirm before retaining it.
     *
     * @param i number of bytes to read
     * @param transParquetFileReader reader to read from
     * @return the bytes read, covering exactly {@code i} bytes of the target array
     * @throws IOException if the read fails
     */
    public BytesInput readBlock(int i, CompressionConverter.TransParquetFileReader transParquetFileReader) throws IOException {
        final byte[] target;
        if (i <= 2097152) {
            target = this.pageBuffer;
        } else {
            target = new byte[i];
        }
        transParquetFileReader.blockRead(target, 0, i);
        return BytesInput.from(target, 0, i);
    }

    /**
     * Reads {@code i} bytes from the reader's current position into a newly allocated array.
     *
     * <p>Unlike {@code readBlock}, this never uses the shared page buffer, so the result does
     * not alias any buffer owned by this object.
     *
     * @param i number of bytes to read
     * @param transParquetFileReader reader to read from
     * @return the bytes read
     * @throws IOException if the read fails
     */
    public BytesInput readBlockAllocate(int i, CompressionConverter.TransParquetFileReader transParquetFileReader) throws IOException {
        final byte[] fresh = new byte[i];
        transParquetFileReader.blockRead(fresh, 0, i);
        BytesInput wrapped = BytesInput.from(fresh, 0, i);
        return wrapped;
    }

    /**
     * Narrows a {@code long} to an {@code int}, refusing values that do not fit.
     *
     * @param j value to narrow
     * @return {@code j} as an {@code int}
     * @throws ParquetEncodingException if {@code j} is outside the {@code int} range
     */
    private int toIntWithCheck(long j) {
        final int narrowed = (int) j;
        // The round trip only preserves the value when it fits in 32 bits.
        if (narrowed == j) {
            return narrowed;
        }
        throw new ParquetEncodingException("size is bigger than 2147483647 bytes: " + j);
    }

    /**
     * Appends the path of every field below {@code groupType} to {@code list}, depth first.
     *
     * <p>Both group fields and leaf fields are recorded; a group's own path is added before
     * the paths of its children.
     *
     * @param groupType group whose fields are walked
     * @param list output list that receives the paths
     * @param str path of {@code groupType} itself, or {@code null} when it is the root
     */
    private void getPaths(GroupType groupType, List<String> list, String str) {
        final String prefix;
        if (str == null) {
            prefix = "";
        } else {
            prefix = str + Util.DOT_STRING;
        }
        for (Type field : groupType.getFields()) {
            String fieldPath = prefix + field.getName();
            list.add(fieldPath);
            if (field instanceof GroupType) {
                getPaths(field.asGroupType(), list, fieldPath);
            }
        }
    }

    /**
     * Returns a copy of {@code messageType} without the columns named in {@code list}.
     *
     * <p>A requested name that matches no path in the schema is logged as a warning and
     * otherwise ignored.
     *
     * @param messageType schema to prune
     * @param list dot-separated paths of the columns to drop; {@code null} or empty means no pruning
     * @return {@code messageType} itself when there is nothing to prune, otherwise a new pruned schema
     */
    private MessageType pruneColumnsInSchema(MessageType messageType, List<String> list) {
        if (list == null || list.isEmpty()) {
            return messageType;
        }
        List<String> schemaPaths = new ArrayList<>();
        getPaths(messageType, schemaPaths, null);
        // Hash lookup keeps the validation linear rather than scanning the path list per name.
        Set<String> knownPaths = new HashSet<>(schemaPaths);
        for (String requested : list) {
            if (!knownPaths.contains(requested)) {
                LOG.warn("Input column name {} doesn't show up in the schema", requested);
            }
        }
        List<String> currentPath = new ArrayList<>();
        Set<ColumnPath> prunePaths = convertToColumnPaths(list);
        return new MessageType(messageType.getName(), pruneColumnsInFields(messageType.getFields(), currentPath, prunePaths));
    }

    /**
     * Prunes each field in {@code list} and collects the ones that survive.
     *
     * @param list sibling fields to prune
     * @param list2 names on the path from the root to the parent of these fields
     * @param set paths of the columns to drop
     * @return the surviving fields in their original order; empty when all were dropped
     */
    private List<Type> pruneColumnsInFields(List<Type> list, List<String> list2, Set<ColumnPath> set) {
        List<Type> survivors = new ArrayList<>();
        for (Type field : list) {
            Type pruned = pruneColumnsInField(field, list2, set);
            if (pruned == null) {
                continue;
            }
            survivors.add(pruned);
        }
        return survivors;
    }

    /**
     * Prunes a single field.
     *
     * <p>The field's name is pushed onto {@code list} while it is processed and popped again
     * before returning. A field whose path is in {@code set} is dropped. A primitive field is
     * otherwise kept as is. A group is rebuilt from its surviving children and dropped when
     * none survive.
     *
     * @param type field to prune
     * @param list names on the path from the root to the parent of {@code type}
     * @param set paths of the columns to drop
     * @return the pruned field, or {@code null} when the field is removed
     */
    private Type pruneColumnsInField(Type type, List<String> list, Set<ColumnPath> set) {
        list.add(type.getName());
        ColumnPath fieldPath = ColumnPath.get((String[]) list.toArray(new String[0]));
        Type result = null;
        if (!set.contains(fieldPath)) {
            if (!type.isPrimitive()) {
                GroupType group = (GroupType) type;
                List<Type> keptChildren = pruneColumnsInFields(group.getFields(), list, set);
                if (!keptChildren.isEmpty()) {
                    result = group.withNewFields(keptChildren);
                }
            } else {
                result = type;
            }
        }
        // Restore the caller's path before returning.
        list.remove(list.size() - 1);
        return result;
    }

    /**
     * Converts dot-separated column names into a set of {@link ColumnPath}s.
     *
     * @param list dot-separated column names
     * @return a new {@link HashSet} holding one path per distinct input name
     */
    private Set<ColumnPath> convertToColumnPaths(List<String> list) {
        return list.stream()
                .map(ColumnPath::fromDotString)
                .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Rewrites one column chunk so that it contains only nulls and flushes it to
     * {@code parquetFileWriter}.
     *
     * <p>The chunk is written through a temporary single-column write store. Its writer version
     * follows the source chunk's encoding stats (V2 pages or not).
     *
     * @param transParquetFileReader reader of the input file
     * @param i index of the row group to read
     * @param columnDescriptor descriptor of the column being nullified
     * @param columnChunkMetaData metadata of the source column chunk
     * @param parquetFileWriter writer that receives the nullified chunk
     * @param compressionCodecName codec used to compress the new pages
     * @param z whether the column is encrypted in the output; requires the writer to have an encryptor
     * @param str created-by string of the source file, passed to the column read store
     * @throws IOException if reading or writing fails, or if a value sits at the maximum
     *     definition level while that level is 0
     * @throws IllegalArgumentException if {@code z} is set but the writer has no encryptor
     */
    private void nullifyColumn(CompressionConverter.TransParquetFileReader transParquetFileReader, int i, ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData, ParquetFileWriter parquetFileWriter, CompressionCodecName compressionCodecName, boolean z, String str) throws IOException {
        if (z) {
            Preconditions.checkArgument(parquetFileWriter.getEncryptor() != null, "Missing encryptor");
        }
        long valueCount = columnChunkMetaData.getValueCount();
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        PageReadStore readRowGroup = transParquetFileReader.readRowGroup(i);
        ColumnChunkPageWriteStore columnChunkPageWriteStore;
        ColumnWriteStore newColumnWriteStore;
        ColumnWriter columnWriter;
        try {
            ColumnReader columnReader = new ColumnReadStoreImpl(readRowGroup, new DummyGroupConverter(), this.outSchema, str).getColumnReader(columnDescriptor);
            ParquetProperties build = ParquetProperties.builder().withWriterVersion(columnChunkMetaData.getEncodingStats().usesV2Pages() ? ParquetProperties.WriterVersion.PARQUET_2_0 : ParquetProperties.WriterVersion.PARQUET_1_0).build();
            CodecFactory.BytesCompressor compressor = new CodecFactory(new Configuration(), build.getPageSizeThreshold()).getCompressor(compressionCodecName);
            // The temporary write store uses a schema reduced to the column being nullified.
            MessageType schemaWithRenamedColumns = getSchemaWithRenamedColumns(newSchema(this.outSchema, columnDescriptor));
            columnChunkPageWriteStore = new ColumnChunkPageWriteStore((CompressionCodecFactory.BytesInputCompressor) compressor, schemaWithRenamedColumns, build.getAllocator(), build.getColumnIndexTruncateLength(), build.getPageWriteChecksumEnabled(), this.nullColumnEncryptor, this.numBlocksRewritten);
            newColumnWriteStore = build.newColumnWriteStore(schemaWithRenamedColumns, columnChunkPageWriteStore);
            columnWriter = newColumnWriteStore.getColumnWriter(columnDescriptor);
            // The counter is a long: an int counter compared against a long value count would
            // wrap around and never terminate for chunks with more than Integer.MAX_VALUE values.
            // NOTE(review): the reader is only queried here and no call that advances it is
            // visible in this method; confirm that is intended for repeated columns.
            for (long written = 0; written < valueCount; written++) {
                int currentRepetitionLevel = columnReader.getCurrentRepetitionLevel();
                int currentDefinitionLevel = columnReader.getCurrentDefinitionLevel();
                if (currentDefinitionLevel != maxDefinitionLevel) {
                    // Already null at some level: copy the levels unchanged.
                    columnWriter.writeNull(currentRepetitionLevel, currentDefinitionLevel);
                } else {
                    if (currentDefinitionLevel == 0) {
                        throw new IOException("definition level is detected to be 0 for column " + columnChunkMetaData.getPath().toDotString() + " to be nullified");
                    }
                    // A defined value: emit a null one definition level lower, but only at
                    // repetition level 0.
                    if (currentRepetitionLevel == 0) {
                        columnWriter.writeNull(currentRepetitionLevel, currentDefinitionLevel - 1);
                    }
                }
                newColumnWriteStore.endRecord();
            }
        } finally {
            // Close the row group pages even when setup or the rewrite loop fails.
            readRowGroup.close();
        }
        newColumnWriteStore.flush();
        columnChunkPageWriteStore.flushToFileWriter(parquetFileWriter);
        newColumnWriteStore.close();
        columnWriter.close();
    }

    /**
     * Builds a schema that contains only the column described by {@code columnDescriptor}.
     *
     * <p>A top-level column is wrapped directly. A nested column is searched for under the
     * non-primitive top-level fields, and the first match is returned with its chain of
     * enclosing groups.
     *
     * @param messageType full schema to project from
     * @param columnDescriptor column to keep
     * @return a message type with the same name as {@code messageType} and a single field
     * @throws RuntimeException if the nested column is not found under any top-level group
     */
    private MessageType newSchema(MessageType messageType, ColumnDescriptor columnDescriptor) {
        String[] columnPath = columnDescriptor.getPath();
        Type target = messageType.getType(columnPath);
        if (columnPath.length == 1) {
            return new MessageType(messageType.getName(), target);
        }
        for (Type topLevel : messageType.getFields()) {
            if (topLevel.isPrimitive()) {
                continue;
            }
            Type projected = extractField(topLevel.asGroupType(), target);
            if (projected != null) {
                return new MessageType(messageType.getName(), projected);
            }
        }
        throw new RuntimeException("No field is found");
    }

    /**
     * Searches {@code groupType} depth first for {@code type} and rebuilds the chain of
     * enclosing groups around the first match.
     *
     * <p>Each rebuilt group keeps only its repetition and name, with the matched branch as its
     * single child.
     *
     * @param groupType group to search
     * @param type field to look for
     * @return {@code type} itself when it equals {@code groupType}; otherwise the single-branch
     *     group leading to the match, or {@code null} when nothing matches
     */
    private Type extractField(GroupType groupType, Type type) {
        if (type.equals((Object) groupType)) {
            return type;
        }
        for (Type child : groupType.asGroupType().getFields()) {
            if (child.isPrimitive()) {
                if (child.equals((Object) type)) {
                    return new GroupType(groupType.getRepetition(), groupType.getName(), type);
                }
                continue;
            }
            Type nested = extractField(child.asGroupType(), type);
            if (nested != null) {
                return new GroupType(groupType.getRepetition(), groupType.getName(), nested);
            }
        }
        return null;
    }
}
