package org.apache.spark.sql.execution.datasources.orc;

import java.io.Serializable;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.orc.OrcConf;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcOutputFormat;
import org.apache.orc.mapreduce.OrcInputFormat;
import org.apache.spark.TaskContext$;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.JoinedRow;
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection;
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$;
import org.apache.spark.sql.execution.WholeStageCodegenExec$;
import org.apache.spark.sql.execution.datasources.FileFormat;
import org.apache.spark.sql.execution.datasources.OutputWriter;
import org.apache.spark.sql.execution.datasources.OutputWriterFactory;
import org.apache.spark.sql.execution.datasources.PartitionedFile;
import org.apache.spark.sql.execution.datasources.RecordReaderIterator;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.sources.DataSourceRegister;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.AtomicType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.MapType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.UserDefinedType;
import org.apache.spark.util.SerializableConfiguration;
import org.apache.spark.util.Utils$;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.ArrayOps$;
import scala.collection.Iterator;
import scala.collection.immutable.Map;
import scala.collection.immutable.Seq;
import scala.math.Ordering$Int$;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: OrcFileFormat.scala */
@ScalaSignature(bytes = "\u0006\u0005\u0005]e\u0001\u0002\u0007\u000e\u0001qAQ!\u000e\u0001\u0005\u0002YBQ!\u000f\u0001\u0005BiBQA\u0012\u0001\u0005BiBQa\u0012\u0001\u0005B!CQ\u0001\u0014\u0001\u0005B5CQA\u0016\u0001\u0005B]Caa \u0001\u0005B\u0005\u0005\u0001bBA\u0011\u0001\u0011\u0005\u00131\u0005\u0005\b\u0003W\u0001A\u0011IA\u0017\u0011\u001d\ti\u0004\u0001C!\u0003\u007fAq!!#\u0001\t\u0003\nYIA\u0007Pe\u000e4\u0015\u000e\\3G_Jl\u0017\r\u001e\u0006\u0003\u001d=\t1a\u001c:d\u0015\t\u0001\u0012#A\u0006eCR\f7o\\;sG\u0016\u001c(B\u0001\n\u0014\u0003%)\u00070Z2vi&|gN\u0003\u0002\u0015+\u0005\u00191/\u001d7\u000b\u0005Y9\u0012!B:qCJ\\'B\u0001\r\u001a\u0003\u0019\t\u0007/Y2iK*\t!$A\u0002pe\u001e\u001c\u0001aE\u0003\u0001;\r:S\u0006\u0005\u0002\u001fC5\tqDC\u0001!\u0003\u0015\u00198-\u00197b\u0013\t\u0011sD\u0001\u0004B]f\u0014VM\u001a\t\u0003I\u0015j\u0011aD\u0005\u0003M=\u0011!BR5mK\u001a{'/\\1u!\tA3&D\u0001*\u0015\tQ3#A\u0004t_V\u00148-Z:\n\u00051J#A\u0005#bi\u0006\u001cv.\u001e:dKJ+w-[:uKJ\u0004\"AL\u001a\u000e\u0003=R!\u0001M\u0019\u0002\u0005%|'\"\u0001\u001a\u0002\t)\fg/Y\u0005\u0003i=\u0012AbU3sS\u0006d\u0017N_1cY\u0016\fa\u0001P5oSRtD#A\u001c\u0011\u0005a\u0002Q\"A\u0007\u0002\u0013MDwN\u001d;OC6,G#A\u001e\u0011\u0005q\u001aeBA\u001fB!\tqt$D\u0001@\u0015\t\u00015$\u0001\u0004=e>|GOP\u0005\u0003\u0005~\ta\u0001\u0015:fI\u00164\u0017B\u0001#F\u0005\u0019\u0019FO]5oO*\u0011!iH\u0001\ti>\u001cFO]5oO\u0006A\u0001.Y:i\u0007>$W\rF\u0001J!\tq\"*\u0003\u0002L?\t\u0019\u0011J\u001c;\u0002\r\u0015\fX/\u00197t)\tq\u0015\u000b\u0005\u0002\u001f\u001f&\u0011\u0001k\b\u0002\b\u0005>|G.Z1o\u0011\u0015\u0011V\u00011\u0001T\u0003\u0015yG\u000f[3s!\tqB+\u0003\u0002V?\t\u0019\u0011I\\=\u0002\u0017%tg-\u001a:TG\",W.\u0019\u000b\u00051\u0006<G\u000eE\u0002\u001f3nK!AW\u0010\u0003\r=\u0003H/[8o!\tav,D\u0001^\u0015\tq6#A\u0003usB,7/\u0003\u0002a;\nQ1\u000b\u001e:vGR$\u0016\u0010]3\t\u000b\t4\u0001\u0019A2\u0002\u0019M\u0004\u0018M]6TKN\u001c\u0018n\u001c8\u0011\u0005\u0011,W\"A\n\n\u000
5\u0019\u001c\"\u0001D*qCJ\\7+Z:tS>t\u0007\"\u00025\u0007\u0001\u0004I\u0017aB8qi&|gn\u001d\t\u0005y)\\4(\u0003\u0002l\u000b\n\u0019Q*\u00199\t\u000b54\u0001\u0019\u00018\u0002\u000b\u0019LG.Z:\u0011\u0007=$xO\u0004\u0002qe:\u0011a(]\u0005\u0002A%\u00111oH\u0001\ba\u0006\u001c7.Y4f\u0013\t)hOA\u0002TKFT!a]\u0010\u0011\u0005alX\"A=\u000b\u0005i\\\u0018A\u00014t\u0015\tax#\u0001\u0004iC\u0012|w\u000e]\u0005\u0003}f\u0014!BR5mKN#\u0018\r^;t\u00031\u0001(/\u001a9be\u0016<&/\u001b;f))\t\u0019!!\u0003\u0002\f\u0005m\u0011Q\u0004\t\u0004I\u0005\u0015\u0011bAA\u0004\u001f\t\u0019r*\u001e;qkR<&/\u001b;fe\u001a\u000b7\r^8ss\")!m\u0002a\u0001G\"9\u0011QB\u0004A\u0002\u0005=\u0011a\u00016pEB!\u0011\u0011CA\f\u001b\t\t\u0019BC\u0002\u0002\u0016m\f\u0011\"\\1qe\u0016$WoY3\n\t\u0005e\u00111\u0003\u0002\u0004\u0015>\u0014\u0007\"\u00025\b\u0001\u0004I\u0007BBA\u0010\u000f\u0001\u00071,\u0001\u0006eCR\f7k\u00195f[\u0006\fAb];qa>\u0014HOQ1uG\"$RATA\u0013\u0003OAQA\u0019\u0005A\u0002\rDa!!\u000b\t\u0001\u0004Y\u0016AB:dQ\u0016l\u0017-A\u0006jgN\u0003H.\u001b;bE2,Gc\u0002(\u00020\u0005E\u00121\u0007\u0005\u0006E&\u0001\ra\u0019\u0005\u0006Q&\u0001\r!\u001b\u0005\b\u0003kI\u0001\u0019AA\u001c\u0003\u0011\u0001\u0018\r\u001e5\u0011\u0007a\fI$C\u0002\u0002<e\u0014A\u0001U1uQ\u0006q\"-^5mIJ+\u0017\rZ3s/&$\b\u000eU1si&$\u0018n\u001c8WC2,Xm\u001d\u000b\u0011\u0003\u0003\ny&!\u0019\u0002d\u0005\u001d\u00141NA<\u0003s\u0002rAHA\"\u0003\u000f\ni%C\u0002\u0002F}\u0011\u0011BR;oGRLwN\\\u0019\u0011\u0007\u0011\nI%C\u0002\u0002L=\u0011q\u0002U1si&$\u0018n\u001c8fI\u001aKG.\u001a\t\u0006_\u0006=\u00131K\u0005\u0004\u0003#2(\u0001C%uKJ\fGo\u001c:\u0011\t\u0005U\u00131L\u0007\u0003\u0003/R1!!\u0017\u0014\u0003!\u0019\u0017\r^1msN$\u0018\u0002BA/\u0003/\u00121\"\u00138uKJt\u0017\r\u001c*po\")!M\u0003a\u0001G\"1\u0011q\u0004\u0006A\u0002mCa!!\u001a\u000b\u0001\u0004Y\u0016a\u00049beRLG/[8o'\u000eDW-\\1\t\r\u0005%$\u00021\u0001\\\u00039\u0011X-];je\u0016$7k\u00195f[\u0006Dq!!\u001c\u000b\u0001\u0004\ty'A
\u0004gS2$XM]:\u0011\t=$\u0018\u0011\u000f\t\u0004Q\u0005M\u0014bAA;S\t1a)\u001b7uKJDQ\u0001\u001b\u0006A\u0002%Dq!a\u001f\u000b\u0001\u0004\ti(\u0001\u0006iC\u0012|w\u000e]\"p]\u001a\u0004B!a \u0002\u00066\u0011\u0011\u0011\u0011\u0006\u0004\u0003\u0007[\u0018\u0001B2p]\u001aLA!a\"\u0002\u0002\ni1i\u001c8gS\u001e,(/\u0019;j_:\fqb];qa>\u0014H\u000fR1uCRK\b/\u001a\u000b\u0004\u001d\u00065\u0005bBAH\u0017\u0001\u0007\u0011\u0011S\u0001\tI\u0006$\u0018\rV=qKB\u0019A,a%\n\u0007\u0005UUL\u0001\u0005ECR\fG+\u001f9f\u0001")
/* loaded from: input_file:org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.class */
public class OrcFileFormat implements FileFormat, DataSourceRegister, Serializable {
    /**
     * Forwards to the {@code vectorTypes} implementation inherited from {@link FileFormat}.
     *
     * <p>The decompiled forwarder invoked {@code vectorTypes(...)} unqualified. In Java that
     * resolves to this very override and recurses until the stack overflows. Qualifying the
     * call with {@code FileFormat.super} targets the interface's own implementation instead.
     *
     * <p>NOTE(review): assumes {@code FileFormat} exposes {@code vectorTypes} as an interface
     * default method (Scala 2.12+ trait encoding) — confirm against the compiled trait.
     *
     * @param structType  first schema argument, forwarded unchanged
     * @param structType2 second schema argument, forwarded unchanged
     * @param sQLConf     SQL configuration, forwarded unchanged
     * @return whatever the inherited implementation returns
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public Option<Seq<String>> vectorTypes(StructType structType, StructType structType2, SQLConf sQLConf) {
        return FileFormat.super.vectorTypes(structType, structType2, sQLConf);
    }

    /**
     * Forwards to the {@code buildReader} implementation inherited from {@link FileFormat}.
     *
     * <p>The decompiled forwarder invoked {@code buildReader(...)} unqualified. In Java that
     * resolves to this very override and recurses until the stack overflows. Qualifying the
     * call with {@code FileFormat.super} targets the interface's own implementation instead.
     *
     * <p>NOTE(review): assumes {@code FileFormat} exposes {@code buildReader} as an interface
     * default method (Scala 2.12+ trait encoding) — confirm against the compiled trait.
     *
     * @return whatever the inherited implementation returns; all arguments are forwarded unchanged
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public Function1<PartitionedFile, Iterator<InternalRow>> buildReader(SparkSession sparkSession, StructType structType, StructType structType2, StructType structType3, Seq<Filter> seq, Map<String, String> map, Configuration configuration) {
        return FileFormat.super.buildReader(sparkSession, structType, structType2, structType3, seq, map, configuration);
    }

    /**
     * Forwards to the {@code supportFieldName} implementation inherited from {@link FileFormat}.
     *
     * <p>The decompiled forwarder invoked {@code supportFieldName(str)} unqualified. In Java that
     * resolves to this very override and recurses until the stack overflows. Qualifying the
     * call with {@code FileFormat.super} targets the interface's own implementation instead.
     *
     * <p>NOTE(review): assumes {@code FileFormat} exposes {@code supportFieldName} as an interface
     * default method (Scala 2.12+ trait encoding) — confirm against the compiled trait.
     *
     * @param str the field name to check, forwarded unchanged
     * @return whatever the inherited implementation returns
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public boolean supportFieldName(String str) {
        return FileFormat.super.supportFieldName(str);
    }

    /**
     * Returns the identifier this format registers under via {@link DataSourceRegister}.
     *
     * @return the constant string {@code "orc"}
     */
    @Override // org.apache.spark.sql.sources.DataSourceRegister
    public String shortName() {
        final String registeredName = "orc";
        return registeredName;
    }

    /**
     * Returns the display name of this format.
     *
     * @return the constant string {@code "ORC"}
     */
    @Override
    public String toString() {
        return "ORC";
    }

    /**
     * Returns a hash derived solely from the runtime class, so every instance of the same
     * class hashes identically.
     *
     * <p>NOTE(review): {@code equals} accepts any {@code OrcFileFormat} instance, including
     * subclasses, whose {@code getClass()} and therefore hash would differ. Confirm that no
     * subclass relies on hash-based collections together with the base class.
     *
     * @return {@code getClass().hashCode()}
     */
    @Override
    public int hashCode() {
        return getClass().hashCode();
    }

    /**
     * Treats every {@code OrcFileFormat} instance as equal: the comparison looks only at the
     * type of {@code obj}, never at any state.
     *
     * @param obj the object to compare with; {@code null} yields {@code false}
     * @return {@code true} if {@code obj} is an {@code OrcFileFormat} (or a subclass instance)
     */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof OrcFileFormat;
    }

    /**
     * Infers the schema by delegating to {@code OrcUtils.inferSchema}.
     *
     * <p>Note the argument order of the delegate: the file list is passed before the options map.
     *
     * @param sparkSession the active session, forwarded unchanged
     * @param map          data source options, forwarded as the delegate's third argument
     * @param seq          files to inspect, forwarded as the delegate's second argument
     * @return the delegate's result
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public Option<StructType> inferSchema(SparkSession sparkSession, Map<String, String> map, Seq<FileStatus> seq) {
        final OrcUtils$ orcUtils = OrcUtils$.MODULE$;
        return orcUtils.inferSchema(sparkSession, seq, map);
    }

    /**
     * Configures the write job for ORC output and returns the factory that creates writers.
     *
     * <p>Side effects on the job's configuration, in this order:
     * <ol>
     *   <li>the {@code OrcConf.COMPRESS} attribute is set to the codec resolved by
     *       {@link OrcOptions} from {@code map} and the session's SQL configuration;</li>
     *   <li>the configuration is cast to {@link JobConf} and its output format is set to
     *       {@link OrcOutputFormat}.</li>
     * </ol>
     *
     * <p>Two decompiler artifacts are repaired here. {@code job.getConfiguration()} was assigned
     * directly to a {@code JobConf} variable, which needs an explicit cast in Java. The writer
     * factory was created as an anonymous class that passed a {@code null} enclosing instance to
     * a superclass constructor argument; it is now a static nested class, which keeps the factory
     * free of any reference to this {@code OrcFileFormat}.
     *
     * @param sparkSession session whose SQL configuration supplies ORC option defaults
     * @param job          the job whose configuration is mutated
     * @param map          data source options
     * @param structType   schema argument; not read by this method
     * @return a factory producing {@link OrcOutputWriter} instances
     * @throws ClassCastException if the job's configuration is not a {@link JobConf}
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public OutputWriterFactory prepareWrite(SparkSession sparkSession, Job job, Map<String, String> map, StructType structType) {
        OrcOptions orcOptions = new OrcOptions(map, sparkSession.sessionState().conf());
        Configuration jobConfiguration = job.getConfiguration();
        jobConfiguration.set(OrcConf.COMPRESS.getAttribute(), orcOptions.compressionCodec());
        // The compression attribute is set before the cast, matching the original ordering.
        ((JobConf) jobConfiguration).setOutputFormat(OrcOutputFormat.class);
        return new OrcOutputWriterFactory();
    }

    /** Writer factory for ORC output; holds no state and no reference to an enclosing instance. */
    private static final class OrcOutputWriterFactory extends OutputWriterFactory {
        @Override // org.apache.spark.sql.execution.datasources.OutputWriterFactory
        public OutputWriter newInstance(String str, StructType structType2, TaskAttemptContext taskAttemptContext) {
            return new OrcOutputWriter(str, structType2, taskAttemptContext);
        }

        /**
         * Builds the file extension: the extension mapped to the configured compression codec
         * name (empty string when the codec name has no mapping) followed by {@code ".orc"}.
         */
        @Override // org.apache.spark.sql.execution.datasources.OutputWriterFactory
        public String getFileExtension(TaskAttemptContext taskAttemptContext) {
            String compressionExtension = (String) OrcUtils$.MODULE$.extensionsForCompressionCodecNames().getOrElse(taskAttemptContext.getConfiguration().get(OrcConf.COMPRESS.getAttribute()), () -> {
                return "";
            });
            return compressionExtension + ".orc";
        }
    }

    /**
     * Decides whether the vectorized (batch) ORC reader may be used for {@code structType}.
     *
     * <p>All of the following must hold, checked in this order with short-circuiting:
     * the ORC vectorized reader is enabled, whole-stage codegen is enabled, the schema is not
     * rejected by {@code WholeStageCodegenExec.isTooManyFields}, and every field passes
     * {@code OrcUtils.supportColumnarReads}.
     *
     * @param sparkSession session supplying the SQL configuration
     * @param structType   schema to be read
     * @return {@code true} only if every condition above holds
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public boolean supportBatch(SparkSession sparkSession, StructType structType) {
        SQLConf sqlConf = sparkSession.sessionState().conf();
        if (!sqlConf.orcVectorizedReaderEnabled() || !sqlConf.wholeStageEnabled()) {
            return false;
        }
        if (WholeStageCodegenExec$.MODULE$.isTooManyFields(sqlConf, structType)) {
            return false;
        }
        return structType.forall(field -> {
            return BoxesRunTime.boxToBoolean($anonfun$supportBatch$1(sparkSession, field));
        });
    }

    /**
     * Reports that files of this format can be split; none of the arguments is inspected.
     *
     * @return always {@code true}
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public boolean isSplitable(SparkSession sparkSession, Map<String, String> map, Path path) {
        final boolean splitable = true;
        return splitable;
    }

    /**
     * Builds the per-file read function for ORC files.
     *
     * <p>Eagerly (once per call): computes the result schema as {@code structType3}'s fields
     * followed by {@code structType2}'s fields, decides whether the vectorized reader applies,
     * writes the case-sensitivity flag into {@code configuration}, and broadcasts that
     * configuration.
     *
     * <p>Per file (inside the returned function): reads the file's ORC schema, resolves the
     * requested column ids, optionally installs a search argument for filter push-down, and
     * returns either a columnar-batch iterator or a row iterator that appends the file's
     * partition values when {@code structType2} is non-empty. An empty iterator is returned when
     * no column ids can be resolved.
     *
     * <p>Repairs over the decompiled form: {@code m884map} (a decompiler-mangled name) is
     * restored to {@code map}; nested lambdas no longer redeclare the enclosing lambda's
     * parameter name, which Java rejects; and a tuple that was unpacked, re-wrapped and unpacked
     * again is now unpacked once.
     *
     * <p>NOTE(review): the lambda body still calls APIs that declare checked exceptions in Java
     * (for example {@code new URI(String)}); the Scala original did not have to handle them.
     *
     * @param sparkSession  session supplying SQL configuration and the broadcast facility
     * @param structType    schema passed to column-id resolution (presumably the data schema — confirm)
     * @param structType2   schema whose columns are filled from the file's partition values
     * @param structType3   required schema; the resolved column ids must match its length
     * @param seq           filters considered for push-down
     * @param map           data source options; not read by this method
     * @param configuration Hadoop configuration; mutated (case-sensitivity flag) and broadcast
     * @return a function from a file to an iterator over its rows
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public Function1<PartitionedFile, Iterator<InternalRow>> buildReaderWithPartitionValues(SparkSession sparkSession, StructType structType, StructType structType2, StructType structType3, Seq<Filter> seq, Map<String, String> map, Configuration configuration) {
        StructType resultSchema = new StructType((StructField[]) ArrayOps$.MODULE$.$plus$plus$extension(Predef$.MODULE$.refArrayOps(structType3.fields()), structType2.fields(), ClassTag$.MODULE$.apply(StructField.class)));
        SQLConf sqlConf = sparkSession.sessionState().conf();
        boolean useVectorizedReader = supportBatch(sparkSession, resultSchema);
        int batchCapacity = sqlConf.orcVectorizedReaderBatchSize();
        OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.setBoolean(configuration, sqlConf.caseSensitiveAnalysis());
        Broadcast broadcastConf = sparkSession.sparkContext().broadcast(new SerializableConfiguration(configuration), ClassTag$.MODULE$.apply(SerializableConfiguration.class));
        boolean caseSensitive = sparkSession.sessionState().conf().caseSensitiveAnalysis();
        boolean filterPushDown = sparkSession.sessionState().conf().orcFilterPushDown();
        boolean ignoreCorruptFiles = sparkSession.sessionState().conf().ignoreCorruptFiles();
        return partitionedFile -> {
            Configuration fileConf = ((SerializableConfiguration) broadcastConf.value()).value();
            Path filePath = new Path(new URI(partitionedFile.filePath()));
            OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(fileConf).filesystem(filePath.getFileSystem(fileConf));
            // A reader is created here only to obtain the file's schema.
            TypeDescription orcSchema = (TypeDescription) Utils$.MODULE$.tryWithResource(() -> {
                return OrcFile.createReader(filePath, readerOptions);
            }, reader -> {
                return reader.getSchema();
            });
            Option<Tuple2<int[], Object>> pruneInfo = OrcUtils$.MODULE$.requestedColumnIds(caseSensitive, structType, structType3, orcSchema, fileConf);
            if (pruneInfo.isEmpty()) {
                return package$.MODULE$.Iterator().empty();
            }
            if (filterPushDown && seq.nonEmpty()) {
                OrcUtils$.MODULE$.readCatalystSchema(filePath, fileConf, ignoreCorruptFiles).foreach(fileSchema -> {
                    $anonfun$buildReaderWithPartitionValues$4(seq, fileConf, fileSchema);
                    return BoxedUnit.UNIT;
                });
            }
            Tuple2<int[], Object> pruned = pruneInfo.get();
            if (pruned == null) {
                throw new MatchError(pruned);
            }
            int[] requestedColIds = pruned._1();
            boolean canPruneCols = pruned._2$mcZ$sp();
            String resultSchemaString = OrcUtils$.MODULE$.orcResultSchemaString(canPruneCols, structType, resultSchema, structType2, fileConf);
            Predef$.MODULE$.assert(requestedColIds.length == structType3.length(), () -> {
                return "[BUG] requested column IDs do not match required schema";
            });
            // Per-file copy, so the include-columns setting does not touch the broadcast value.
            Configuration taskConf = new Configuration(fileConf);
            // Column ids equal to -1 are dropped; the rest are sorted and comma-joined.
            taskConf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), Predef$.MODULE$.wrapIntArray((int[]) ArrayOps$.MODULE$.sorted$extension(Predef$.MODULE$.intArrayOps((int[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.intArrayOps(requestedColIds), columnId -> {
                return columnId != -1;
            })), Ordering$Int$.MODULE$)).mkString(","));
            FileSplit fileSplit = new FileSplit(filePath, partitionedFile.start(), partitionedFile.length(), new String[0]);
            TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(taskConf, new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
            if (useVectorizedReader) {
                OrcColumnarBatchReader batchReader = new OrcColumnarBatchReader(batchCapacity);
                RecordReaderIterator batchIterator = new RecordReaderIterator(batchReader);
                // The listener is registered before initialize()/initBatch(), so close() is
                // still scheduled if either of those calls throws.
                Option$.MODULE$.apply(TaskContext$.MODULE$.get()).foreach(taskContext -> {
                    return taskContext.addTaskCompletionListener(completedContext -> {
                        batchIterator.close();
                        return BoxedUnit.UNIT;
                    });
                });
                // Requested ids followed by one -1 per structType2 column.
                int[] requestedDataColIds = (int[]) ArrayOps$.MODULE$.$plus$plus$extension(Predef$.MODULE$.intArrayOps(requestedColIds), Array$.MODULE$.fill(structType2.length(), () -> {
                    return -1;
                }, ClassTag$.MODULE$.Int()), ClassTag$.MODULE$.Int());
                // One -1 per structType3 column followed by 0 .. structType2.length() - 1.
                int[] requestedPartitionColIds = (int[]) ArrayOps$.MODULE$.$plus$plus$extension(Predef$.MODULE$.intArrayOps((int[]) Array$.MODULE$.fill(structType3.length(), () -> {
                    return -1;
                }, ClassTag$.MODULE$.Int())), package$.MODULE$.Range().apply(0, structType2.length()), ClassTag$.MODULE$.Int());
                batchReader.initialize(fileSplit, taskAttemptContext);
                batchReader.initBatch(TypeDescription.fromString(resultSchemaString), resultSchema.fields(), requestedDataColIds, requestedPartitionColIds, partitionedFile.partitionValues());
                return batchIterator;
            }
            RecordReaderIterator rowIterator = new RecordReaderIterator(new OrcInputFormat().createRecordReader(fileSplit, taskAttemptContext));
            Option$.MODULE$.apply(TaskContext$.MODULE$.get()).foreach(rowTaskContext -> {
                return rowTaskContext.addTaskCompletionListener(finishedContext -> {
                    rowIterator.close();
                    return BoxedUnit.UNIT;
                });
            });
            Seq fullSchema = (Seq) structType3.toAttributes().$plus$plus(structType2.toAttributes());
            UnsafeProjection unsafeProjection = (UnsafeProjection) GenerateUnsafeProjection$.MODULE$.generate(fullSchema, fullSchema);
            OrcDeserializer deserializer = new OrcDeserializer(structType3, requestedColIds);
            if (structType2.length() == 0) {
                return rowIterator.map(orcStruct -> {
                    return unsafeProjection.apply(deserializer.deserialize(orcStruct));
                });
            }
            // One JoinedRow instance is shared across all rows produced for this file.
            JoinedRow joinedRow = new JoinedRow();
            return rowIterator.map(orcStruct -> {
                return unsafeProjection.apply(joinedRow.apply(deserializer.deserialize(orcStruct), partitionedFile.partitionValues()));
            });
        };
    }

    /**
     * Reports whether {@code dataType} can be handled by this format.
     *
     * <p>Atomic types are accepted outright. Struct, array, map and user-defined types are
     * accepted when their component types (fields, element, key and value, underlying SQL
     * type respectively) are accepted recursively. Anything else is rejected.
     *
     * @param dataType the type to check
     * @return {@code true} if the type, and every type nested inside it, is supported
     */
    @Override // org.apache.spark.sql.execution.datasources.FileFormat
    public boolean supportDataType(DataType dataType) {
        if (dataType instanceof AtomicType) {
            return true;
        }
        if (dataType instanceof StructType) {
            StructType struct = (StructType) dataType;
            return struct.forall(field -> {
                return BoxesRunTime.boxToBoolean($anonfun$supportDataType$1(this, field));
            });
        }
        if (dataType instanceof ArrayType) {
            ArrayType array = (ArrayType) dataType;
            return supportDataType(array.elementType());
        }
        if (dataType instanceof MapType) {
            MapType mapType = (MapType) dataType;
            // The key is checked first; the value is only examined when the key is supported.
            if (!supportDataType(mapType.keyType())) {
                return false;
            }
            return supportDataType(mapType.valueType());
        }
        if (dataType instanceof UserDefinedType) {
            return supportDataType(((UserDefinedType) dataType).sqlType());
        }
        return false;
    }

    /**
     * Lambda body used by {@code supportBatch}: asks {@code OrcUtils.supportColumnarReads}
     * whether the field's data type can be read in columnar form, passing along the session's
     * nested-column vectorized-reader setting.
     */
    public static final /* synthetic */ boolean $anonfun$supportBatch$1(SparkSession sparkSession, StructField structField) {
        DataType fieldType = structField.dataType();
        boolean nestedColumnEnabled = sparkSession.sessionState().conf().orcVectorizedReaderNestedColumnEnabled();
        return OrcUtils$.MODULE$.supportColumnarReads(fieldType, nestedColumnEnabled);
    }

    /**
     * Lambda body used during filter push-down: registers {@code searchArgument} on
     * {@code configuration} through {@code OrcInputFormat.setSearchArgument}, together with the
     * field names of {@code structType}.
     */
    public static final /* synthetic */ void $anonfun$buildReaderWithPartitionValues$5(Configuration configuration, StructType structType, SearchArgument searchArgument) {
        String[] columnNames = structType.fieldNames();
        OrcInputFormat.setSearchArgument(configuration, searchArgument, columnNames);
    }

    /**
     * Lambda body used during filter push-down: asks {@code OrcFilters.createFilter} for a
     * search argument built from {@code structType} and {@code seq}, and, if one is produced,
     * installs it on {@code configuration}.
     */
    public static final /* synthetic */ void $anonfun$buildReaderWithPartitionValues$4(Seq seq, Configuration configuration, StructType structType) {
        OrcFilters$.MODULE$.createFilter(structType, seq).foreach(sarg -> {
            $anonfun$buildReaderWithPartitionValues$5(configuration, structType, sarg);
            return BoxedUnit.UNIT;
        });
    }

    /**
     * Lambda body used by {@code supportDataType} for struct fields: checks the field's data
     * type against the given format instance.
     */
    public static final /* synthetic */ boolean $anonfun$supportDataType$1(OrcFileFormat orcFileFormat, StructField structField) {
        DataType fieldType = structField.dataType();
        return orcFileFormat.supportDataType(fieldType);
    }

    /**
     * Creates the format and invokes the static {@code $init$} hook exposed by
     * {@link FileFormat} on the new instance (presumably the Scala trait initializer — confirm).
     */
    public OrcFileFormat() {
        FileFormat.$init$(this);
    }
}
