package org.apache.spark.sql.execution.datasources.csv;

import com.univocity.parsers.csv.CsvParser;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.spark.TaskContext$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.execution.datasources.DataSource;
import org.apache.spark.sql.execution.datasources.DataSource$;
import org.apache.spark.sql.execution.datasources.HadoopFileLinesReader;
import org.apache.spark.sql.execution.datasources.PartitionedFile;
import org.apache.spark.sql.execution.datasources.text.TextFileFormat;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.runtime.ScalaRunTime$;

/* compiled from: CSVDataSource.scala */
/* loaded from: input_file:org/apache/spark/sql/execution/datasources/csv/TextInputCSVDataSource$.class */
/**
 * Singleton CSV data source that treats its input as line-oriented text.
 *
 * <p>This is the Java view of a Scala {@code object}: the single instance is published through
 * {@link #MODULE$} and preserved across Java deserialization by {@link #readResolve()}.
 * The {@code TextInputCSVDataSource$$anonfun$N} classes referenced below are compiler-generated
 * closures whose bodies are not part of this file; comments about what they do are marked as
 * assumptions.
 */
public final class TextInputCSVDataSource$ extends CSVDataSource {
    /**
     * The one and only instance. Initialized directly in the declaration: a {@code static final}
     * field cannot legally be assigned from an instance constructor.
     */
    public static final TextInputCSVDataSource$ MODULE$ = new TextInputCSVDataSource$();

    /** Value reported by {@link #isSplitable()}; set to {@code true} in the constructor. */
    private final boolean isSplitable;

    /** Private so that {@link #MODULE$} remains the only instance. */
    private TextInputCSVDataSource$() {
        this.isSplitable = true;
    }

    /**
     * @return the stored split flag, which this data source always sets to {@code true}
     */
    @Override // org.apache.spark.sql.execution.datasources.csv.CSVDataSource
    public boolean isSplitable() {
        return this.isSplitable;
    }

    /**
     * Reads one partitioned file line by line and parses the lines into rows.
     *
     * @param configuration   Hadoop configuration handed to the line reader
     * @param partitionedFile the file (or slice of a file) to read
     * @param univocityParser parser that supplies the CSV options and converts lines to rows
     * @param structType      schema passed to the row parser and to the header callback
     * @param structType2     second schema, only forwarded to the header callback
     *                        (NOTE(review): presumably the full data schema - confirm)
     * @param z               flag only forwarded to the header callback
     *                        (NOTE(review): meaning not visible here - confirm against the Scala source)
     * @param z2              flag only forwarded to the header callback
     *                        (NOTE(review): meaning not visible here - confirm against the Scala source)
     * @return an iterator over the rows parsed from the file's lines
     */
    @Override // org.apache.spark.sql.execution.datasources.csv.CSVDataSource
    public Iterator<InternalRow> readFile(Configuration configuration, PartitionedFile partitionedFile, UnivocityParser univocityParser, StructType structType, StructType structType2, boolean z, boolean z2) {
        HadoopFileLinesReader linesReader = new HadoopFileLinesReader(partitionedFile, configuration);

        // Only when running inside a task: hand the reader to a task-scoped callback.
        // NOTE(review): presumably registers a completion listener that closes the reader - confirm.
        Option$.MODULE$.apply(TaskContext$.MODULE$.get()).foreach(new TextInputCSVDataSource$$anonfun$4(linesReader));

        // NOTE(review): presumably decodes each Hadoop Text line using the parser's charset - confirm.
        Iterator<String> lines = linesReader.map(new TextInputCSVDataSource$$anonfun$5(univocityParser));

        // A header can only be present in the slice that starts at the beginning of the file.
        boolean sliceMayHoldHeader = univocityParser.options().headerFlag() && partitionedFile.start() == 0;
        if (sliceMayHoldHeader) {
            // NOTE(review): extractHeader presumably consumes the header line from `lines` - confirm.
            CSVUtils$.MODULE$.extractHeader(lines, univocityParser.options()).foreach(new TextInputCSVDataSource$$anonfun$readFile$1(partitionedFile, univocityParser, structType, structType2, z, z2));
        }
        return UnivocityParser$.MODULE$.parseIterator(lines, univocityParser, structType);
    }

    /**
     * Infers a schema for the given files.
     *
     * <p>Loads the files as a dataset of lines, takes the first line that survives
     * {@code filterCommentAndEmpty} (if any) and delegates to
     * {@link #inferFromDataset(SparkSession, Dataset, Option, CSVOptions)}.
     *
     * @param sparkSession active session
     * @param seq          the input files
     * @param cSVOptions   CSV options
     * @return the inferred schema
     */
    @Override // org.apache.spark.sql.execution.datasources.csv.CSVDataSource
    public StructType infer(SparkSession sparkSession, Seq<FileStatus> seq, CSVOptions cSVOptions) {
        Dataset<String> baseLines = createBaseDataset(sparkSession, seq, cSVOptions);
        Object[] leadingLines = (Object[]) CSVUtils$.MODULE$.filterCommentAndEmpty(baseLines, cSVOptions).take(1);
        return inferFromDataset(sparkSession, baseLines, Predef$.MODULE$.refArrayOps(leadingLines).headOption(), cSVOptions);
    }

    /**
     * Infers a schema from a dataset of CSV lines.
     *
     * @param sparkSession active session; supplies the case-sensitivity setting
     * @param dataset      the CSV lines
     * @param option       the first meaningful line, if there is one
     * @param cSVOptions   CSV options
     * @return an empty schema when there is no first line or it parses to {@code null};
     *         otherwise the schema produced by {@code CSVInferSchema.infer}
     */
    public StructType inferFromDataset(SparkSession sparkSession, Dataset<String> dataset, Option<String> option, CSVOptions cSVOptions) {
        // Tokenize the first line (when present) with a parser configured from the options.
        Serializable parsedFirstLine = option.map(new TextInputCSVDataSource$$anonfun$8(new CsvParser(cSVOptions.asParserSettings())));
        String[] firstRow = parsedFirstLine instanceof Some ? (String[]) ((Some) parsedFirstLine).x() : null;
        if (firstRow == null) {
            // Nothing usable to derive column names from.
            return StructType$.MODULE$.apply(Nil$.MODULE$);
        }

        String[] header = makeSafeHeader(firstRow, sparkSession.sessionState().conf().caseSensitiveAnalysis(), cSVOptions);
        Dataset<String> sampled = CSVUtils$.MODULE$.sample(dataset, cSVOptions);
        // NOTE(review): the per-partition closure presumably tokenizes lines and skips the header - confirm.
        return CSVInferSchema$.MODULE$.infer(sampled.rdd().mapPartitions(new TextInputCSVDataSource$$anonfun$9(option, cSVOptions), sampled.rdd().mapPartitions$default$2(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class))), header, cSVOptions);
    }

    /**
     * Loads the given files as a dataset with one string per line.
     *
     * <p>When the configured charset is UTF-8 the files are read through {@code TextFileFormat};
     * for any other charset they are read as raw Hadoop text records and converted to strings
     * by a closure that receives the charset name.
     */
    private Dataset<String> createBaseDataset(SparkSession sparkSession, Seq<FileStatus> seq, CSVOptions cSVOptions) {
        // NOTE(review): the closure presumably maps each FileStatus to its path string - confirm.
        Seq paths = (Seq) seq.map(new TextInputCSVDataSource$$anonfun$10(), Seq$.MODULE$.canBuildFrom());

        if (!StandardCharsets.UTF_8.equals(Charset.forName(cSVOptions.charset()))) {
            String charsetName = cSVOptions.charset();
            RDD rawRecords = sparkSession.sparkContext().hadoopFile(paths.mkString(","), ClassTag$.MODULE$.apply(LongWritable.class), ClassTag$.MODULE$.apply(Text.class), ClassTag$.MODULE$.apply(TextInputFormat.class));
            return sparkSession.createDataset(rawRecords.mapPartitions(new TextInputCSVDataSource$$anonfun$11(charsetName), rawRecords.mapPartitions$default$2(), ClassTag$.MODULE$.apply(String.class)), Encoders$.MODULE$.STRING());
        }

        return sparkSession.baseRelationToDataFrame(new DataSource(sparkSession, TextFileFormat.class.getName(), paths, DataSource$.MODULE$.apply$default$4(), DataSource$.MODULE$.apply$default$5(), DataSource$.MODULE$.apply$default$6(), cSVOptions.parameters(), DataSource$.MODULE$.apply$default$8()).resolveRelation(false)).select("value", Predef$.MODULE$.wrapRefArray(new String[0])).as(Encoders$.MODULE$.STRING());
    }

    /** Serialization hook: replaces any deserialized copy with the singleton {@link #MODULE$}. */
    private Object readResolve() {
        return MODULE$;
    }
}
