package org.apache.iceberg.spark.source;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.iceberg.BlobMetadata;
import org.apache.iceberg.ScanTask;
import org.apache.iceberg.ScanTaskGroup;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.metrics.ScanReport;
import org.apache.iceberg.puffin.StandardBlobTypes;
import org.apache.iceberg.relocated.com.google.common.base.Strings;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkReadConf;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.spark.source.metrics.EqualityDeleteFiles;
import org.apache.iceberg.spark.source.metrics.IndexedDeleteFiles;
import org.apache.iceberg.spark.source.metrics.NumDeletes;
import org.apache.iceberg.spark.source.metrics.NumSplits;
import org.apache.iceberg.spark.source.metrics.PositionalDeleteFiles;
import org.apache.iceberg.spark.source.metrics.ResultDataFiles;
import org.apache.iceberg.spark.source.metrics.ResultDeleteFiles;
import org.apache.iceberg.spark.source.metrics.ScannedDataManifests;
import org.apache.iceberg.spark.source.metrics.ScannedDeleteManifests;
import org.apache.iceberg.spark.source.metrics.SkippedDataFiles;
import org.apache.iceberg.spark.source.metrics.SkippedDataManifests;
import org.apache.iceberg.spark.source.metrics.SkippedDeleteFiles;
import org.apache.iceberg.spark.source.metrics.SkippedDeleteManifests;
import org.apache.iceberg.spark.source.metrics.TaskEqualityDeleteFiles;
import org.apache.iceberg.spark.source.metrics.TaskIndexedDeleteFiles;
import org.apache.iceberg.spark.source.metrics.TaskPositionalDeleteFiles;
import org.apache.iceberg.spark.source.metrics.TaskResultDataFiles;
import org.apache.iceberg.spark.source.metrics.TaskResultDeleteFiles;
import org.apache.iceberg.spark.source.metrics.TaskScannedDataManifests;
import org.apache.iceberg.spark.source.metrics.TaskScannedDeleteManifests;
import org.apache.iceberg.spark.source.metrics.TaskSkippedDataFiles;
import org.apache.iceberg.spark.source.metrics.TaskSkippedDataManifests;
import org.apache.iceberg.spark.source.metrics.TaskSkippedDeleteFiles;
import org.apache.iceberg.spark.source.metrics.TaskSkippedDeleteManifests;
import org.apache.iceberg.spark.source.metrics.TaskTotalDataFileSize;
import org.apache.iceberg.spark.source.metrics.TaskTotalDataManifests;
import org.apache.iceberg.spark.source.metrics.TaskTotalDeleteFileSize;
import org.apache.iceberg.spark.source.metrics.TaskTotalDeleteManifests;
import org.apache.iceberg.spark.source.metrics.TaskTotalPlanningDuration;
import org.apache.iceberg.spark.source.metrics.TotalDataFileSize;
import org.apache.iceberg.spark.source.metrics.TotalDataManifests;
import org.apache.iceberg.spark.source.metrics.TotalDeleteFileSize;
import org.apache.iceberg.spark.source.metrics.TotalDeleteManifests;
import org.apache.iceberg.spark.source.metrics.TotalPlanningDuration;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.SnapshotUtil;
import org.apache.iceberg.util.TableScanUtil;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.connector.expressions.FieldReference;
import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.metric.CustomMetric;
import org.apache.spark.sql.connector.metric.CustomTaskMetric;
import org.apache.spark.sql.connector.read.Batch;
import org.apache.spark.sql.connector.read.Scan;
import org.apache.spark.sql.connector.read.Statistics;
import org.apache.spark.sql.connector.read.SupportsReportStatistics;
import org.apache.spark.sql.connector.read.streaming.MicroBatchStream;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/iceberg/spark/source/SparkScan.class */
/**
 * Base class for Iceberg scans handed to Spark through the DataSource V2 {@link Scan} API.
 *
 * <p>It holds the table, read configuration, expected schema and filter expressions of a scan,
 * creates the batch and micro-batch readers, reports size/row-count/column statistics and exposes
 * scan-planning metrics taken from a {@link ScanReport}. Subclasses supply the planned task groups
 * via {@link #taskGroups()}.
 */
abstract class SparkScan implements Scan, SupportsReportStatistics {
    private static final Logger LOG = LoggerFactory.getLogger(SparkScan.class);

    /** Blob property that carries the number of distinct values of a column. */
    private static final String NDV_KEY = "ndv";

    /** Snapshot summary entry read by {@link #totalRecords(Snapshot)}. */
    private static final String TOTAL_RECORDS_KEY = "total-records";

    private final JavaSparkContext sparkContext;
    private final Table table;
    private final SparkSession spark;
    private final SparkReadConf readConf;
    private final boolean caseSensitive;
    private final Schema expectedSchema;
    private final List<Expression> filterExpressions;
    private final String branch;
    private final Supplier<ScanReport> scanReportSupplier;

    // Spark view of expectedSchema; converted on first use by readSchema().
    private StructType readSchema;

    /**
     * Creates a scan over {@code table}.
     *
     * <p>The metadata column references of {@code expectedSchema} are validated against the schema
     * of the configured branch before any field is assigned.
     *
     * @param spark the active Spark session
     * @param table the table to scan
     * @param readConf read configuration; also provides the branch and case sensitivity
     * @param expectedSchema the schema this scan should produce
     * @param filters filter expressions of the scan; {@code null} is treated as "no filters"
     * @param scanReportSupplier source of the scan report used for driver metrics; may be
     *     {@code null}
     */
    public SparkScan(
            SparkSession spark,
            Table table,
            SparkReadConf readConf,
            Schema expectedSchema,
            List<Expression> filters,
            Supplier<ScanReport> scanReportSupplier) {
        Schema branchSchema = SnapshotUtil.schemaFor(table, readConf.branch());
        SparkSchemaUtil.validateMetadataColumnReferences(branchSchema, expectedSchema);

        this.spark = spark;
        this.sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
        this.table = table;
        this.readConf = readConf;
        this.caseSensitive = readConf.caseSensitive();
        this.expectedSchema = expectedSchema;
        this.filterExpressions = filters != null ? filters : Collections.emptyList();
        this.branch = readConf.branch();
        this.scanReportSupplier = scanReportSupplier;
    }

    /** Returns the table being scanned. */
    public Table table() {
        return this.table;
    }

    /** Returns the branch taken from the read configuration. */
    public String branch() {
        return this.branch;
    }

    /** Returns the case-sensitivity setting taken from the read configuration. */
    public boolean caseSensitive() {
        return this.caseSensitive;
    }

    /** Returns the Iceberg schema this scan is expected to produce. */
    public Schema expectedSchema() {
        return this.expectedSchema;
    }

    /** Returns the filter expressions of this scan; never {@code null}. */
    public List<Expression> filterExpressions() {
        return this.filterExpressions;
    }

    /**
     * Returns the type of the grouping key of the task groups.
     *
     * <p>This base implementation returns a struct without fields.
     */
    protected Types.StructType groupingKeyType() {
        return Types.StructType.of();
    }

    /** Returns the task groups planned for this scan. */
    protected abstract List<? extends ScanTaskGroup<?>> taskGroups();

    /** Creates the batch reader for the task groups of this scan. */
    public Batch toBatch() {
        return new SparkBatch(
                this.sparkContext,
                this.table,
                this.readConf,
                groupingKeyType(),
                taskGroups(),
                this.expectedSchema,
                hashCode());
    }

    /**
     * Creates the micro-batch stream for this scan.
     *
     * @param str passed through unchanged to {@code SparkMicroBatchStream}
     */
    public MicroBatchStream toMicroBatchStream(String str) {
        return new SparkMicroBatchStream(this.sparkContext, this.table, this.readConf, this.expectedSchema, str);
    }

    /** Returns the Spark schema converted from the expected schema; the result is cached. */
    public StructType readSchema() {
        if (this.readSchema == null) {
            this.readSchema = SparkSchemaUtil.convert(this.expectedSchema);
        }
        return this.readSchema;
    }

    /** Estimates statistics using the latest snapshot of the configured branch. */
    public Statistics estimateStatistics() {
        return estimateStatistics(SnapshotUtil.latestSnapshot(this.table, this.branch));
    }

    /**
     * Estimates size, row count and column statistics of this scan.
     *
     * <p>Column statistics are only collected when the read configuration asks for them and the
     * Spark cost-based optimizer setting is enabled. The row count comes from the snapshot summary
     * when the table is partitioned and the scan has no filters; otherwise it is the sum of the
     * estimated row counts of the planned task groups.
     *
     * @param snapshot the snapshot to estimate for; {@code null} yields zero size and zero rows
     * @return the estimated statistics
     */
    public Statistics estimateStatistics(Snapshot snapshot) {
        if (snapshot == null) {
            return new Stats(0L, 0L, Collections.emptyMap());
        }

        boolean cboEnabled =
                Boolean.parseBoolean(this.spark.conf().get(SQLConf.CBO_ENABLED().key(), "false"));

        Map<NamedReference, SparkColumnStatistics> columnStats;
        if (this.readConf.reportColumnStats() && cboEnabled) {
            columnStats = columnStatistics();
        } else {
            columnStats = Collections.emptyMap();
        }

        long rowCount;
        if (!this.table.spec().isUnpartitioned() && this.filterExpressions.isEmpty()) {
            LOG.debug(
                    "Using snapshot {} metadata to estimate statistics for table {}",
                    snapshot.snapshotId(),
                    this.table.name());
            rowCount = totalRecords(snapshot);
        } else {
            rowCount = taskGroups().stream().mapToLong(ScanTaskGroup::estimatedRowsCount).sum();
        }

        long sizeInBytes = SparkSchemaUtil.estimateSize(readSchema(), rowCount);
        // The read-only view lets the compiler infer the value type expected by Stats from the
        // typed map, so no raw Map is needed here.
        return new Stats(sizeInBytes, rowCount, Collections.unmodifiableMap(columnStats));
    }

    /**
     * Builds per-column statistics from the blobs of the first statistics file of the table.
     *
     * <p>Blobs are grouped by their first field id. Blobs that list no field, and field ids for
     * which the table schema returns no column name, are skipped.
     *
     * @return a new mutable map, empty when the table has no statistics files
     */
    private Map<NamedReference, SparkColumnStatistics> columnStatistics() {
        Map<NamedReference, SparkColumnStatistics> columnStats = Maps.newHashMap();
        List<StatisticsFile> statisticsFiles = this.table.statisticsFiles();
        if (statisticsFiles.isEmpty()) {
            return columnStats;
        }

        Map<Integer, List<BlobMetadata>> blobsByFieldId =
                statisticsFiles.get(0).blobMetadata().stream()
                        // a blob without fields cannot be attributed to a column
                        .filter(blob -> !blob.fields().isEmpty())
                        .collect(Collectors.groupingBy(blob -> blob.fields().get(0)));

        for (Map.Entry<Integer, List<BlobMetadata>> entry : blobsByFieldId.entrySet()) {
            String columnName = this.table.schema().findColumnName(entry.getKey());
            if (columnName == null) {
                LOG.debug(
                        "Skipping statistics for field id {}: no such column in table {}",
                        entry.getKey(),
                        this.table.name());
                continue;
            }

            Long ndv = distinctValueCount(entry.getValue(), columnName);
            columnStats.put(
                    FieldReference.column(columnName),
                    new SparkColumnStatistics(ndv, null, null, null, null, null, null));
        }
        return columnStats;
    }

    /**
     * Extracts the number of distinct values of a column from its blobs.
     *
     * <p>Only theta-sketch blobs are considered. When several of them carry the property, the last
     * one wins.
     *
     * @param blobs the blobs recorded for the column
     * @param columnName the column name, used for logging only
     * @return the distinct-value count, or {@code null} when no blob provides one
     */
    private static Long distinctValueCount(List<BlobMetadata> blobs, String columnName) {
        Long ndv = null;
        for (BlobMetadata blob : blobs) {
            if (blob.type().equals(StandardBlobTypes.APACHE_DATASKETCHES_THETA_V1)) {
                String ndvValue = blob.properties().get(NDV_KEY);
                if (Strings.isNullOrEmpty(ndvValue)) {
                    LOG.debug("{} is not set in BlobMetadata for column {}", NDV_KEY, columnName);
                } else {
                    ndv = Long.parseLong(ndvValue);
                }
            } else {
                LOG.debug("Blob type {} is not supported yet", blob.type());
            }
        }
        return ndv;
    }

    /**
     * Reads the total record count from the summary of {@code snapshot}, passing
     * {@link Long#MAX_VALUE} as the default value of the lookup.
     */
    private long totalRecords(Snapshot snapshot) {
        return PropertyUtil.propertyAsLong(snapshot.summary(), TOTAL_RECORDS_KEY, Long.MAX_VALUE);
    }

    /** Describes this scan by its table, branch, filters and grouping key field names. */
    public String description() {
        String groupingKeyFieldNames =
                groupingKeyType().fields().stream()
                        .map(Types.NestedField::name)
                        .collect(Collectors.joining(", "));

        return String.format(
                "%s (branch=%s) [filters=%s, groupedBy=%s]",
                table(),
                branch(),
                Spark3Util.describe(this.filterExpressions),
                groupingKeyFieldNames);
    }

    /**
     * Converts the scan report into driver-side task metrics.
     *
     * @return one metric per reported value, or an empty array when there is no supplier or the
     *     supplier returns no report
     */
    public CustomTaskMetric[] reportDriverMetrics() {
        ScanReport scanReport = this.scanReportSupplier != null ? this.scanReportSupplier.get() : null;
        if (scanReport == null) {
            return new CustomTaskMetric[0];
        }

        List<CustomTaskMetric> driverMetrics = Lists.newArrayList();

        // planning
        driverMetrics.add(TaskTotalPlanningDuration.from(scanReport));

        // data manifests and data files
        driverMetrics.add(TaskTotalDataManifests.from(scanReport));
        driverMetrics.add(TaskScannedDataManifests.from(scanReport));
        driverMetrics.add(TaskSkippedDataManifests.from(scanReport));
        driverMetrics.add(TaskResultDataFiles.from(scanReport));
        driverMetrics.add(TaskSkippedDataFiles.from(scanReport));
        driverMetrics.add(TaskTotalDataFileSize.from(scanReport));

        // delete manifests and delete files
        driverMetrics.add(TaskTotalDeleteManifests.from(scanReport));
        driverMetrics.add(TaskScannedDeleteManifests.from(scanReport));
        driverMetrics.add(TaskSkippedDeleteManifests.from(scanReport));
        driverMetrics.add(TaskTotalDeleteFileSize.from(scanReport));
        driverMetrics.add(TaskResultDeleteFiles.from(scanReport));
        driverMetrics.add(TaskEqualityDeleteFiles.from(scanReport));
        driverMetrics.add(TaskIndexedDeleteFiles.from(scanReport));
        driverMetrics.add(TaskPositionalDeleteFiles.from(scanReport));
        driverMetrics.add(TaskSkippedDeleteFiles.from(scanReport));

        return driverMetrics.toArray(new CustomTaskMetric[0]);
    }

    /** Returns the custom metrics this scan declares to Spark. */
    public CustomMetric[] supportedCustomMetrics() {
        return new CustomMetric[] {
            new NumSplits(),
            new NumDeletes(),
            new TotalPlanningDuration(),
            new TotalDataManifests(),
            new ScannedDataManifests(),
            new SkippedDataManifests(),
            new ResultDataFiles(),
            new SkippedDataFiles(),
            new TotalDataFileSize(),
            new TotalDeleteManifests(),
            new ScannedDeleteManifests(),
            new SkippedDeleteManifests(),
            new TotalDeleteFileSize(),
            new ResultDeleteFiles(),
            new EqualityDeleteFiles(),
            new IndexedDeleteFiles(),
            new PositionalDeleteFiles(),
            new SkippedDeleteFiles()
        };
    }

    /**
     * Returns the split size to use for the given tasks.
     *
     * <p>The size is only adjusted when no split size option is set in the read configuration and
     * adaptive split sizing is enabled; otherwise {@code j} is returned unchanged.
     *
     * @param list the tasks whose sizes are summed
     * @param j the configured split size
     * @return the adjusted split size, or {@code j} when no adjustment applies
     */
    public long adjustSplitSize(List<? extends ScanTask> list, long j) {
        boolean adaptive =
                this.readConf.splitSizeOption() == null && this.readConf.adaptiveSplitSizeEnabled();
        if (!adaptive) {
            return j;
        }

        long scanSize = list.stream().mapToLong(ScanTask::sizeBytes).sum();
        return TableScanUtil.adjustSplitSize(scanSize, this.readConf.parallelism(), j);
    }
}
