package org.talend.dataprep.dataset;

import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataprep.api.dataset.ColumnMetadata;
import org.talend.dataprep.api.dataset.Quality;
import org.talend.dataprep.api.dataset.statistics.DataFrequency;
import org.talend.dataprep.api.dataset.statistics.HistogramRange;
import org.talend.dataprep.api.dataset.statistics.PatternFrequency;
import org.talend.dataprep.api.dataset.statistics.Quantiles;
import org.talend.dataprep.api.dataset.statistics.SemanticDomain;
import org.talend.dataprep.api.dataset.statistics.Statistics;
import org.talend.dataprep.api.dataset.statistics.TextLengthSummary;
import org.talend.dataprep.api.dataset.statistics.date.DateHistogram;
import org.talend.dataprep.api.dataset.statistics.date.StreamDateHistogramStatistics;
import org.talend.dataprep.api.dataset.statistics.number.NumberHistogram;
import org.talend.dataprep.api.dataset.statistics.number.StreamNumberHistogramStatistics;
import org.talend.dataprep.api.type.Type;
import org.talend.dataprep.api.type.TypeUtils;
import org.talend.dataquality.common.inference.Analyzers;
import org.talend.dataquality.common.inference.ValueQualityStatistics;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;
import org.talend.dataquality.semantic.recognizer.CategoryFrequency;
import org.talend.dataquality.semantic.statistics.SemanticType;
import org.talend.dataquality.statistics.cardinality.CardinalityStatistics;
import org.talend.dataquality.statistics.frequency.DataTypeFrequencyStatistics;
import org.talend.dataquality.statistics.frequency.pattern.PatternFrequencyStatistics;
import org.talend.dataquality.statistics.numeric.histogram.Range;
import org.talend.dataquality.statistics.numeric.quantile.QuantileStatistics;
import org.talend.dataquality.statistics.numeric.summary.SummaryStatistics;
import org.talend.dataquality.statistics.text.TextLengthStatistics;
import org.talend.dataquality.statistics.type.DataTypeOccurences;

/* loaded from: input_file:org/talend/dataprep/dataset/StatisticsAdapter.class */
/**
 * Copies the outcome of data quality analyzers ({@link Analyzers.Result}) into the
 * {@link ColumnMetadata} of a data set: suggested type, value quality, semantic domain,
 * cardinality, value / pattern frequencies, quantiles, numeric summary, text length and
 * histograms.
 *
 * <p>Each piece of information is only injected when the matching statistics object exists in
 * the analysis result, so the adapter can be used with any combination of analyzers.
 */
public class StatisticsAdapter {
    private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsAdapter.class);

    /** Number of most frequent values (and patterns) requested from the frequency statistics. */
    private static final int TOP_FREQUENCIES = 15;

    /** Maximum number of candidate semantic domains stored on a column. */
    private static final long MAX_SEMANTIC_DOMAINS = 10L;

    /** Candidate semantic domains with a frequency below this value are not stored. */
    private static final float MIN_SEMANTIC_DOMAIN_FREQUENCY = 1.0f;

    /** A semantic category is selected as column domain only if its frequency is strictly greater. */
    private final int semanticThreshold;

    /**
     * @param i the semantic threshold: the frequency a semantic category must exceed (strictly)
     * to be selected as the domain of a column.
     */
    public StatisticsAdapter(int i) {
        this.semanticThreshold = i;
    }

    /**
     * Injects analysis results into all the given columns.
     *
     * @param list the columns to update.
     * @param list2 the analysis results, consumed in order: one result per column.
     */
    public void adapt(List<ColumnMetadata> list, List<Analyzers.Result> list2) {
        adapt(list, list2, column -> true);
    }

    /**
     * Injects analysis results into the columns accepted by the filter.
     *
     * @param list the columns to update.
     * @param list2 the analysis results, consumed in order: one result per <b>accepted</b> column.
     * @param predicate selects the columns that receive a result; rejected columns are skipped
     * without consuming a result.
     */
    public void adapt(List<ColumnMetadata> list, List<Analyzers.Result> list2, Predicate<ColumnMetadata> predicate) {
        genericAdapt(list, list2, predicate);
    }

    /**
     * Pairs each accepted column with the next available result and injects it. Columns left
     * once the results are exhausted are not modified.
     */
    private void genericAdapt(List<ColumnMetadata> list, List<Analyzers.Result> list2, Predicate<ColumnMetadata> predicate) {
        Iterator<Analyzers.Result> results = list2.iterator();
        for (ColumnMetadata column : list) {
            if (predicate.test(column) && results.hasNext()) {
                Analyzers.Result result = results.next();
                // The type must be injected first: several of the following steps read column.getType().
                injectDataTypeAnalysis(column, result);
                adaptCommonAnalysis(column, result);
            }
        }
    }

    /** Runs every injection step that does not concern the data type / value quality. */
    private void adaptCommonAnalysis(ColumnMetadata column, Analyzers.Result result) {
        injectSemanticTypes(column, result);
        injectCardinality(column, result);
        injectDataFrequency(column, result);
        injectPatternFrequency(column, result);
        injectQuantile(column, result);
        injectNumberSummary(column, result);
        injectTextLength(column, result);
        injectNumberHistogram(column, result);
        injectDateHistogram(column, result);
    }

    /** Sets the suggested type (unless the type was forced on the column), then the value quality. */
    private void injectDataTypeAnalysis(ColumnMetadata column, Analyzers.Result result) {
        if (result.exist(DataTypeOccurences.class) && !column.isTypeForced()) {
            String suggestedType = result.get(DataTypeOccurences.class).getSuggestedType().name();
            column.setType(Type.get(suggestedType).getName());
        }
        injectValueQuality(column, result);
    }

    /** Copies the empty / valid / invalid counts to both the column quality and its statistics. */
    private void injectValueQuality(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(ValueQualityStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        Quality quality = column.getQuality();
        ValueQualityStatistics valueQuality = result.get(ValueQualityStatistics.class);

        long allCount = valueQuality.getCount();
        long emptyCount = valueQuality.getEmptyCount();
        long validCount = valueQuality.getValidCount();
        // Invalid values are not reported directly: they are what remains once empty and valid are removed.
        long invalidCount = allCount - emptyCount - validCount;

        quality.setEmpty((int) emptyCount);
        quality.setValid((int) validCount);
        quality.setInvalid((int) invalidCount);

        statistics.setCount(allCount);
        statistics.setEmpty((int) emptyCount);
        statistics.setInvalid((int) invalidCount);
        statistics.setValid(validCount);
    }

    /**
     * Selects the column domain among the detected semantic categories and stores the list of
     * candidate domains. Nothing is done when the domain was forced on the column.
     */
    private void injectSemanticTypes(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(SemanticType.class) || column.isDomainForced()) {
            return;
        }
        Map<CategoryFrequency, Long> categories = result.get(SemanticType.class).getCategoryToCount();

        // Best named category according to CategoryFrequency's natural ordering.
        Optional<CategoryFrequency> best = categories.keySet().stream()
                .filter(category -> !category.getCategoryName().isEmpty())
                .max(CategoryFrequency::compareTo);

        if (best.isPresent()) {
            CategoryFrequency category = best.get();
            float frequency = category.getFrequency();
            if (frequency > this.semanticThreshold) {
                String categoryId = category.getCategoryId();
                try {
                    SemanticCategoryEnum knownCategory = SemanticCategoryEnum.valueOf(categoryId);
                    column.setDomain(knownCategory.getId());
                    column.setDomainLabel(knownCategory.getDisplayName());
                    column.setDomainFrequency(frequency);
                } catch (IllegalArgumentException e) {
                    // Unknown category: the current domain of the column is left untouched.
                    LOGGER.error("Could not find {} in known categories.", categoryId, e);
                }
            } else {
                // Best candidate is not frequent enough: the column has no domain.
                resetDomain(column);
            }
        } else if (!StringUtils.isEmpty(column.getDomain())) {
            // No named category detected: remove a previously set domain.
            resetDomain(column);
        }

        List<SemanticDomain> semanticDomains = categories.keySet().stream()
                .filter(category -> !StringUtils.isEmpty(category.getCategoryId()))
                .map(StatisticsAdapter::toSemanticDomain)
                .filter(domain -> domain.getFrequency() >= MIN_SEMANTIC_DOMAIN_FREQUENCY)
                .limit(MAX_SEMANTIC_DOMAINS)
                .collect(Collectors.toList());
        column.setSemanticDomains(semanticDomains);
    }

    /** Builds the candidate domain (id, label, frequency) describing a detected category. */
    private static SemanticDomain toSemanticDomain(CategoryFrequency category) {
        String categoryId = category.getCategoryId();
        return new SemanticDomain(categoryId, TypeUtils.getDomainLabel(categoryId), category.getFrequency());
    }

    /** Removes the domain information (id, label and frequency) from the column. */
    private static void resetDomain(ColumnMetadata column) {
        column.setDomain("");
        column.setDomainLabel("");
        column.setDomainFrequency(0.0f);
    }

    /** Copies the distinct and duplicate counts. */
    private void injectCardinality(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(CardinalityStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        CardinalityStatistics cardinality = result.get(CardinalityStatistics.class);
        statistics.setDistinctCount(cardinality.getDistinctCount());
        statistics.setDuplicateCount(cardinality.getDuplicateCount());
    }

    /** Replaces the data frequencies of the column with the most frequent values of the analysis. */
    private void injectDataFrequency(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(DataTypeFrequencyStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        Map<String, Long> topValues = result.get(DataTypeFrequencyStatistics.class).getTopK(TOP_FREQUENCIES);
        // A null top-k leaves the previously stored frequencies untouched.
        if (topValues != null) {
            statistics.getDataFrequencies().clear();
            for (Map.Entry<String, Long> entry : topValues.entrySet()) {
                statistics.getDataFrequencies().add(new DataFrequency(entry.getKey(), entry.getValue().longValue()));
            }
        }
    }

    /** Replaces the pattern frequencies of the column with the most frequent patterns of the analysis. */
    private void injectPatternFrequency(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(PatternFrequencyStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        Map<String, Long> topPatterns = result.get(PatternFrequencyStatistics.class).getTopK(TOP_FREQUENCIES);
        // A null top-k leaves the previously stored frequencies untouched.
        if (topPatterns != null) {
            statistics.getPatternFrequencies().clear();
            for (Map.Entry<String, Long> entry : topPatterns.entrySet()) {
                statistics.getPatternFrequencies().add(new PatternFrequency(entry.getKey(), entry.getValue().longValue()));
            }
        }
    }

    /**
     * Copies lower quartile, median and upper quartile. This step is best effort: any failure is
     * logged and the other statistics are still injected.
     */
    private void injectQuantile(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(QuantileStatistics.class)) {
            return;
        }
        try {
            QuantileStatistics quantileStatistics = result.get(QuantileStatistics.class);
            quantileStatistics.endAddValue();
            Quantiles quantiles = column.getStatistics().getQuantiles();
            quantiles.setLowerQuantile(quantileStatistics.getLowerQuartile());
            quantiles.setMedian(quantileStatistics.getMedian());
            quantiles.setUpperQuantile(quantileStatistics.getUpperQuartile());
        } catch (Exception e) {
            LOGGER.warn("Unable to inject quantile information in column {}.", column.getName());
            // The column name fills the placeholder, the trailing exception provides the stack trace.
            LOGGER.debug("Unable to inject quantile information in column {}.", column.getName(), e);
        }
    }

    /**
     * Copies mean, variance, min and max. For a date column that has a date histogram, min and
     * max are taken from the histogram bounds (UTC epoch milliseconds) instead of the numeric
     * summary.
     */
    private void injectNumberSummary(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(SummaryStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        SummaryStatistics summary = result.get(SummaryStatistics.class);
        statistics.setMean(summary.getMean());
        statistics.setVariance(summary.getVariance());

        boolean useDateHistogram = Type.DATE.isAssignableFrom(column.getType())
                && result.exist(StreamDateHistogramStatistics.class);
        if (useDateHistogram) {
            DateHistogram dateHistogram = (DateHistogram) result.get(StreamDateHistogramStatistics.class).getHistogram();
            statistics.setMax(dateHistogram.getMaxUTCEpochMilliseconds());
            statistics.setMin(dateHistogram.getMinUTCEpochMilliseconds());
        } else {
            statistics.setMax(summary.getMax());
            statistics.setMin(summary.getMin());
        }
    }

    /**
     * Builds the number histogram of a numeric column. Range bounds are rounded by formatting
     * them with the default English number format and parsing the text back.
     */
    private void injectNumberHistogram(ColumnMetadata column, Analyzers.Result result) {
        if (!Type.NUMERIC.isAssignableFrom(column.getType()) || !result.exist(StreamNumberHistogramStatistics.class)) {
            return;
        }
        Statistics statistics = column.getStatistics();
        Map<Range, Long> ranges = result.get(StreamNumberHistogramStatistics.class).getHistogram();
        NumberFormat format = NumberFormat.getInstance(Locale.ENGLISH);
        NumberHistogram numberHistogram = new NumberHistogram();
        for (Map.Entry<Range, Long> entry : ranges.entrySet()) {
            Range range = entry.getKey();
            HistogramRange histogramRange = new HistogramRange();
            try {
                histogramRange.getRange().setMax(Double.parseDouble(format.format(range.getUpper())));
                histogramRange.getRange().setMin(Double.parseDouble(format.format(range.getLower())));
            } catch (NumberFormatException e) {
                // Formatted text that cannot be parsed back (e.g. it contains grouping separators):
                // fall back to the unformatted bounds.
                histogramRange.getRange().setMax(range.getUpper());
                histogramRange.getRange().setMin(range.getLower());
            }
            histogramRange.setOccurrences(entry.getValue().longValue());
            numberHistogram.getItems().add(histogramRange);
        }
        statistics.setHistogram(numberHistogram);
    }

    /** Sets the date histogram on a date column. */
    private void injectDateHistogram(ColumnMetadata column, Analyzers.Result result) {
        if (Type.DATE.isAssignableFrom(column.getType()) && result.exist(StreamDateHistogramStatistics.class)) {
            column.getStatistics().setHistogram(result.get(StreamDateHistogramStatistics.class).getHistogram());
        }
    }

    /** Copies average, minimal and maximal text length; only done when the column type is exactly STRING. */
    private void injectTextLength(ColumnMetadata column, Analyzers.Result result) {
        if (!Type.STRING.equals(Type.get(column.getType())) || !result.exist(TextLengthStatistics.class)) {
            return;
        }
        TextLengthStatistics textLength = result.get(TextLengthStatistics.class);
        TextLengthSummary summary = column.getStatistics().getTextLengthSummary();
        summary.setAverageLength(textLength.getAvgTextLength().doubleValue());
        summary.setMinimalLength(textLength.getMinTextLength().intValue());
        summary.setMaximalLength(textLength.getMaxTextLength().intValue());
    }
}
