/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataprep.dataset;

import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataprep.api.dataset.ColumnMetadata;
import org.talend.dataprep.api.dataset.Quality;
import org.talend.dataprep.api.dataset.statistics.DataFrequency;
import org.talend.dataprep.api.dataset.statistics.Histogram;
import org.talend.dataprep.api.dataset.statistics.HistogramRange;
import org.talend.dataprep.api.dataset.statistics.PatternFrequency;
import org.talend.dataprep.api.dataset.statistics.Quantiles;
import org.talend.dataprep.api.dataset.statistics.SemanticDomain;
import org.talend.dataprep.api.dataset.statistics.Statistics;
import org.talend.dataprep.api.dataset.statistics.TextLengthSummary;
import org.talend.dataprep.api.dataset.statistics.date.DateHistogram;
import org.talend.dataprep.api.dataset.statistics.date.StreamDateHistogramStatistics;
import org.talend.dataprep.api.dataset.statistics.number.NumberHistogram;
import org.talend.dataprep.api.dataset.statistics.number.StreamNumberHistogramStatistics;
import org.talend.dataprep.api.type.Type;
import org.talend.dataprep.api.type.TypeUtils;
import org.talend.dataquality.common.inference.Analyzers;
import org.talend.dataquality.common.inference.ValueQualityStatistics;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.recognizer.CategoryFrequency;
import org.talend.dataquality.semantic.statistics.SemanticType;
import org.talend.dataquality.statistics.cardinality.CardinalityStatistics;
import org.talend.dataquality.statistics.frequency.DataTypeFrequencyStatistics;
import org.talend.dataquality.statistics.frequency.pattern.PatternFrequencyStatistics;
import org.talend.dataquality.statistics.numeric.histogram.Range;
import org.talend.dataquality.statistics.numeric.quantile.QuantileStatistics;
import org.talend.dataquality.statistics.numeric.summary.SummaryStatistics;
import org.talend.dataquality.statistics.text.TextLengthStatistics;
import org.talend.dataquality.statistics.type.DataTypeEnum;
import org.talend.dataquality.statistics.type.DataTypeOccurences;

/**
 * Copies the outcome of data quality analyzers ({@link Analyzers.Result}) into the matching
 * {@link ColumnMetadata}: detected type, value quality, semantic domain, cardinality, frequencies,
 * quantiles, numeric summary, text length and histograms.
 *
 * <p>Each {@code inject*} step is a no-op when the analysis result does not contain the statistic it
 * looks for, so the adapter can be used with any combination of analyzers.
 */
public class StatisticsAdapter {
    private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsAdapter.class);

    /** Number of most frequent values / patterns kept in the column statistics. */
    private static final int TOP_TERMS_COUNT = 15;

    /** Maximum number of suggested semantic domains stored on a column. */
    private static final long MAX_SEMANTIC_DOMAINS = 10L;

    /** Suggested semantic domains scoring below this value are not stored on the column. */
    private static final float MIN_SEMANTIC_DOMAIN_SCORE = 1.0f;

    /** A semantic category is applied as the column domain only when its score is strictly above this value. */
    private final int semanticThreshold;

    /**
     * @param semanticThreshold score a semantic category must strictly exceed to become the column domain.
     */
    public StatisticsAdapter(int semanticThreshold) {
        this.semanticThreshold = semanticThreshold;
    }

    /**
     * Adapts the analysis results to all the given columns.
     *
     * @param columns the columns to update.
     * @param results the analysis results, consumed in order, one per column.
     */
    public void adapt(List<ColumnMetadata> columns, List<Analyzers.Result> results) {
        adapt(columns, results, c -> true);
    }

    /**
     * Adapts the analysis results to the columns accepted by the filter.
     *
     * @param columns the candidate columns.
     * @param results the analysis results, consumed in order, one per <em>accepted</em> column.
     * @param filter selects the columns to update.
     */
    public void adapt(List<ColumnMetadata> columns, List<Analyzers.Result> results, Predicate<ColumnMetadata> filter) {
        genericAdapt(columns, results, filter);
    }

    /**
     * Pairs the n-th accepted column with the n-th result. Accepted columns left over once the results
     * are exhausted are not modified.
     */
    private void genericAdapt(List<ColumnMetadata> columns, List<Analyzers.Result> results, Predicate<ColumnMetadata> filter) {
        final Iterator<Analyzers.Result> resultIterator = results.iterator();
        columns.stream().filter(filter).forEach(column -> {
            if (resultIterator.hasNext()) {
                final Analyzers.Result result = resultIterator.next();
                // The data type must be injected first: later steps test column.getType().
                injectDataTypeAnalysis(column, result);
                adaptCommonAnalysis(column, result);
            }
        });
    }

    /** Runs every injection step that does not deal with the column data type or value quality. */
    private void adaptCommonAnalysis(ColumnMetadata currentColumn, Analyzers.Result result) {
        injectSemanticTypes(currentColumn, result);
        injectCardinality(currentColumn, result);
        injectDataFrequency(currentColumn, result);
        injectPatternFrequency(currentColumn, result);
        injectQuantile(currentColumn, result);
        injectNumberSummary(currentColumn, result);
        injectTextLength(currentColumn, result);
        injectNumberHistogram(currentColumn, result);
        injectDateHistogram(currentColumn, result);
    }

    /**
     * Sets the column type to the suggested one (unless the type is forced on the column), then injects
     * the value quality.
     */
    private void injectDataTypeAnalysis(ColumnMetadata column, Analyzers.Result result) {
        if (result.exist(DataTypeOccurences.class) && !column.isTypeForced()) {
            final DataTypeOccurences dataType = result.get(DataTypeOccurences.class);
            final DataTypeEnum suggestedEnumType = dataType.getSuggestedType();
            final Type suggestedColumnType = Type.get(suggestedEnumType.name());
            column.setType(suggestedColumnType.getName());
        }
        injectValueQuality(column, result);
    }

    /**
     * Copies the empty / valid / invalid / total counts to both the column quality and statistics.
     * The invalid count is derived: total - empty - valid.
     */
    private void injectValueQuality(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(ValueQualityStatistics.class)) {
            return;
        }
        final Statistics statistics = column.getStatistics();
        final Quality quality = column.getQuality();
        final ValueQualityStatistics valueQualityStatistics = result.get(ValueQualityStatistics.class);

        final long allCount = valueQualityStatistics.getCount();
        final long emptyCount = valueQualityStatistics.getEmptyCount();
        final long validCount = valueQualityStatistics.getValidCount();
        final long invalidCount = allCount - emptyCount - validCount;

        // NOTE(review): the narrowing casts below silently truncate counts above Integer.MAX_VALUE;
        // kept as-is because the setters are called with int values here.
        quality.setEmpty((int) emptyCount);
        quality.setValid((int) validCount);
        quality.setInvalid((int) invalidCount);

        statistics.setCount(allCount);
        statistics.setEmpty((int) emptyCount);
        statistics.setInvalid((int) invalidCount);
        statistics.setValid(validCount);
    }

    /**
     * Updates the column domain from the first suggested category with a non-empty name (unless the
     * domain is forced on the column) and stores the list of suggested semantic domains.
     *
     * <p>The domain is reset when that category does not score strictly above the threshold, or when
     * there is no such category and the column currently has a domain.
     */
    private void injectSemanticTypes(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(SemanticType.class) || column.isDomainForced()) {
            return;
        }
        final SemanticType semanticType = result.get(SemanticType.class);
        final List<CategoryFrequency> suggestedTypes = semanticType.getSuggestedCategories();

        final Optional<CategoryFrequency> bestMatch = suggestedTypes.stream()
                .filter(category -> !category.getCategoryName().isEmpty())
                .findFirst();
        if (bestMatch.isPresent()) {
            final CategoryFrequency best = bestMatch.get();
            if (best.getScore() > semanticThreshold) {
                updateMetadataWithCategoryInfo(column, best);
            } else {
                resetDomain(column);
            }
        } else if (StringUtils.isNotEmpty(column.getDomain())) {
            resetDomain(column);
        }

        final List<SemanticDomain> semanticDomains = suggestedTypes.stream()
                .map(this::toSemanticDomain)
                .filter(domain -> domain != null && domain.getScore() >= MIN_SEMANTIC_DOMAIN_SCORE)
                .limit(MAX_SEMANTIC_DOMAINS)
                .collect(Collectors.toList());
        column.setSemanticDomains(semanticDomains);
    }

    /**
     * Sets the column domain, label and frequency from the given category. When the category is not
     * found in the registry, its id is used for both the domain and the label.
     */
    private void updateMetadataWithCategoryInfo(ColumnMetadata column, CategoryFrequency categoryFrequency) {
        final String categoryId = categoryFrequency.getCategoryId();
        final DQCategory categoryMetadataByName = CategoryRegistryManager.getInstance().getCategoryMetadataByName(categoryId);
        if (categoryMetadataByName == null) {
            LOGGER.error("Could not find {} in known categories.", categoryId);
            column.setDomain(categoryId);
            column.setDomainLabel(categoryId);
        } else {
            column.setDomain(categoryMetadataByName.getName());
            column.setDomainLabel(categoryMetadataByName.getLabel());
        }
        column.setDomainFrequency(categoryFrequency.getScore());
    }

    /** Clears the column domain: empty domain, empty label and a frequency of zero. */
    private void resetDomain(ColumnMetadata column) {
        column.setDomain("");
        column.setDomainLabel("");
        column.setDomainFrequency(0.0f);
    }

    /**
     * @return the semantic domain for the given category, or {@code null} when the category has no id.
     */
    private SemanticDomain toSemanticDomain(CategoryFrequency categoryFrequency) {
        final String id = categoryFrequency.getCategoryId();
        if (StringUtils.isEmpty(id)) {
            return null;
        }
        final String categoryDisplayName = TypeUtils.getDomainLabel(id);
        return new SemanticDomain(id, categoryDisplayName, categoryFrequency.getScore());
    }

    /** Copies the distinct and duplicate counts to the column statistics. */
    private void injectCardinality(ColumnMetadata column, Analyzers.Result result) {
        if (result.exist(CardinalityStatistics.class)) {
            final Statistics statistics = column.getStatistics();
            final CardinalityStatistics cardinalityStatistics = result.get(CardinalityStatistics.class);
            statistics.setDistinctCount(cardinalityStatistics.getDistinctCount());
            statistics.setDuplicateCount(cardinalityStatistics.getDuplicateCount());
        }
    }

    /** Replaces the column data frequencies with the most frequent values of the analysis. */
    private void injectDataFrequency(ColumnMetadata column, Analyzers.Result result) {
        if (result.exist(DataTypeFrequencyStatistics.class)) {
            final Statistics statistics = column.getStatistics();
            final DataTypeFrequencyStatistics dataFrequencyStatistics = result.get(DataTypeFrequencyStatistics.class);
            final Map<String, Long> topTerms = dataFrequencyStatistics.getTopK(TOP_TERMS_COUNT);
            // A null result leaves the existing frequencies untouched.
            if (topTerms != null) {
                statistics.getDataFrequencies().clear();
                topTerms.forEach((value, count) -> statistics.getDataFrequencies().add(new DataFrequency(value, count)));
            }
        }
    }

    /** Replaces the column pattern frequencies with the most frequent patterns of the analysis. */
    private void injectPatternFrequency(ColumnMetadata column, Analyzers.Result result) {
        if (result.exist(PatternFrequencyStatistics.class)) {
            final Statistics statistics = column.getStatistics();
            final PatternFrequencyStatistics patternFrequencyStatistics = result.get(PatternFrequencyStatistics.class);
            final Map<String, Long> topTerms = patternFrequencyStatistics.getTopK(TOP_TERMS_COUNT);
            // A null result leaves the existing frequencies untouched.
            if (topTerms != null) {
                statistics.getPatternFrequencies().clear();
                topTerms.forEach((pattern, count) -> statistics.getPatternFrequencies().add(new PatternFrequency(pattern, count)));
            }
        }
    }

    /**
     * Copies the lower quartile, median and upper quartile to the column statistics. This is best
     * effort: any failure is logged and the column is left with whatever was set so far.
     */
    private void injectQuantile(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(QuantileStatistics.class)) {
            return;
        }
        try {
            final QuantileStatistics quantileStatistics = result.get(QuantileStatistics.class);
            quantileStatistics.endAddValue();
            final Quantiles quantiles = column.getStatistics().getQuantiles();
            quantiles.setLowerQuantile(quantileStatistics.getLowerQuartile());
            quantiles.setMedian(quantileStatistics.getMedian());
            quantiles.setUpperQuantile(quantileStatistics.getUpperQuartile());
        } catch (Exception e) {
            // Deliberately broad: quantile computation must never abort the adaptation of the other statistics.
            LOGGER.warn("Unable to inject quantile information in column {}.", column.getName());
            // The column name fills the placeholder; the trailing exception is logged with its stack trace.
            LOGGER.debug("Unable to inject quantile information in column {}.", column.getName(), e);
        }
    }

    /**
     * Copies mean, variance, min and max to the column statistics. For date columns with a date
     * histogram, min and max are taken from the histogram (UTC epoch milliseconds) instead of the
     * numeric summary.
     */
    private void injectNumberSummary(ColumnMetadata column, Analyzers.Result result) {
        if (!result.exist(SummaryStatistics.class)) {
            return;
        }
        final Statistics statistics = column.getStatistics();
        final SummaryStatistics summaryStatistics = result.get(SummaryStatistics.class);
        statistics.setMean(summaryStatistics.getMean());
        statistics.setVariance(summaryStatistics.getVariance());
        if (Type.DATE.isAssignableFrom(column.getType()) && result.exist(StreamDateHistogramStatistics.class)) {
            final DateHistogram histogram = (DateHistogram) result.get(StreamDateHistogramStatistics.class).getHistogram();
            statistics.setMax(histogram.getMaxUTCEpochMilliseconds());
            statistics.setMin(histogram.getMinUTCEpochMilliseconds());
        } else {
            statistics.setMax(summaryStatistics.getMax());
            statistics.setMin(summaryStatistics.getMin());
        }
    }

    /** Builds the number histogram of a numeric column from the analysis ranges. */
    private void injectNumberHistogram(ColumnMetadata column, Analyzers.Result result) {
        if (!Type.NUMERIC.isAssignableFrom(column.getType()) || !result.exist(StreamNumberHistogramStatistics.class)) {
            return;
        }
        final Statistics statistics = column.getStatistics();
        final Map<Range, Long> histogramStatistics = result.get(StreamNumberHistogramStatistics.class).getHistogram();
        final NumberFormat format = DecimalFormat.getInstance(Locale.US);
        final NumberHistogram histogram = new NumberHistogram();
        histogramStatistics.forEach((rangeValues, occurrence) -> {
            final HistogramRange range = new HistogramRange();
            try {
                // Round-trip through the formatter to round the bounds to its default precision.
                range.getRange().setMax(Double.valueOf(format.format(rangeValues.getUpper())));
                range.getRange().setMin(Double.valueOf(format.format(rangeValues.getLower())));
            } catch (NumberFormatException e) {
                // The formatted text cannot be parsed back (e.g. it contains grouping separators such as
                // "10,220.857"): fall back to the raw bounds for both ends of the range.
                range.getRange().setMax(rangeValues.getUpper());
                range.getRange().setMin(rangeValues.getLower());
            }
            range.setOccurrences(occurrence);
            histogram.getItems().add(range);
        });
        statistics.setHistogram(histogram);
    }

    /** Sets the date histogram of a date column from the analysis. */
    private void injectDateHistogram(ColumnMetadata column, Analyzers.Result result) {
        if (Type.DATE.isAssignableFrom(column.getType()) && result.exist(StreamDateHistogramStatistics.class)) {
            final Histogram histogram = result.get(StreamDateHistogramStatistics.class).getHistogram();
            column.getStatistics().setHistogram(histogram);
        }
    }

    /** Copies average, minimal and maximal text length to the statistics of a string column. */
    private void injectTextLength(ColumnMetadata column, Analyzers.Result result) {
        if (Type.STRING.equals(Type.get(column.getType())) && result.exist(TextLengthStatistics.class)) {
            final TextLengthStatistics textLengthStatistics = result.get(TextLengthStatistics.class);
            final TextLengthSummary textLengthSummary = column.getStatistics().getTextLengthSummary();
            textLengthSummary.setAverageLength(textLengthStatistics.getAvgTextLength());
            textLengthSummary.setMinimalLength(textLengthStatistics.getMinTextLength().intValue());
            textLengthSummary.setMaximalLength(textLengthStatistics.getMaxTextLength().intValue());
        }
    }
}

