package org.talend.dataquality.semantic.statistics;

import dk.brics.automaton.Automaton;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.common.inference.QualityAnalyzer;
import org.talend.dataquality.common.inference.ResizableList;
import org.talend.dataquality.common.inference.ValueQualityStatistics;
import org.talend.dataquality.common.util.LFUCache;
import org.talend.dataquality.semantic.classifier.ISubCategoryClassifier;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;
import org.talend.dataquality.semantic.classifier.impl.DataDictFieldClassifier;
import org.talend.dataquality.semantic.model.CategoryType;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.recognizer.DefaultCategoryRecognizer;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/* loaded from: input_file:org/talend/dataquality/semantic/statistics/SemanticQualityAnalyzer.class */
public class SemanticQualityAnalyzer extends QualityAnalyzer<ValueQualityStatistics, String[]> {
    /** Serialization version identifier of this analyzer. */
    private static final long serialVersionUID = -5951511723860660263L;
    /** Class logger; reports an IOException raised while the classifiers are built in init(). */
    private static final Logger LOGGER = LoggerFactory.getLogger(SemanticQualityAnalyzer.class);
    /** One statistics entry per analyzed column; resized to the record length on each analyze() call. */
    private final ResizableList<ValueQualityStatistics> results;
    /** Validation outcomes already computed: outer key is the category id, inner key is the analyzed value. */
    private final Map<String, LFUCache<String, Boolean>> knownValidationCategoryCache;
    /** Snapshot whose metadata map is used to look categories up by id and to search them by name. */
    private DictionarySnapshot dictionarySnapshot;
    /** Classifier returned by DefaultCategoryRecognizer#getUserDefineClassifier(); used for REGEX categories. */
    private ISubCategoryClassifier regexClassifier;
    /** Classifier returned by DefaultCategoryRecognizer#getDataDictFieldClassifier(); used for DICT categories. */
    private ISubCategoryClassifier dataDictClassifier;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.talend.dataquality.semantic.statistics.SemanticQualityAnalyzer$1, reason: invalid class name */
    /* loaded from: input_file:org/talend/dataquality/semantic/statistics/SemanticQualityAnalyzer$1.class */
    /**
     * Synthetic lookup table (as recovered by the decompiler) that translates a
     * {@link CategoryType} ordinal into a small integer usable as a switch label:
     * REGEX maps to 1, DICT to 2 and COMPOUND to 3.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // Indexed by CategoryType ordinal; slots that are not assigned below keep the default value 0.
        static final /* synthetic */ int[] $SwitchMap$org$talend$dataquality$semantic$model$CategoryType = new int[CategoryType.values().length];

        static {
            // Each constant is registered in its own try block so that a NoSuchFieldError raised
            // for one constant does not prevent the following ones from being registered.
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$CategoryType[CategoryType.REGEX.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
                // Deliberately ignored: the REGEX slot is simply left unassigned.
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$CategoryType[CategoryType.DICT.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
                // Deliberately ignored: the DICT slot is simply left unassigned.
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$CategoryType[CategoryType.COMPOUND.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
                // Deliberately ignored: the COMPOUND slot is simply left unassigned.
            }
        }
    }

    /**
     * Creates an analyzer bound to a dictionary snapshot.
     *
     * @param dictionarySnapshot snapshot providing the category metadata and the classifiers
     * @param strArr category names, one per analyzed column; they are resolved to category ids by
     *        {@link #setTypes(String[])}
     * @param z whether invalid values must be recorded in the statistics in addition to being counted
     */
    public SemanticQualityAnalyzer(DictionarySnapshot dictionarySnapshot, String[] strArr, boolean z) {
        this.isStoreInvalidValues = z;
        this.dictionarySnapshot = dictionarySnapshot;
        this.knownValidationCategoryCache = new HashMap<>();
        this.results = new ResizableList<>(ValueQualityStatistics.class);
        // The classifiers are built before the type names are resolved, as both steps read the snapshot.
        init();
        setTypes(strArr);
    }

    /**
     * Creates an analyzer that only counts invalid values and does not store them.
     *
     * @param dictionarySnapshot snapshot providing the category metadata and the classifiers
     * @param strArr category names, one per analyzed column
     */
    public SemanticQualityAnalyzer(DictionarySnapshot dictionarySnapshot, String... strArr) {
        // Delegates with the "store invalid values" flag switched off.
        this(dictionarySnapshot, strArr, false);
    }

    /**
     * Resolves category names into category ids and registers the ids as the column types.
     *
     * <p>For each name, the first category of the snapshot metadata with that name is used; its
     * children are replaced by the list computed by {@code getChildrenCategories}. A name that
     * matches no category is registered as {@code SemanticCategoryEnum.UNKNOWN}.
     *
     * @param strArr category names, one per analyzed column
     */
    public void setTypes(String[] strArr) {
        List<String> categoryIds = new ArrayList<>(strArr.length);
        for (String typeName : strArr) {
            // Default used when no category carries the requested name.
            String resolvedId = SemanticCategoryEnum.UNKNOWN.name();
            for (DQCategory candidate : this.dictionarySnapshot.getMetadata().values()) {
                if (typeName.equals(candidate.getName())) {
                    candidate.setChildren(getChildrenCategories(candidate.getId()));
                    resolvedId = candidate.getId();
                    break;
                }
            }
            categoryIds.add(resolvedId);
        }
        super.setTypes(categoryIds.toArray(new String[0]));
    }

    /**
     * Builds the regex and dictionary classifiers from the snapshot and clears the statistics.
     *
     * <p>An {@link IOException} raised while building the classifiers is logged and not rethrown;
     * the classifier fields not yet assigned at that point keep their previous value.
     */
    public void init() {
        try {
            DefaultCategoryRecognizer recognizer = new DefaultCategoryRecognizer(this.dictionarySnapshot);
            this.regexClassifier = recognizer.getUserDefineClassifier();
            this.dataDictClassifier = recognizer.getDataDictFieldClassifier();
        } catch (IOException ioe) {
            LOGGER.error(ioe.getMessage(), ioe);
        }
        // Statistics are cleared whether or not the classifiers could be built.
        this.results.clear();
    }

    /**
     * Switches the recording of invalid values on or off.
     *
     * @param z {@code true} to append every invalid value to the column statistics,
     *        {@code false} to only count them
     */
    public void setStoreInvalidValues(boolean z) {
        this.isStoreInvalidValues = z;
    }

    /**
     * Analyzes one record and updates the statistics of each column.
     *
     * <p>The statistics list is resized to the record length (0 for a {@code null} record).
     * A value that is {@code null} or blank after trimming is counted as empty; any other value
     * is validated against the category id registered for its column.
     *
     * @param strArr the record values, one per column; may be {@code null}
     * @return always {@code true}
     */
    public boolean analyze(String... strArr) {
        int columnCount = (strArr == null) ? 0 : strArr.length;
        this.results.resize(columnCount);
        for (int column = 0; column < columnCount; column++) {
            String categoryId = getTypes()[column];
            String value = strArr[column];
            ValueQualityStatistics stats = this.results.get(column);
            boolean blank = value == null || value.trim().isEmpty();
            if (blank) {
                stats.incrementEmpty();
            } else {
                analyzeValue(categoryId, value, stats);
            }
        }
        return true;
    }

    /**
     * Classifies a non-empty value as valid or invalid for a category and updates the statistics.
     *
     * <p>The value is counted as valid when the category id is unknown to the snapshot or when the
     * category's completeness flag is not {@code true}. Otherwise it is valid only if the category
     * is not flagged as deleted and {@link #isValid(DQCategory, String)} accepts it.
     *
     * @param str id of the category registered for the column
     * @param str2 the value to check
     * @param valueQualityStatistics statistics of the column, updated in place
     */
    private void analyzeValue(String str, String str2, ValueQualityStatistics valueQualityStatistics) {
        DQCategory category = this.dictionarySnapshot.getMetadata().get(str);
        if (category == null) {
            valueQualityStatistics.incrementValid();
            return;
        }
        // Validation is only performed when the completeness flag is explicitly true.
        if (!Boolean.TRUE.equals(category.getCompleteness())) {
            valueQualityStatistics.incrementValid();
            return;
        }
        boolean deleted = Boolean.TRUE.equals(category.getDeleted());
        if (!deleted && isValid(category, str2)) {
            valueQualityStatistics.incrementValid();
            return;
        }
        valueQualityStatistics.incrementInvalid();
        processInvalidValue(valueQualityStatistics, str2);
    }

    /**
     * Tells whether a value is valid for a category, reusing a previously computed answer when
     * one is cached.
     *
     * <p>Results are cached per category id and per value. REGEX categories are checked with the
     * regex classifier, DICT categories with the dictionary classifier and COMPOUND categories
     * through their children; any other category type yields {@code false}.
     *
     * @param dQCategory the category to validate against
     * @param str the value to check
     * @return {@code true} if the value is valid for the category
     */
    public boolean isValid(DQCategory dQCategory, String str) {
        String categoryId = dQCategory.getId();
        LFUCache<String, Boolean> valueCache = this.knownValidationCategoryCache.get(categoryId);
        if (valueCache == null) {
            valueCache = new LFUCache<>(10, 1000, 0.01f);
            this.knownValidationCategoryCache.put(categoryId, valueCache);
        } else {
            Boolean cached = valueCache.get(str);
            if (cached != null) {
                return cached.booleanValue();
            }
        }
        // Switch directly on the enum rather than on an ordinal lookup table with numeric labels.
        boolean valid;
        switch (dQCategory.getType()) {
            case REGEX:
                valid = this.regexClassifier.validCategories(str, dQCategory, null);
                break;
            case DICT:
                valid = this.dataDictClassifier.validCategories(str, dQCategory, null);
                break;
            case COMPOUND:
                valid = isCompoundValid(dQCategory, str);
                break;
            default:
                // Category types without a validation strategy are reported as invalid.
                valid = false;
                break;
        }
        valueCache.put(str, Boolean.valueOf(valid));
        return valid;
    }

    /**
     * Validates a value against the children of a compound category.
     *
     * <p>The children are split by type; the REGEX children are tried first with the regex
     * classifier, and the DICT children are tried with the dictionary classifier only if the
     * regex step did not accept the value. Children of any other type are ignored.
     *
     * @param dQCategory the compound category
     * @param str the value to check
     * @return {@code true} if the regex children or the dictionary children accept the value
     */
    private boolean isCompoundValid(DQCategory dQCategory, String str) {
        HashSet<DQCategory> regexChildren = new HashSet<>();
        HashSet<DQCategory> dictChildren = new HashSet<>();
        for (DQCategory child : dQCategory.getChildren()) {
            CategoryType childType = child.getType();
            if (childType == CategoryType.DICT) {
                dictChildren.add(child);
            } else if (childType == CategoryType.REGEX) {
                regexChildren.add(child);
            }
        }
        if (!regexChildren.isEmpty() && this.regexClassifier.validCategories(str, dQCategory, regexChildren)) {
            return true;
        }
        return !dictChildren.isEmpty() && this.dataDictClassifier.validCategories(str, dQCategory, dictChildren);
    }

    /**
     * Records an invalid value in the column statistics when storing invalid values is enabled.
     *
     * @param valueQualityStatistics statistics of the column, updated in place
     * @param str the invalid value
     */
    private void processInvalidValue(ValueQualityStatistics valueQualityStatistics, String str) {
        if (!this.isStoreInvalidValues) {
            return;
        }
        valueQualityStatistics.appendInvalidValue(str);
    }

    /**
     * Collects the descendants of a category that have no children of their own.
     *
     * <p>The hierarchy is walked breadth-first from the given id, using the snapshot metadata to
     * resolve each id. Every child id is enqueued at most once. The starting category itself is
     * never part of the result, and ids unknown to the snapshot are skipped.
     *
     * @param str id of the category whose descendants are requested
     * @return the childless descendant categories, in visiting order
     */
    private List<DQCategory> getChildrenCategories(String str) {
        ArrayDeque<String> pending = new ArrayDeque<>();
        HashSet<String> enqueuedIds = new HashSet<>();
        List<DQCategory> leaves = new ArrayList<>();
        pending.addLast(str);
        while (!pending.isEmpty()) {
            String currentId = pending.removeFirst();
            DQCategory current = this.dictionarySnapshot.getMetadata().get(currentId);
            if (current == null) {
                continue;
            }
            if (CollectionUtils.isEmpty(current.getChildren())) {
                // A childless node is a leaf, unless it is the starting category itself.
                if (!currentId.equals(str)) {
                    leaves.add(current);
                }
                continue;
            }
            for (DQCategory child : current.getChildren()) {
                // Set.add returns false for an id that was already enqueued.
                if (enqueuedIds.add(child.getId())) {
                    pending.addLast(child.getId());
                }
            }
        }
        return leaves;
    }

    /** End-of-analysis hook; this analyzer has nothing to do here. */
    public void end() {
    }

    /**
     * Returns the statistics gathered so far.
     *
     * @return the internal statistics list itself (not a copy), one entry per analyzed column
     */
    public List<ValueQualityStatistics> getResult() {
        return this.results;
    }

    /**
     * Closes the index held by the dictionary classifier.
     *
     * <p>Nothing is done when the dictionary classifier is absent (for instance because
     * {@code init()} logged an {@code IOException} before assigning it) or is not a
     * {@link DataDictFieldClassifier}.
     *
     * @throws Exception if closing the index fails
     */
    public void close() throws Exception {
        // instanceof is false for null, so this covers both the missing and the foreign classifier.
        if (this.dataDictClassifier instanceof DataDictFieldClassifier) {
            ((DataDictFieldClassifier) this.dataDictClassifier).closeIndex();
        }
    }
}
