package org.talend.dataquality.semantic.recognizer;

import dk.brics.automaton.Automaton;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.talend.dataquality.common.util.LFUCache;
import org.talend.dataquality.record.linkage.attribute.AbstractAttributeMatcher;
import org.talend.dataquality.record.linkage.attribute.LevenshteinMatcher;
import org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;
import org.talend.dataquality.semantic.classifier.custom.UserDefinedClassifier;
import org.talend.dataquality.semantic.classifier.impl.DataDictFieldClassifier;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.CategoryType;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.model.MainCategory;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/* loaded from: input_file:org/talend/dataquality/semantic/recognizer/DefaultCategoryRecognizer.class */
public class DefaultCategoryRecognizer implements CategoryRecognizer {
    private final List<CategoryFrequency> catList;
    private Map<String, CategoryFrequency> categoryToFrequency;
    private final DataDictFieldClassifier dataDictFieldClassifier;
    private final UserDefinedClassifier userDefineClassifier;
    private final Map<String, DQCategory> metadata;
    private LFUCache<String, Set<String>> knownCategoryCache;
    private long total;
    private AbstractAttributeMatcher defaultMatcher;
    private boolean fingerPrintApply;
    private boolean tokenizedApply;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.talend.dataquality.semantic.recognizer.DefaultCategoryRecognizer$1, reason: invalid class name */
    /* loaded from: input_file:org/talend/dataquality/semantic/recognizer/DefaultCategoryRecognizer$1.class */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$talend$dataquality$semantic$model$MainCategory = new int[MainCategory.values().length];

        static {
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$MainCategory[MainCategory.Alpha.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$MainCategory[MainCategory.Numeric.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$MainCategory[MainCategory.AlphaNumeric.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$MainCategory[MainCategory.NULL.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$talend$dataquality$semantic$model$MainCategory[MainCategory.BLANK.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
        }
    }

    public DefaultCategoryRecognizer(DictionarySnapshot dictionarySnapshot) {
        this(dictionarySnapshot.getSharedDataDict(), dictionarySnapshot.getCustomDataDict(), dictionarySnapshot.getKeyword(), dictionarySnapshot.getRegexClassifier(), dictionarySnapshot.getMetadata());
    }

    public DefaultCategoryRecognizer(Index index, Index index2, Index index3, UserDefinedClassifier userDefinedClassifier, Map<String, DQCategory> map) {
        this.catList = new ArrayList();
        this.categoryToFrequency = new HashMap();
        this.knownCategoryCache = new LFUCache<>(10, 1000, 0.01f);
        this.total = 0L;
        this.defaultMatcher = new LevenshteinMatcher();
        this.fingerPrintApply = true;
        this.tokenizedApply = true;
        HashSet hashSet = new HashSet(userDefinedClassifier.getClassifiers());
        hashSet.removeIf(iSubCategory -> {
            return map.get(iSubCategory.getId()) != null && Boolean.TRUE.equals(((DQCategory) map.get(iSubCategory.getId())).getDeleted());
        });
        this.userDefineClassifier = new UserDefinedClassifier();
        this.userDefineClassifier.setClassifiers(hashSet);
        this.metadata = map;
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        for (DQCategory dQCategory : map.values()) {
            if (!dQCategory.getDeleted().booleanValue() && CategoryType.DICT.equals(dQCategory.getType())) {
                if (dQCategory.getModified().booleanValue()) {
                    arrayList2.add(dQCategory.getId());
                } else {
                    arrayList.add(dQCategory.getId());
                }
            }
        }
        index.setCategoriesToSearch(arrayList);
        if (index2 != null) {
            index2.setCategoriesToSearch(arrayList2);
        }
        this.dataDictFieldClassifier = new DataDictFieldClassifier(index, index2, index3);
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public DataDictFieldClassifier getDataDictFieldClassifier() {
        return this.dataDictFieldClassifier;
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public UserDefinedClassifier getUserDefineClassifier() {
        return this.userDefineClassifier;
    }

    public Set<String> getSubCategorySet(String str) {
        if (str == null || "".equals(str.trim())) {
            return new HashSet();
        }
        Set<String> set = (Set) this.knownCategoryCache.get(str);
        if (set != null) {
            return set;
        }
        MainCategory mainCategory = MainCategory.getMainCategory(str);
        HashSet hashSet = new HashSet();
        switch (AnonymousClass1.$SwitchMap$org$talend$dataquality$semantic$model$MainCategory[mainCategory.ordinal()]) {
            case 1:
            case 2:
            case Automaton.MINIMIZE_VALMARI /* 3 */:
                handleAlphaNumericCase(hashSet, str, mainCategory);
                break;
        }
        return hashSet;
    }

    private void handleAlphaNumericCase(Set<String> set, String str, MainCategory mainCategory) {
        set.addAll(this.dataDictFieldClassifier.classify(str));
        if (this.userDefineClassifier != null) {
            set.addAll(this.userDefineClassifier.classify(str, mainCategory));
        }
        this.knownCategoryCache.put(str, set);
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void prepare() {
        this.categoryToFrequency = new HashMap();
        this.knownCategoryCache = new LFUCache<>(10, 1000, 0.01f);
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void reset() {
        this.catList.clear();
        if (this.categoryToFrequency != null) {
            this.categoryToFrequency.clear();
        }
        this.total = 0L;
        if (this.knownCategoryCache != null) {
            this.knownCategoryCache.clear();
        }
        this.categoryToFrequency = null;
        this.knownCategoryCache = null;
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public String[] process(String str) {
        Set<String> subCategorySet = getSubCategorySet(str);
        HashMap hashMap = new HashMap();
        ArrayList arrayList = new ArrayList();
        if (subCategorySet.isEmpty()) {
            incrementCategory();
        } else {
            for (String str2 : subCategorySet) {
                hashMap.put(str2, 0);
                DQCategory dQCategory = this.metadata.get(str2);
                if (dQCategory != null && !CollectionUtils.isEmpty(dQCategory.getParents())) {
                    incrementAncestorsCategories(hashMap, str2);
                }
            }
            for (Map.Entry<String, Integer> entry : hashMap.entrySet()) {
                DQCategory dQCategory2 = this.metadata.get(entry.getKey());
                if (dQCategory2 != null) {
                    arrayList.add(dQCategory2.getName());
                    incrementCategory(dQCategory2.getName(), dQCategory2.getLabel(), entry.getValue().intValue());
                }
            }
        }
        this.total++;
        return (String[]) arrayList.toArray(new String[0]);
    }

    private void incrementAncestorsCategories(Map<String, Integer> map, String str) {
        ArrayDeque arrayDeque = new ArrayDeque();
        arrayDeque.add(str);
        while (!arrayDeque.isEmpty()) {
            String pop = arrayDeque.pop();
            DQCategory dQCategory = this.metadata.get(pop);
            Integer num = map.get(pop);
            if (dQCategory != null && !CollectionUtils.isEmpty(dQCategory.getParents())) {
                updateAncestorsCategoryLevel(dQCategory.getParents(), num, map, arrayDeque);
            }
        }
    }

    private void updateAncestorsCategoryLevel(List<DQCategory> list, Integer num, Map<String, Integer> map, Deque<String> deque) {
        Iterator<DQCategory> it = list.iterator();
        while (it.hasNext()) {
            String id = it.next().getId();
            Integer num2 = map.get(id);
            if (num2 == null || num2.intValue() < num.intValue() + 1) {
                map.put(id, Integer.valueOf(num.intValue() + 1));
                deque.add(id);
            }
        }
    }

    private void incrementCategory() {
        incrementCategory("", "", 0);
    }

    private void incrementCategory(String str, String str2, int i) {
        CategoryFrequency categoryFrequency = this.categoryToFrequency.get(str);
        if (categoryFrequency == null) {
            categoryFrequency = new CategoryFrequency(str, str2, i);
            this.categoryToFrequency.put(str, categoryFrequency);
            this.catList.add(categoryFrequency);
        }
        categoryFrequency.count++;
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    @Deprecated
    public Collection<CategoryFrequency> getResult() {
        for (CategoryFrequency categoryFrequency : this.categoryToFrequency.values()) {
            categoryFrequency.score = ((float) ((categoryFrequency.count * 10000) / this.total)) / 100.0f;
        }
        this.catList.sort(Collections.reverseOrder());
        return this.catList;
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public Collection<CategoryFrequency> getResult(String str, float f) {
        for (CategoryFrequency categoryFrequency : this.categoryToFrequency.values()) {
            categoryFrequency.score = (((float) categoryFrequency.count) * 100.0f) / ((float) this.total);
            if (this.tokenizedApply) {
                this.defaultMatcher.setTokenMethod(TokenizedResolutionMethod.ANYORDER);
            }
            this.defaultMatcher.setFingerPrintApply(this.fingerPrintApply);
            float f2 = 0.0f;
            if (str != null && !SemanticCategoryEnum.UNKNOWN.getDisplayName().equals(categoryFrequency.getCategoryName())) {
                f2 = (float) this.defaultMatcher.getMatchingWeight(str, categoryFrequency.getCategoryName());
            }
            if (f2 > 0.7d) {
                categoryFrequency.score += f2 * f * 100.0f;
            }
            categoryFrequency.score = Math.min(Math.round(categoryFrequency.score * 100.0f) / 100.0f, 100.0f);
        }
        this.catList.sort(Collections.reverseOrder());
        return this.catList;
    }

    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void end() {
        reset();
    }

    public void setDefaultMatcher(AbstractAttributeMatcher abstractAttributeMatcher) {
        this.defaultMatcher = abstractAttributeMatcher;
    }

    public void setFingerPrintApply(boolean z) {
        this.fingerPrintApply = z;
    }

    public void setTokenizedApply(boolean z) {
        this.tokenizedApply = z;
    }

    public long getTotal() {
        return this.total;
    }
}
