package org.talend.dataquality.semantic.recognizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.zip.JSONzip;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
import org.talend.dataquality.semantic.classifier.custom.UserDefinedClassifier;
import org.talend.dataquality.semantic.classifier.impl.DataDictFieldClassifier;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.DQCategory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:org/talend/dataquality/semantic/recognizer/DefaultCategoryRecognizer.class */
/**
 * Default {@link CategoryRecognizer} implementation.
 *
 * <p>Each processed value is classified by a {@link DataDictFieldClassifier} and, when one is
 * supplied, a {@link UserDefinedClassifier}. The recognizer keeps one {@link CategoryFrequency}
 * per category seen so far and exposes the aggregated counts through {@link #getResult()}.
 * Values that match no category are counted under the empty category name {@code ""}.
 *
 * <p>Instances hold mutable counters and a cache and perform no synchronization.
 */
public class DefaultCategoryRecognizer implements CategoryRecognizer {

    /**
     * Orders frequencies by descending count, always placing the empty category ({@code ""}) last.
     * Uses {@link Long#compare(long, long)} so that large count differences cannot overflow.
     */
    private static final Comparator<CategoryFrequency> BY_COUNT_DESC_EMPTY_LAST = new Comparator<CategoryFrequency>() {
        @Override
        public int compare(CategoryFrequency left, CategoryFrequency right) {
            boolean leftEmpty = "".equals(left.categoryName);
            boolean rightEmpty = "".equals(right.categoryName);
            if (leftEmpty && rightEmpty) {
                // Required by the Comparator contract: equal operands compare as 0.
                return 0;
            }
            if (leftEmpty) {
                return 1;
            }
            if (rightEmpty) {
                return -1;
            }
            return Long.compare(right.count, left.count);
        }
    };

    /** Frequencies in the order returned by {@link #getResult()}; sorted in place on each call. */
    private final List<CategoryFrequency> catList = new ArrayList<>();

    /** Lookup of the frequency entry by the key passed to {@code incrementCategory}. */
    private final Map<String, CategoryFrequency> categoryToFrequency = new HashMap<>();

    private final DataDictFieldClassifier dataDictFieldClassifier;

    /** Optional classifier; skipped when {@code null}. */
    private final UserDefinedClassifier userDefineClassifier;

    /** Cache of already classified values, keyed by the raw input string. */
    private final LFUCache<String, Set<String>> knownCategoryCache = new LFUCache<>(10, 1000, 0.01f);

    /** Number of inputs that were null, blank, or of main category NULL/BLANK. */
    private long emptyCount = 0L;

    /** Number of values passed to {@link #process(String)} since construction or the last reset. */
    private long total = 0L;

    private final CategoryRegistryManager crm;

    /**
     * Creates a recognizer that uses the regex classifier provided by the
     * {@link CategoryRegistryManager} singleton as its user defined classifier.
     *
     * @param index  first index handed to the {@link DataDictFieldClassifier}
     * @param index2 second index handed to the {@link DataDictFieldClassifier}
     * @throws IOException declared for callers; propagated from the delegated constructor
     */
    public DefaultCategoryRecognizer(Index index, Index index2) throws IOException {
        this(index, index2, CategoryRegistryManager.getInstance().getRegexClassifier());
    }

    /**
     * Creates a recognizer with an explicit user defined classifier.
     *
     * @param index                 first index handed to the {@link DataDictFieldClassifier}
     * @param index2                second index handed to the {@link DataDictFieldClassifier}
     * @param userDefinedClassifier additional classifier, or {@code null} to use only the dictionary classifier
     * @throws IOException declared for callers of this constructor
     */
    public DefaultCategoryRecognizer(Index index, Index index2, UserDefinedClassifier userDefinedClassifier) throws IOException {
        this.dataDictFieldClassifier = new DataDictFieldClassifier(index, index2);
        this.userDefineClassifier = userDefinedClassifier;
        this.crm = CategoryRegistryManager.getInstance();
    }

    /** @return the dictionary based classifier used by this recognizer */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public DataDictFieldClassifier getDataDictFieldClassifier() {
        return this.dataDictFieldClassifier;
    }

    /** @return the user defined classifier, possibly {@code null} */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public UserDefinedClassifier getUserDefineClassifier() {
        return this.userDefineClassifier;
    }

    /**
     * Classifies a single value and returns the categories it belongs to.
     *
     * <p>Null or blank input increments the empty counter and yields a new empty set. Results for
     * Alpha, Numeric and AlphaNumeric values are stored in the cache and the cached set instance is
     * returned on later calls with the same value.
     *
     * @param str the value to classify, may be {@code null}
     * @return the set of category names found for {@code str}; empty when none apply
     */
    public Set<String> getSubCategorySet(String str) {
        if (str == null || "".equals(str.trim())) {
            this.emptyCount++;
            return new HashSet<>();
        }
        Set<String> cached = this.knownCategoryCache.get(str);
        if (cached != null) {
            return cached;
        }
        MainCategory mainCategory = MainCategory.getMainCategory(str);
        Set<String> categories = new HashSet<>();
        // Switch on the enum itself; constants not listed here fall through with no action.
        switch (mainCategory) {
            case Alpha:
            case Numeric:
            case AlphaNumeric:
                categories.addAll(this.dataDictFieldClassifier.classify(str));
                if (this.userDefineClassifier != null) {
                    categories.addAll(this.userDefineClassifier.classify(str, mainCategory));
                }
                this.knownCategoryCache.put(str, categories);
                break;
            case NULL:
            case BLANK:
                this.emptyCount++;
                break;
            default:
                break;
        }
        return categories;
    }

    /** No preparation is needed by this implementation. */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void prepare() {
    }

    /** Clears all accumulated frequencies, counters and the classification cache. */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void reset() {
        this.catList.clear();
        this.categoryToFrequency.clear();
        this.total = 0L;
        this.emptyCount = 0L;
        this.knownCategoryCache.clear();
    }

    /**
     * Classifies {@code str} and updates the per-category counts.
     *
     * <p>For every category found, the label is looked up through the category registry; when no
     * metadata is registered for the name, the name itself is used as the label. A value without
     * any category is counted under the empty category {@code ""}.
     *
     * @param str the value to process, may be {@code null}
     * @return the category names found for {@code str}; an empty array when none apply
     */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public String[] process(String str) {
        Set<String> categories = getSubCategorySet(str);
        if (categories.isEmpty()) {
            incrementCategory("");
        } else {
            for (String categoryName : categories) {
                DQCategory metadata = this.crm.getCategoryMetadataByName(categoryName);
                String label = metadata == null ? categoryName : metadata.getLabel();
                incrementCategory(categoryName, label);
            }
        }
        this.total++;
        return categories.toArray(new String[categories.size()]);
    }

    /** Increments the count of a category whose label equals its key. */
    private void incrementCategory(String str) {
        incrementCategory(str, str);
    }

    /**
     * Increments the count stored under {@code str}, creating the entry on first use.
     *
     * @param str  key of the category in the frequency map
     * @param str2 second constructor argument of a newly created {@link CategoryFrequency}
     */
    private void incrementCategory(String str, String str2) {
        CategoryFrequency entry = this.categoryToFrequency.get(str);
        if (entry == null) {
            entry = new CategoryFrequency(str, str2);
            this.categoryToFrequency.put(str, entry);
            this.catList.add(entry);
        }
        entry.count++;
    }

    /**
     * Recomputes the frequency of every category and returns them sorted.
     *
     * <p>The frequency is the share of processed values expressed in percent with two decimals,
     * computed with integer division ({@code count * 10000 / total}) before scaling down by 100.
     * Entries are sorted by descending count with the empty category last.
     *
     * @return the internal, sorted list of category frequencies
     */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public Collection<CategoryFrequency> getResult() {
        // Entries exist only after process() has run, which also increments total, so total > 0 here.
        for (CategoryFrequency entry : this.categoryToFrequency.values()) {
            long hundredthsOfPercent = (entry.count * 10000) / this.total;
            entry.frequency = Math.round((float) hundredthsOfPercent) / 100.0f;
        }
        Collections.sort(this.catList, BY_COUNT_DESC_EMPTY_LAST);
        return this.catList;
    }

    /** Closes the index of the dictionary classifier and clears the classification cache. */
    @Override // org.talend.dataquality.semantic.recognizer.CategoryRecognizer
    public void end() {
        this.dataDictFieldClassifier.closeIndex();
        this.knownCategoryCache.clear();
    }
}
