/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.recognizer;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.talend.dataquality.record.linkage.attribute.AbstractAttributeMatcher;
import org.talend.dataquality.record.linkage.attribute.LevenshteinMatcher;
import org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod;
import org.talend.dataquality.semantic.classifier.custom.UserDefinedClassifier;
import org.talend.dataquality.semantic.classifier.impl.DataDictFieldClassifier;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.model.MainCategory;
import org.talend.dataquality.semantic.recognizer.CategoryFrequency;
import org.talend.dataquality.semantic.recognizer.CategoryRecognizer;
import org.talend.dataquality.semantic.recognizer.DictionaryConstituents;
import org.talend.dataquality.semantic.recognizer.LFUCache;

/**
 * Default {@link CategoryRecognizer}: classifies each incoming value with a dictionary-based
 * classifier and an optional user-defined classifier, then accumulates per-category occurrence
 * counts from which {@link #getResult(String, float)} derives scores.
 *
 * <p>An instance holds mutable counters and a cache of already classified values; use
 * {@link #reset()} to start a new analysis with the same dictionaries.
 */
public class DefaultCategoryRecognizer implements CategoryRecognizer {
    /** Frequencies in first-seen order; re-sorted in place by the {@code getResult} methods. */
    private final List<CategoryFrequency> catList = new ArrayList<>();
    /** Frequencies indexed by category name (the empty string stands for "no category"). */
    private final Map<String, CategoryFrequency> categoryToFrequency = new HashMap<>();
    private final DataDictFieldClassifier dataDictFieldClassifier;
    /** May be {@code null}, in which case only the dictionary classifier is used. */
    private final UserDefinedClassifier userDefineClassifier;
    private final Map<String, DQCategory> metadata;
    /** Category ids already computed for a given raw value. */
    // NOTE(review): the meaning of (10, 1000, 0.01f) is defined by LFUCache, not visible here.
    private final LFUCache<String, Set<String>> knownCategoryCache = new LFUCache<>(10, 1000, 0.01f);
    private long emptyCount = 0L;
    private long total = 0L;
    private AbstractAttributeMatcher defaultMatcher = new LevenshteinMatcher();
    private boolean fingerPrintApply = true;
    private boolean tokenizedApply = true;

    /**
     * Builds a recognizer from a bundle of dictionary constituents.
     *
     * @param constituents provider of the shared/custom dictionaries, keyword index, regex
     *        classifier and category metadata
     * @throws IOException declared by the delegated constructor
     */
    public DefaultCategoryRecognizer(DictionaryConstituents constituents) throws IOException {
        this(constituents.getSharedDataDict(), constituents.getCustomDataDict(), constituents.getKeyword(), constituents.getRegexClassifier(), constituents.getMetadata());
    }

    /**
     * Builds a recognizer from its individual parts.
     *
     * <p>Classifiers whose category is flagged as deleted in {@code metadata} are removed from
     * {@code regex}. Non-deleted categories are then split between the custom dictionary
     * (modified categories) and the shared dictionary (all others).
     *
     * @param sharedDictionary index searched for non-modified categories
     * @param customDictionary index searched for modified categories; may be {@code null}
     * @param keyword keyword index handed to the dictionary classifier
     * @param regex user-defined classifier; may be {@code null}
     * @param metadata category metadata keyed by category id
     * @throws IOException declared for callers; not thrown by the code visible in this class
     */
    public DefaultCategoryRecognizer(Index sharedDictionary, Index customDictionary, Index keyword, UserDefinedClassifier regex, Map<String, DQCategory> metadata) throws IOException {
        this.userDefineClassifier = regex;
        // getSubCategorySet() already tolerates a null user-defined classifier, so do the same here.
        if (regex != null) {
            regex.getClassifiers().removeIf(classifier -> isDeleted(metadata.get(classifier.getId())));
        }
        this.metadata = metadata;
        List<String> sharedCategories = new ArrayList<>();
        List<String> customCategories = new ArrayList<>();
        for (DQCategory cat : metadata.values()) {
            if (cat == null || isDeleted(cat)) {
                continue;
            }
            // Null-safe: an unset "modified" flag is treated as "not modified".
            if (Boolean.TRUE.equals(cat.getModified())) {
                customCategories.add(cat.getId());
            } else {
                sharedCategories.add(cat.getId());
            }
        }
        sharedDictionary.setCategoriesToSearch(sharedCategories);
        if (customDictionary != null) {
            customDictionary.setCategoriesToSearch(customCategories);
        }
        this.dataDictFieldClassifier = new DataDictFieldClassifier(sharedDictionary, customDictionary, keyword);
    }

    /** Returns {@code true} only when the category exists and its deleted flag is {@code TRUE}. */
    private static boolean isDeleted(DQCategory category) {
        return category != null && Boolean.TRUE.equals(category.getDeleted());
    }

    @Override
    public DataDictFieldClassifier getDataDictFieldClassifier() {
        return this.dataDictFieldClassifier;
    }

    @Override
    public UserDefinedClassifier getUserDefineClassifier() {
        return this.userDefineClassifier;
    }

    /**
     * Computes the category ids matching a single value.
     *
     * <p>{@code null} or whitespace-only input counts as empty and yields a new empty set.
     * Results for Alpha, Numeric and AlphaNumeric values are cached per raw value; the returned
     * set is the very instance stored in the cache, so callers should not modify it.
     *
     * @param data the raw value to classify
     * @return the ids found by the dictionary and (if present) user-defined classifiers;
     *         never {@code null}
     */
    public Set<String> getSubCategorySet(String data) {
        if (data == null || "".equals(data.trim())) {
            ++this.emptyCount;
            return new HashSet<>();
        }
        Set<String> cached = this.knownCategoryCache.get(data);
        if (cached != null) {
            return cached;
        }
        MainCategory mainCategory = MainCategory.getMainCategory(data);
        Set<String> subCategorySet = new HashSet<>();
        switch (mainCategory) {
            case Alpha:
            case Numeric:
            case AlphaNumeric: {
                subCategorySet.addAll(this.dataDictFieldClassifier.classify(data));
                if (this.userDefineClassifier != null) {
                    subCategorySet.addAll(this.userDefineClassifier.classify(data, mainCategory));
                }
                this.knownCategoryCache.put(data, subCategorySet);
                break;
            }
            case NULL:
            case BLANK: {
                ++this.emptyCount;
                break;
            }
            default:
                // Any other main category: nothing is classified and nothing is cached.
                break;
        }
        return subCategorySet;
    }

    @Override
    public void prepare() {
        // Nothing to prepare.
    }

    /** Clears all accumulated frequencies, counters and the value cache. */
    @Override
    public void reset() {
        this.catList.clear();
        this.categoryToFrequency.clear();
        this.total = 0L;
        this.emptyCount = 0L;
        this.knownCategoryCache.clear();
    }

    /**
     * Classifies one value and updates the frequency counters.
     *
     * <p>Every matched category is recorded at level 0 and its ancestors at increasing levels.
     * When nothing matches, the empty-named category is incremented instead. Ids without
     * metadata are ignored.
     *
     * @param data the raw value to classify
     * @return the names of all categories (matched ones and their ancestors) that have metadata
     */
    @Override
    public String[] process(String data) {
        Set<String> ids = this.getSubCategorySet(data);
        List<String> categories = new ArrayList<>();
        if (ids.isEmpty()) {
            this.incrementCategory("");
        } else {
            Map<String, Integer> categoryToLevel = new HashMap<>();
            for (String id : ids) {
                categoryToLevel.put(id, 0);
                DQCategory meta = this.metadata.get(id);
                if (meta != null && !CollectionUtils.isEmpty(meta.getParents())) {
                    this.incrementAncestorsCategories(categoryToLevel, id);
                }
            }
            for (Map.Entry<String, Integer> entry : categoryToLevel.entrySet()) {
                DQCategory meta = this.metadata.get(entry.getKey());
                if (meta == null) {
                    continue;
                }
                categories.add(meta.getName());
                this.incrementCategory(meta.getName(), meta.getLabel(), entry.getValue());
            }
        }
        ++this.total;
        return categories.toArray(new String[0]);
    }

    /**
     * Walks the parent links of {@code id} breadth-first and records each ancestor in
     * {@code categoryToLevel} with the largest distance found from a visited category.
     *
     * @param categoryToLevel levels gathered so far; must already contain {@code id}
     * @param id the category id to start from
     */
    // NOTE(review): terminates only if the parent relation is acyclic - confirm for DQCategory data.
    private void incrementAncestorsCategories(Map<String, Integer> categoryToLevel, String id) {
        ArrayDeque<String> pending = new ArrayDeque<>();
        pending.addLast(id);
        while (!pending.isEmpty()) {
            String currentId = pending.removeFirst();
            DQCategory current = this.metadata.get(currentId);
            if (current == null || CollectionUtils.isEmpty(current.getParents())) {
                continue;
            }
            int parentLevel = categoryToLevel.get(currentId) + 1;
            for (DQCategory parent : current.getParents()) {
                String parentId = parent.getId();
                Integer knownLevel = categoryToLevel.get(parentId);
                // Re-visit a parent only when this path places it strictly deeper than before.
                if (knownLevel == null || knownLevel < parentLevel) {
                    categoryToLevel.put(parentId, parentLevel);
                    pending.addLast(parentId);
                }
            }
        }
    }

    /** Increments the category using its name as label, at level 0. */
    private void incrementCategory(String categoryName) {
        this.incrementCategory(categoryName, categoryName);
    }

    /** Increments the category at level 0. */
    private void incrementCategory(String categoryName, String categoryLabel) {
        this.incrementCategory(categoryName, categoryLabel, 0);
    }

    /**
     * Adds one occurrence to the named category, creating its frequency entry on first use.
     * The label and level of an existing entry are left untouched.
     */
    private void incrementCategory(String categoryName, String categoryLabel, int categoryLevel) {
        CategoryFrequency frequency = this.categoryToFrequency.get(categoryName);
        if (frequency == null) {
            frequency = new CategoryFrequency(categoryName, categoryLabel, categoryLevel);
            this.categoryToFrequency.put(categoryName, frequency);
            this.catList.add(frequency);
        }
        ++frequency.count;
    }

    /**
     * Share of processed values that fell into {@code category}, as a percentage.
     * The division is done in integer arithmetic, so the value is truncated (not rounded)
     * to two decimals.
     */
    private float frequencyPercentage(CategoryFrequency category) {
        return (float)Math.round(category.count * 10000L / this.total) / 100.0f;
    }

    /**
     * Scores every category by its occurrence percentage and returns them sorted in reverse
     * natural order.
     *
     * @return the internal (live) list of category frequencies
     * @deprecated marked deprecated in the original source; {@link #getResult(String, float)}
     *             additionally takes the column name into account
     */
    @Override
    @Deprecated
    public Collection<CategoryFrequency> getResult() {
        for (CategoryFrequency category : this.categoryToFrequency.values()) {
            category.score = this.frequencyPercentage(category);
        }
        Collections.sort(this.catList, Collections.reverseOrder());
        return this.catList;
    }

    /**
     * Scores every category as {@code min(occurrence% + headerMatch * weight * 100, 100)},
     * where {@code headerMatch} is the matcher's weight between {@code columnName} and the
     * category name, then returns the categories sorted in reverse natural order.
     *
     * @param columnName the column header compared with each category name
     * @param weight multiplier applied to the header match before it is added to the score
     * @return the internal (live) list of category frequencies
     */
    @Override
    public Collection<CategoryFrequency> getResult(String columnName, float weight) {
        // Matcher configuration does not depend on the category: do it once, and only when
        // there is something to score (as before, an empty result never touches the matcher).
        if (!this.categoryToFrequency.isEmpty()) {
            if (this.tokenizedApply) {
                this.defaultMatcher.setTokenMethod(TokenizedResolutionMethod.ANYORDER);
            }
            this.defaultMatcher.setFingerPrintApply(this.fingerPrintApply);
        }
        for (CategoryFrequency category : this.categoryToFrequency.values()) {
            float scoreOnHeader = Double.valueOf(this.defaultMatcher.getMatchingWeight(columnName, category.getCategoryName())).floatValue();
            category.score = Math.min(this.frequencyPercentage(category) + scoreOnHeader * weight * 100.0f, 100.0f);
        }
        Collections.sort(this.catList, Collections.reverseOrder());
        return this.catList;
    }

    /** Closes the dictionary index and drops the value cache. */
    @Override
    public void end() {
        this.dataDictFieldClassifier.closeIndex();
        this.knownCategoryCache.clear();
    }

    /** Replaces the matcher used to compare column names with category names. */
    public void setDefaultMatcher(AbstractAttributeMatcher defaultMatcher) {
        this.defaultMatcher = defaultMatcher;
    }

    /** Sets the finger-print flag forwarded to the matcher in {@link #getResult(String, float)}. */
    public void setFingerPrintApply(boolean fingerPrintApply) {
        this.fingerPrintApply = fingerPrintApply;
    }

    /** When {@code true}, the matcher is switched to any-order tokenized matching before scoring. */
    public void setTokenizedApply(boolean tokenizedApply) {
        this.tokenizedApply = tokenizedApply;
    }
}

