/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
import org.talend.dataquality.semantic.exception.DQSemanticRuntimeException;
import org.talend.dataquality.semantic.index.DictionarySearchMode;
import org.talend.dataquality.semantic.index.LuceneIndex;
import org.talend.dataquality.semantic.recognizer.CategoryFrequency;
import org.talend.dataquality.semantic.recognizer.CategoryRecognizer;
import org.talend.dataquality.semantic.recognizer.DefaultCategoryRecognizer;
import org.talend.dataquality.semantic.sampling.SemanticCategory;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/**
 * Keeps one {@link CategoryRecognizer} per column index and feeds each field of the
 * submitted records to the recognizer of its column.
 *
 * <p>All recognizers are built from a single {@link DictionarySnapshot} that is assembled
 * in the constructor from the shared resources exposed by {@link CategoryRegistryManager}.
 */
public class CategoryInferenceManager {
    private Map<Integer, CategoryRecognizer> categoryRecognizers = new HashMap<>();
    private DictionarySnapshot dictionarySnapshot;

    /**
     * Builds the dictionary snapshot from the shared category metadata, the shared data
     * dictionary and keyword directories (each wrapped in a {@link LuceneIndex}) and the
     * regex classifier of the {@link CategoryRegistryManager} singleton. The third snapshot
     * argument is passed as {@code null}.
     */
    public CategoryInferenceManager() {
        CategoryRegistryManager registry = CategoryRegistryManager.getInstance();
        LuceneIndex sharedDataDictIndex =
                new LuceneIndex(registry.getSharedDataDictDirectory(), DictionarySearchMode.MATCH_SEMANTIC_DICTIONARY);
        LuceneIndex sharedKeywordIndex =
                new LuceneIndex(registry.getSharedKeywordDirectory(), DictionarySearchMode.MATCH_SEMANTIC_KEYWORD);
        this.dictionarySnapshot = new DictionarySnapshot(
                registry.getSharedCategoryMetadata(),
                sharedDataDictIndex,
                null,
                sharedKeywordIndex,
                registry.getRegexClassifier());
    }

    /**
     * Collects the current result of every column recognizer.
     *
     * @return a new map from column index to the list of {@link SemanticCategory} objects built
     *         from that column's recognizer result; the map is empty when no record has been
     *         processed yet
     */
    public Map<Integer, List<SemanticCategory>> getSemanticCategory() {
        Map<Integer, List<SemanticCategory>> categoriesByColumn = new HashMap<>();
        for (Map.Entry<Integer, CategoryRecognizer> column : this.categoryRecognizers.entrySet()) {
            categoriesByColumn.put(column.getKey(), this.toSemanticCategories(column.getValue()));
        }
        return categoriesByColumn;
    }

    /**
     * Processes one record: the field at position {@code i} is handed to the recognizer of
     * column {@code i}. A recognizer is created, prepared and registered the first time a
     * column index is encountered.
     *
     * @param record the field values of the record; a {@code null} field is forwarded to the
     *               recognizer as {@code null}, any other field as its {@code toString()} value
     * @return always {@code true}
     */
    public boolean inferCategory(Object[] record) {
        for (int columnIndex = 0; columnIndex < record.length; columnIndex++) {
            CategoryRecognizer recognizer = this.categoryRecognizers.get(columnIndex);
            if (recognizer == null) {
                // First value seen for this column: set up its recognizer before use.
                recognizer = this.newCategoryRecognizer();
                recognizer.prepare();
                this.categoryRecognizers.put(columnIndex, recognizer);
            }

            Object field = record[columnIndex];
            String fieldText;
            if (field == null) {
                fieldText = null;
            } else {
                fieldText = field.toString();
            }
            recognizer.process(fieldText);
        }
        return true;
    }

    /**
     * Converts each {@link CategoryFrequency} of the given recognizer's result into a
     * {@link SemanticCategory}, keeping the iteration order of the result.
     */
    private List<SemanticCategory> toSemanticCategories(CategoryRecognizer recognizer) {
        List<SemanticCategory> converted = new ArrayList<>();
        for (CategoryFrequency frequency : recognizer.getResult()) {
            converted.add(new SemanticCategory(
                    frequency.getCategoryId(),
                    frequency.getCategoryName(),
                    frequency.getCount(),
                    frequency.getFrequency()));
        }
        return converted;
    }

    /**
     * Creates a recognizer backed by this manager's dictionary snapshot.
     *
     * @throws DQSemanticRuntimeException wrapping the {@link IOException} raised while
     *         constructing the recognizer
     */
    private CategoryRecognizer newCategoryRecognizer() {
        CategoryRecognizer recognizer;
        try {
            recognizer = new DefaultCategoryRecognizer(this.dictionarySnapshot);
        } catch (IOException e) {
            throw new DQSemanticRuntimeException("Unable to find resources.", e);
        }
        return recognizer;
    }
}

