package org.talend.dataquality.semantic.classifier.impl;

import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import org.talend.dataquality.semantic.classifier.ISubCategoryClassifier;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.DQCategory;

/* loaded from: input_file:org/talend/dataquality/semantic/classifier/impl/DataDictFieldClassifier.class */
/**
 * Sub-category classifier backed by two {@link Index} instances: a dictionary index and a
 * keyword index.
 *
 * <p>The dictionary index is consulted for every input. The keyword index is consulted in
 * addition only when the input contains at least {@link #MAX_TOKEN_FOR_KEYWORDS}
 * space-separated tokens.
 */
public class DataDictFieldClassifier implements ISubCategoryClassifier {
    private static final long serialVersionUID = 6174669848299972111L;

    /**
     * Token-count threshold: inputs with fewer space-separated tokens than this are looked up in
     * the dictionary index only; inputs with this many tokens or more are also looked up in the
     * keyword index.
     */
    private static final int MAX_TOKEN_FOR_KEYWORDS = 3;

    private final Index dictionary;
    private final Index keyword;

    /**
     * Creates a classifier over the given indexes.
     *
     * @param index the dictionary index, queried by every {@link #classify(String)} call and used
     *     by {@link #validCategories(String, DQCategory, Set)}
     * @param index2 the keyword index, queried only for inputs that reach the token threshold
     */
    public DataDictFieldClassifier(Index index, Index index2) {
        this.dictionary = index;
        this.keyword = index2;
    }

    /**
     * Collects the categories found for the given value.
     *
     * @param str the value to classify; tokens are counted by splitting on the space character
     * @return a new mutable set holding the categories returned by the dictionary index, plus
     *     those returned by the keyword index when {@code str} has at least
     *     {@code MAX_TOKEN_FOR_KEYWORDS} tokens
     */
    @Override
    public Set<String> classify(String str) {
        final int tokenCount = new StringTokenizer(str, " ").countTokens();
        final Set<String> categories = new HashSet<String>();

        // The dictionary lookup applies to every input, whatever its token count.
        categories.addAll(this.dictionary.findCategories(str));

        // Only inputs that reach the token threshold are additionally matched against keywords.
        if (tokenCount >= MAX_TOKEN_FOR_KEYWORDS) {
            categories.addAll(this.keyword.findCategories(str));
        }
        return categories;
    }

    /**
     * Delegates category validation to the dictionary index; the keyword index is not involved.
     *
     * @param str the value to validate
     * @param dQCategory the category passed through to the dictionary index
     * @param set the category set passed through to the dictionary index
     * @return whatever the dictionary index reports for these arguments
     */
    @Override
    public boolean validCategories(String str, DQCategory dQCategory, Set<DQCategory> set) {
        return this.dictionary.validCategories(str, dQCategory, set);
    }

    /**
     * Closes both underlying indexes. The keyword index is closed even when closing the
     * dictionary index throws, so that one failure does not leave the other index open.
     */
    public void closeIndex() {
        try {
            this.dictionary.closeIndex();
        } finally {
            this.keyword.closeIndex();
        }
    }
}
