package org.talend.dataquality.semantic.index;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

/* loaded from: input_file:org/talend/dataquality/semantic/index/AbstractDictionarySearcher.class */
/**
 * Base class for searchers that look up input strings in a Lucene-backed dictionary index.
 *
 * <p>This class provides the shared configuration (hit limit, fuzzy edit distance, search mode),
 * the query builders that target the {@link #F_SYNTERM} field, and the text normalization
 * ({@link #getTokensFromAnalyzer(String)}) used to turn raw input into query terms. Access to the
 * actual index is left to subclasses through the abstract methods.
 */
public abstract class AbstractDictionarySearcher {
    // Index field names. Only F_SYNTERM is referenced inside this class; the others are
    // published for use elsewhere.
    public static final String F_ID = "docid";
    public static final String F_WORD = "word";
    public static final String F_SYN = "syn";
    public static final String F_SYNTERM = "synterm";
    public static final String F_RAW = "raw";
    public static final String F_CATID = "catid";

    /** Upper bound on the number of tokens turned into clauses by the keyword-match query. */
    private static final int MAX_TOKEN_COUNT_FOR_KEYWORD_MATCH = 20;

    /** Inputs longer than this many characters are never looked up by the dictionary-match query. */
    private static final int MAX_CHAR_COUNT_FOR_DICTIONARY_MATCH = 100;

    /** Hit limit; not read in this class (presumably consumed by subclasses -- confirm there). */
    protected int topDocLimit = 3;

    /** Maximum edit distance handed to {@link FuzzyQuery} when a fuzzy term query is requested. */
    private int maxEdits = 2;

    /** Current search mode; defaults to {@link DictionarySearchMode#MATCH_SEMANTIC_DICTIONARY}. */
    protected DictionarySearchMode searchMode = DictionarySearchMode.MATCH_SEMANTIC_DICTIONARY;

    /**
     * Searches the index for the given input.
     *
     * @param str the text to look up
     * @return the hits found by the implementation
     * @throws IOException if the underlying index cannot be read
     */
    public abstract TopDocs searchDocumentBySynonym(String str) throws IOException;

    /**
     * Retrieves a document from the index.
     *
     * @param i identifier of the document (presumably the Lucene doc number -- confirm in subclasses)
     * @return the matching document
     */
    public abstract Document getDocument(int i);

    /**
     * Sets the hit limit stored in {@link #topDocLimit}.
     *
     * @param i the new limit
     */
    public void setTopDocLimit(int i) {
        this.topDocLimit = i;
    }

    /**
     * Sets the maximum edit distance used when building fuzzy term queries.
     *
     * @param i the new maximum number of edits
     */
    public void setMaxEdits(int i) {
        this.maxEdits = i;
    }

    /**
     * @return the current search mode
     */
    public DictionarySearchMode getSearchMode() {
        return this.searchMode;
    }

    /**
     * Sets the search mode.
     *
     * @param dictionarySearchMode the mode to use from now on
     */
    public void setSearchMode(DictionarySearchMode dictionarySearchMode) {
        this.searchMode = dictionarySearchMode;
    }

    /**
     * Builds a single-term query.
     *
     * @param field name of the index field to search
     * @param text  term text to look for
     * @param fuzzy {@code true} for a {@link FuzzyQuery} bounded by {@code maxEdits},
     *              {@code false} for an exact {@link TermQuery}
     * @return the term query
     */
    private Query getTermQuery(String field, String text, boolean fuzzy) {
        Term term = new Term(field, text);
        if (fuzzy) {
            return new FuzzyQuery(term, this.maxEdits);
        }
        return new TermQuery(term);
    }

    /**
     * Builds an exact-match query on {@link #F_SYNTERM} for the whole normalized input.
     *
     * <p>The input is normalized with {@link #getJointTokens(String)}. A {@code null} input, or
     * one longer than {@code MAX_CHAR_COUNT_FOR_DICTIONARY_MATCH} characters, is not analyzed;
     * a query on the empty term is returned instead.
     *
     * @param str the raw input text, may be {@code null}
     * @return the query to run against the index
     * @throws IOException declared for interface compatibility with existing callers
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public Query createQueryForSemanticDictionaryMatch(String str) throws IOException {
        if (str == null || str.length() > MAX_CHAR_COUNT_FOR_DICTIONARY_MATCH) {
            return new TermQuery(new Term(F_SYNTERM, ""));
        }
        return getTermQuery(F_SYNTERM, getJointTokens(str), false);
    }

    /**
     * Builds a query that matches documents containing any of the input's tokens in
     * {@link #F_SYNTERM}.
     *
     * <p>Each token becomes an optional ({@code SHOULD}) exact term clause; only the first
     * {@code MAX_TOKEN_COUNT_FOR_KEYWORD_MATCH} tokens are used.
     *
     * @param str the raw input text, may be {@code null} (yields a query without clauses)
     * @return the boolean query to run against the index
     * @throws IOException declared for interface compatibility with existing callers
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public Query createQueryForSemanticKeywordMatch(String str) throws IOException {
        BooleanQuery query = new BooleanQuery();
        List<String> tokens = getTokensFromAnalyzer(str);
        int clauseCount = Math.min(tokens.size(), MAX_TOKEN_COUNT_FOR_KEYWORD_MATCH);
        for (String token : tokens.subList(0, clauseCount)) {
            query.add(getTermQuery(F_SYNTERM, token, false), BooleanClause.Occur.SHOULD);
        }
        return query;
    }

    /**
     * Normalizes the input and joins the resulting tokens with a single space.
     *
     * @param str the raw input text, may be {@code null}
     * @return the tokens of {@link #getTokensFromAnalyzer(String)} separated by {@code ' '}
     */
    public static String getJointTokens(String str) {
        return StringUtils.join(getTokensFromAnalyzer(str), ' ');
    }

    /**
     * Splits the input into normalized tokens.
     *
     * <p>The text runs through a {@link StandardTokenizer} followed by {@link StandardFilter},
     * {@link LowerCaseFilter} and {@link ASCIIFoldingFilter}. When exactly one token comes out,
     * it is discarded and replaced by the whole input, lower-cased and stripped of accents, so
     * that characters the tokenizer would have dropped are kept.
     *
     * @param str the raw input text; {@code null} yields an empty list
     * @return a new, mutable list of tokens (possibly empty)
     */
    public static List<String> getTokensFromAnalyzer(String str) {
        List<String> tokens = new ArrayList<>();
        if (str == null) {
            return tokens;
        }
        // try-with-resources guarantees close() even when tokenization fails half-way.
        try (ASCIIFoldingFilter filter = new ASCIIFoldingFilter(
                new LowerCaseFilter(new StandardFilter(new StandardTokenizer(new StringReader(str)))))) {
            CharTermAttribute termAttribute = filter.addAttribute(CharTermAttribute.class);
            // Reset through the outermost filter so every stage of the chain is reset.
            filter.reset();
            while (filter.incrementToken()) {
                tokens.add(termAttribute.toString());
            }
            filter.end();
        } catch (IOException ignored) {
            // Best effort: the source is an in-memory StringReader, so a failure is not expected.
            // Should one occur anyway, the tokens gathered so far are returned.
        }
        if (tokens.size() == 1) {
            tokens.clear();
            // Locale.ROOT keeps the casing independent of the JVM default locale, in line with
            // the locale-independent LowerCaseFilter used for multi-token input.
            tokens.add(StringUtils.stripAccents(str.toLowerCase(Locale.ROOT)));
        }
        return tokens;
    }
}
