package org.talend.dataquality.standardization.index;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.talend.dataquality.standardization.query.FirstNameStandardize;

/* loaded from: input_file:org/talend/dataquality/standardization/index/SynonymIndexSearcher.class */
public class SynonymIndexSearcher {
    /** Class-wide log4j logger. */
    private static final Logger LOGGER = Logger.getLogger(SynonymIndexSearcher.class);
    /** Index field that the combined query builders search token by token for the reference word. */
    public static final String F_WORD = "word";
    /** Index field holding the synonyms; searched token by token and read back as stored values. */
    public static final String F_SYN = "syn";
    /** Index field searched with the whole lower-cased input as one single term (reference word). */
    public static final String F_WORDTERM = "wordterm";
    /** Index field searched with the whole lower-cased input as one single term (synonym). */
    public static final String F_SYNTERM = "synterm";
    /** Source of every IndexSearcher used by this class; stays null until an index has been opened. */
    private SearcherManager mgr;
    /** Boost applied to the single-term query on {@link #F_WORDTERM}. */
    private static final float WORD_TERM_BOOST = 2.0f;
    /** Boost applied to the token-level (boolean or phrase) query on {@link #F_WORD}. */
    private static final float WORD_BOOST = 1.5f;
    /** Upper bound on the number of input tokens taken into account by the two semantic query builders. */
    private static final int MAX_TOKEN_COUNT_FOR_SEMANTIC_MATCH = 20;
    /** Analyzer exposed through the accessor pair; created lazily by {@link #getAnalyzer()} when unset. */
    private Analyzer analyzer;
    /** Maximum number of hits requested from each search call. */
    private int topDocLimit = 3;
    /** Maximum edit distance handed to FuzzyQuery when a fuzzy term query is built. */
    private int maxEdits = 1;
    /** Selects which query is built by {@link #searchDocumentBySynonym(String)}. */
    private SynonymSearchMode searchMode = SynonymSearchMode.MATCH_ANY;
    /** Only stored and returned by its accessors; not read by any query logic in this class. */
    private float matchingThreshold = 0.0f;
    /** Slop set on the phrase queries built for partial matching. */
    private int slop = 1;

    /* renamed from: org.talend.dataquality.standardization.index.SynonymIndexSearcher$1, reason: invalid class name */
    /* loaded from: input_file:org/talend/dataquality/standardization/index/SynonymIndexSearcher$1.class */
    /**
     * Lookup table translating a {@link SynonymSearchMode} ordinal into a 1-based case number
     * (MATCH_ANY = 1 ... MATCH_SEMANTIC_KEYWORD = 8), meant to be used as the selector of a
     * {@code switch} over the enum.
     *
     * The array is sized from {@code SynonymSearchMode.values()} and filled once in the static
     * initializer. Each assignment is wrapped in its own try/catch so that a constant missing at
     * run time ({@link NoSuchFieldError}) is skipped and leaves its slot at 0.
     * NOTE(review): the shape and the "$SwitchMap$" name suggest compiler-generated code recovered
     * by the decompiler rather than hand-written source - confirm before editing by hand.
     */
    static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode = new int[SynonymSearchMode.values().length];

        static {
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_ANY.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
                // deliberately ignored: the slot keeps its default value 0
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_PARTIAL.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_ALL.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_EXACT.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_ANY_FUZZY.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_ALL_FUZZY.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_SEMANTIC_DICTIONARY.ordinal()] = 7;
            } catch (NoSuchFieldError e7) {
            }
            try {
                $SwitchMap$org$talend$dataquality$standardization$index$SynonymIndexSearcher$SynonymSearchMode[SynonymSearchMode.MATCH_SEMANTIC_KEYWORD.ordinal()] = 8;
            } catch (NoSuchFieldError e8) {
            }
        }
    }

    /**
     * Matching strategies selectable for a synonym search. Every constant carries a label equal
     * to its own name, which {@link #get(String)} uses for a case-insensitive lookup.
     */
    public enum SynonymSearchMode {
        MATCH_ANY("MATCH_ANY"),
        MATCH_PARTIAL("MATCH_PARTIAL"),
        MATCH_ALL("MATCH_ALL"),
        MATCH_EXACT("MATCH_EXACT"),
        MATCH_ANY_FUZZY("MATCH_ANY_FUZZY"),
        MATCH_ALL_FUZZY("MATCH_ALL_FUZZY"),
        MATCH_SEMANTIC_DICTIONARY("MATCH_SEMANTIC_DICTIONARY"),
        MATCH_SEMANTIC_KEYWORD("MATCH_SEMANTIC_KEYWORD");

        /** Textual identifier compared (ignoring case) by {@link #get(String)}. */
        private final String label;

        SynonymSearchMode(String str) {
            this.label = str;
        }

        private String getLabel() {
            return this.label;
        }

        /**
         * Resolves a mode from its label, ignoring case.
         *
         * @param str the label to resolve; may be {@code null}
         * @return the mode whose label matches, or {@link #MATCH_ANY} when nothing matches
         *         (including for a {@code null} argument)
         */
        public static SynonymSearchMode get(String str) {
            final SynonymSearchMode[] candidates = values();
            for (int idx = 0; idx < candidates.length; idx++) {
                final SynonymSearchMode candidate = candidates[idx];
                if (candidate.getLabel().equalsIgnoreCase(str)) {
                    return candidate;
                }
            }
            // Unknown or null labels fall back to the most permissive mode.
            return MATCH_ANY;
        }
    }

    /**
     * Creates a searcher that is not bound to any index yet.
     *
     * The searcher manager is left unset, so {@link #openIndexInFS(String)} has to be called
     * before any search or document accessor is used; those methods dereference the manager
     * without a null check.
     */
    @Deprecated
    public SynonymIndexSearcher() {
    }

    /**
     * Creates a searcher on the index stored in the given file-system folder.
     *
     * Opening is best effort: an {@link IOException} is logged and swallowed, in which case the
     * instance is still returned but its searcher manager stays unset.
     *
     * @param str path of the index folder, passed unchanged to {@link #openIndexInFS(String)}
     */
    public SynonymIndexSearcher(String str) {
        try {
            openIndexInFS(str);
        } catch (IOException e) {
            // Deliberately not rethrown; callers get an instance without an opened index.
            LOGGER.error("Unable to open synonym index.", e);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a searcher on an already opened Lucene directory.
     *
     * @param directory the directory handed to the new {@link SearcherManager}; no custom
     *                  {@link SearcherFactory} is supplied
     * @throws IOException if the searcher manager cannot be created on the directory
     */
    public SynonymIndexSearcher(Directory directory) throws IOException {
        this.mgr = new SearcherManager(directory, (SearcherFactory) null);
    }

    /**
     * Binds this searcher to the index located in a file-system folder.
     *
     * The previously assigned searcher manager, if any, is simply replaced.
     *
     * @param str path of the folder containing the index
     * @throws IOException if the folder cannot be opened as a directory or read as an index
     */
    public void openIndexInFS(String str) throws IOException {
        final File indexFolder = new File(str);
        final Directory indexDirectory = FSDirectory.open(indexFolder);
        // No custom SearcherFactory is supplied.
        this.mgr = new SearcherManager(indexDirectory, (SearcherFactory) null);
    }

    /**
     * Searches the entries whose {@code wordterm} field equals the given word, after trimming
     * and lower-casing it.
     *
     * @param str the word to look up; surrounding whitespace is ignored
     * @return the best hits (at most the configured top-doc limit), or {@code null} when
     *         {@code str} is {@code null} or blank, or when the search fails with an
     *         {@link IOException} (which is logged, not rethrown)
     */
    public TopDocs searchDocumentByWord(String str) {
        if (str == null) {
            return null;
        }
        String trimmed = str.trim();
        if ("".equals(trimmed)) {
            return null;
        }
        TopDocs topDocs = null;
        try {
            IndexSearcher indexSearcher = (IndexSearcher) this.mgr.acquire();
            try {
                topDocs = indexSearcher.search(createWordQueryFor(trimmed), this.topDocLimit);
            } finally {
                // Every acquire() must be paired with a release(), even when the search fails,
                // otherwise the underlying reader is never freed.
                this.mgr.release(indexSearcher);
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
        return topDocs;
    }

    /**
     * Searches the index with a query whose shape depends on the current search mode.
     *
     * <ul>
     * <li>MATCH_ANY (and any unhandled mode): combined query, exact terms, tokens optional</li>
     * <li>MATCH_PARTIAL: combined query using phrase queries with the configured slop</li>
     * <li>MATCH_ALL: combined query, exact terms, every token required</li>
     * <li>MATCH_EXACT: whole-input term queries only</li>
     * <li>MATCH_ANY_FUZZY / MATCH_ALL_FUZZY: as MATCH_ANY / MATCH_ALL but with fuzzy terms</li>
     * <li>MATCH_SEMANTIC_DICTIONARY / MATCH_SEMANTIC_KEYWORD: the dedicated semantic queries</li>
     * </ul>
     *
     * @param str the text to search for
     * @return the best hits, at most the configured top-doc limit
     * @throws IOException if tokenizing the input or accessing the index fails
     * @throws NullPointerException if the search mode has been set to {@code null}
     */
    public TopDocs searchDocumentBySynonym(String str) throws IOException {
        final Query query;
        // Switch on the enum itself rather than on a hand-maintained ordinal table.
        switch (this.searchMode) {
            case MATCH_PARTIAL:
                query = createCombinedQueryForPartialMatch(str);
                break;
            case MATCH_ALL:
                query = createCombinedQueryFor(str, false, true);
                break;
            case MATCH_EXACT:
                query = createCombinedQueryForExactMatch(str);
                break;
            case MATCH_ANY_FUZZY:
                query = createCombinedQueryFor(str, true, false);
                break;
            case MATCH_ALL_FUZZY:
                query = createCombinedQueryFor(str, true, true);
                break;
            case MATCH_SEMANTIC_DICTIONARY:
                query = createQueryForSemanticDictionaryMatch(str);
                break;
            case MATCH_SEMANTIC_KEYWORD:
                query = createQueryForSemanticKeywordMatch(str);
                break;
            case MATCH_ANY:
            default:
                query = createCombinedQueryFor(str, false, false);
                break;
        }
        IndexSearcher indexSearcher = (IndexSearcher) this.mgr.acquire();
        try {
            return indexSearcher.search(query, this.topDocLimit);
        } finally {
            // Hand the searcher back even when the search throws.
            this.mgr.release(indexSearcher);
        }
    }

    /**
     * Counts the synonyms stored for a reference word.
     *
     * @param str the reference word; it is lower-cased before being matched against the
     *            {@code wordterm} field
     * @return the number of {@code syn} values of the best matching entry, or {@code -1} when
     *         no entry matches or when the index cannot be read (the error is logged)
     */
    public int getSynonymCount(String str) {
        int synonymCount = -1;
        try {
            Query wordQuery = createWordQueryFor(str);
            IndexSearcher indexSearcher = (IndexSearcher) this.mgr.acquire();
            try {
                TopDocs hits = indexSearcher.search(wordQuery, this.topDocLimit);
                if (hits.totalHits > 0) {
                    synonymCount = indexSearcher.doc(hits.scoreDocs[0].doc).getValues(F_SYN).length;
                }
            } finally {
                // Released on every path: match, no match, and failure.
                this.mgr.release(indexSearcher);
            }
        } catch (IOException e) {
            // Pass the exception as throwable so that the stack trace is logged too.
            LOGGER.error(e.getMessage(), e);
        }
        return synonymCount;
    }

    /**
     * Loads a document by its internal Lucene document number.
     *
     * @param i the document number
     * @return the stored document, or {@code null} when the document is marked as deleted or
     *         when reading it fails with an {@link IOException} (which is logged)
     */
    public Document getDocument(int i) {
        Document document = null;
        try {
            IndexSearcher indexSearcher = (IndexSearcher) this.mgr.acquire();
            try {
                IndexReader indexReader = indexSearcher.getIndexReader();
                Bits liveDocs = MultiFields.getLiveDocs(indexReader);
                // A null bit set means the index has no deletions at all.
                if (liveDocs == null || liveDocs.get(i)) {
                    document = indexReader.document(i);
                }
            } finally {
                // Released on every path, including the "deleted document" one.
                this.mgr.release(indexSearcher);
            }
        } catch (IOException e) {
            // Pass the exception as throwable so that the stack trace is logged too.
            LOGGER.error(e.getMessage(), e);
        }
        return document;
    }

    /**
     * Returns the reference word of a document.
     *
     * @param i the internal Lucene document number
     * @return the first {@code word} value of the document, or {@code null} when the document
     *         cannot be loaded or carries no {@code word} value
     */
    public String getWordByDocNumber(int i) {
        Document document = getDocument(i);
        if (document == null) {
            return null;
        }
        String[] words = document.getValues(F_WORD);
        // Guard against documents without a word value instead of failing on index 0.
        return words.length > 0 ? words[0] : null;
    }

    /**
     * Returns all synonyms of a document.
     *
     * @param i the internal Lucene document number
     * @return every {@code syn} value of the document, or {@code null} when the document
     *         cannot be loaded
     */
    public String[] getSynonymsByDocNumber(int i) {
        final Document entry = getDocument(i);
        return entry == null ? null : entry.getValues(F_SYN);
    }

    /**
     * Reports the value of {@code numDocs()} of the current index reader.
     *
     * @return the reader's document count, or {@code -1} when acquiring or releasing the
     *         searcher fails with an {@link IOException} (which is logged)
     */
    public int getNumDocs() {
        try {
            final IndexSearcher searcher = (IndexSearcher) this.mgr.acquire();
            final IndexReader reader = searcher.getIndexReader();
            final int count = reader.numDocs();
            this.mgr.release(searcher);
            return count;
        } catch (IOException ioe) {
            LOGGER.error(ioe.getMessage(), ioe);
        }
        return -1;
    }

    /**
     * Reports the value of {@code maxDoc()} of the current index reader.
     *
     * @return the reader's {@code maxDoc} value, or {@code -1} when acquiring or releasing the
     *         searcher fails with an {@link IOException} (which is logged)
     */
    public int getMaxDoc() {
        try {
            final IndexSearcher searcher = (IndexSearcher) this.mgr.acquire();
            final IndexReader reader = searcher.getIndexReader();
            final int upperBound = reader.maxDoc();
            this.mgr.release(searcher);
            return upperBound;
        } catch (IOException ioe) {
            LOGGER.error(ioe.getMessage(), ioe);
        }
        return -1;
    }

    /**
     * @return the maximum number of hits requested from each search
     */
    public int getTopDocLimit() {
        return this.topDocLimit;
    }

    /**
     * @param i the maximum number of hits to request from each search; stored without validation
     */
    public void setTopDocLimit(int i) {
        this.topDocLimit = i;
    }

    /**
     * @return the slop applied to the phrase queries built for partial matching
     */
    public int getSlop() {
        return this.slop;
    }

    /**
     * @param i the slop to apply to the phrase queries built for partial matching; stored
     *          without validation
     */
    public void setSlop(int i) {
        this.slop = i;
    }

    /**
     * Replaces the analyzer returned by {@link #getAnalyzer()}.
     *
     * @param analyzer the analyzer to store; {@code null} makes the getter create a default one
     *                 on its next call
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * Returns the configured analyzer, creating a default one on first use.
     *
     * @return the stored analyzer; when none has been set, a {@link StandardAnalyzer} built with
     *         an empty stop-word set is created, remembered and returned
     */
    public Analyzer getAnalyzer() {
        if (this.analyzer != null) {
            return this.analyzer;
        }
        // Lazy default: standard analysis without any stop words.
        this.analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        return this.analyzer;
    }

    /**
     * Builds the exact-term query used to find an entry by its reference word.
     *
     * @param str the word; it is lower-cased and matched as one term against {@code wordterm}
     * @return a term query on the {@code wordterm} field
     */
    private Query createWordQueryFor(String str) {
        final String normalizedWord = str.toLowerCase();
        final Term wordTerm = new Term(F_WORDTERM, normalizedWord);
        return new TermQuery(wordTerm);
    }

    /**
     * Builds a single-term query, exact or fuzzy.
     *
     * @param str  name of the field to search
     * @param str2 text of the term
     * @param z    {@code true} for a {@link FuzzyQuery} limited to the configured maximum edit
     *             distance, {@code false} for a plain {@link TermQuery}
     * @return the query for the term
     */
    private Query getTermQuery(String str, String str2, boolean z) {
        final Term term = new Term(str, str2);
        if (z) {
            return new FuzzyQuery(term, this.maxEdits);
        }
        return new TermQuery(term);
    }

    /**
     * Builds the combined query used by the "any"/"all" modes and their fuzzy variants.
     *
     * Four optional clauses are combined: the whole lower-cased input against {@code wordterm}
     * (boosted by WORD_TERM_BOOST) and against {@code synterm}, plus one boolean query per
     * token field ({@code word}, boosted by WORD_BOOST, and {@code syn}) made of the input's
     * tokens.
     *
     * @param str the text to search for
     * @param z   {@code true} to build fuzzy term queries instead of exact ones
     * @param z2  {@code true} to require every token (MUST), {@code false} to make each token
     *            optional (SHOULD)
     * @return the combined boolean query
     * @throws IOException if tokenizing the input fails
     */
    private Query createCombinedQueryFor(String str, boolean z, boolean z2) throws IOException {
        final String lowered = str.toLowerCase();
        final Query wholeWordQuery = getTermQuery(F_WORDTERM, lowered, z);
        final Query wholeSynonymQuery = getTermQuery(F_SYNTERM, lowered, z);

        // The same occurrence rule applies to every token clause.
        final BooleanClause.Occur tokenOccur;
        if (z2) {
            tokenOccur = BooleanClause.Occur.MUST;
        } else {
            tokenOccur = BooleanClause.Occur.SHOULD;
        }

        final BooleanQuery wordTokensQuery = new BooleanQuery();
        final BooleanQuery synonymTokensQuery = new BooleanQuery();
        for (String token : getTokensFromAnalyzer(str)) {
            wordTokensQuery.add(getTermQuery(F_WORD, token, z), tokenOccur);
            synonymTokensQuery.add(getTermQuery(F_SYN, token, z), tokenOccur);
        }

        wholeWordQuery.setBoost(WORD_TERM_BOOST);
        wordTokensQuery.setBoost(WORD_BOOST);

        final BooleanQuery combined = new BooleanQuery();
        combined.add(wholeWordQuery, BooleanClause.Occur.SHOULD);
        combined.add(wholeSynonymQuery, BooleanClause.Occur.SHOULD);
        combined.add(wordTokensQuery, BooleanClause.Occur.SHOULD);
        combined.add(synonymTokensQuery, BooleanClause.Occur.SHOULD);
        return combined;
    }

    /**
     * Builds the combined query of the partial-match mode.
     *
     * Four optional clauses are combined: the whole lower-cased input against {@code wordterm}
     * (boosted by WORD_TERM_BOOST) and against {@code synterm}, plus one phrase query per token
     * field ({@code word}, boosted by WORD_BOOST, and {@code syn}) made of the input's tokens
     * in order, both using the configured slop.
     *
     * @param str the text to search for
     * @return the combined boolean query
     * @throws IOException if tokenizing the input fails
     */
    private Query createCombinedQueryForPartialMatch(String str) throws IOException {
        final String lowered = str.toLowerCase();
        final Query wholeWordQuery = getTermQuery(F_WORDTERM, lowered, false);
        final Query wholeSynonymQuery = getTermQuery(F_SYNTERM, lowered, false);

        final PhraseQuery wordPhrase = new PhraseQuery();
        final PhraseQuery synonymPhrase = new PhraseQuery();
        wordPhrase.setSlop(this.slop);
        synonymPhrase.setSlop(this.slop);
        for (String token : getTokensFromAnalyzer(str)) {
            final String phraseTerm = token.toLowerCase();
            wordPhrase.add(new Term(F_WORD, phraseTerm));
            synonymPhrase.add(new Term(F_SYN, phraseTerm));
        }

        wholeWordQuery.setBoost(WORD_TERM_BOOST);
        wordPhrase.setBoost(WORD_BOOST);

        final BooleanQuery combined = new BooleanQuery();
        combined.add(wholeWordQuery, BooleanClause.Occur.SHOULD);
        combined.add(wholeSynonymQuery, BooleanClause.Occur.SHOULD);
        combined.add(wordPhrase, BooleanClause.Occur.SHOULD);
        combined.add(synonymPhrase, BooleanClause.Occur.SHOULD);
        return combined;
    }

    /**
     * Builds the query of the semantic-dictionary mode: the input's tokens, joined with single
     * spaces, are matched as one exact term against {@code synterm}.
     *
     * @param str the text to search for
     * @return a term query on {@code synterm}; when the input yields more tokens than
     *         MAX_TOKEN_COUNT_FOR_SEMANTIC_MATCH, a term query on the empty string is returned
     *         instead (presumably so that overly long inputs match nothing - TODO confirm)
     * @throws IOException if tokenizing the input fails
     */
    @Deprecated
    private Query createQueryForSemanticDictionaryMatch(String str) throws IOException {
        final List<String> tokens = getTokensFromAnalyzer(str);
        if (tokens.size() > MAX_TOKEN_COUNT_FOR_SEMANTIC_MATCH) {
            return new TermQuery(new Term(F_SYNTERM, ""));
        }
        final String joinedTokens = StringUtils.join(tokens, ' ');
        return getTermQuery(F_SYNTERM, joinedTokens, false);
    }

    /**
     * Builds the query of the semantic-keyword mode: one optional exact-term clause on
     * {@code syn} per input token, considering at most the first
     * MAX_TOKEN_COUNT_FOR_SEMANTIC_MATCH tokens.
     *
     * @param str the text to search for
     * @return the boolean query made of the token clauses
     * @throws IOException if tokenizing the input fails
     */
    @Deprecated
    private Query createQueryForSemanticKeywordMatch(String str) throws IOException {
        final List<String> tokens = getTokensFromAnalyzer(str);
        // Tokens beyond the cap are ignored.
        final int clauseCount = Math.min(tokens.size(), MAX_TOKEN_COUNT_FOR_SEMANTIC_MATCH);
        final BooleanQuery keywordQuery = new BooleanQuery();
        for (int pos = 0; pos < clauseCount; pos++) {
            keywordQuery.add(getTermQuery(F_SYN, tokens.get(pos), false), BooleanClause.Occur.SHOULD);
        }
        return keywordQuery;
    }

    /**
     * Builds the query of the exact-match mode: the whole lower-cased input is matched as one
     * term against {@code wordterm} (boosted by WORD_TERM_BOOST) or {@code synterm}.
     *
     * @param str the text to search for
     * @return the boolean query made of the two optional term clauses
     * @throws IOException declared by the signature; nothing in the body throws it
     */
    private Query createCombinedQueryForExactMatch(String str) throws IOException {
        final String lowered = str.toLowerCase();
        final Query wholeWordQuery = getTermQuery(F_WORDTERM, lowered, false);
        final Query wholeSynonymQuery = getTermQuery(F_SYNTERM, lowered, false);
        wholeWordQuery.setBoost(WORD_TERM_BOOST);

        final BooleanQuery exactQuery = new BooleanQuery();
        exactQuery.add(wholeWordQuery, BooleanClause.Occur.SHOULD);
        exactQuery.add(wholeSynonymQuery, BooleanClause.Occur.SHOULD);
        return exactQuery;
    }

    /**
     * Releases the index resources held by this searcher.
     *
     * The searcher manager itself is closed, which drops the reference it holds on the current
     * searcher. Acquiring a searcher here and closing its reader directly would leave that extra
     * reference outstanding, so the reader's reference count would never reach zero. The
     * {@link Directory} the manager was created on is not closed by this method.
     *
     * Does nothing when no index has been opened. An {@link IOException} raised while closing is
     * logged and not rethrown.
     */
    public void close() {
        if (this.mgr == null) {
            return;
        }
        try {
            this.mgr.close();
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
    }

    /**
     * @return the mode that decides which query {@link #searchDocumentBySynonym(String)} builds
     */
    public SynonymSearchMode getSearchMode() {
        return this.searchMode;
    }

    /**
     * @param synonymSearchMode the mode to use for subsequent synonym searches; stored without a
     *                          null check, and a {@code null} mode makes
     *                          {@link #searchDocumentBySynonym(String)} fail with a
     *                          NullPointerException
     */
    public void setSearchMode(SynonymSearchMode synonymSearchMode) {
        this.searchMode = synonymSearchMode;
    }

    /**
     * @param i the maximum edit distance handed to the fuzzy term queries; stored without
     *          validation
     */
    public void setMaxEdits(int i) {
        this.maxEdits = i;
    }

    /**
     * @return the stored matching threshold; this class only stores the value and does not
     *         apply it to any query
     */
    public float getMatchingThreshold() {
        return this.matchingThreshold;
    }

    /**
     * @param f the matching threshold to store
     */
    public void setMatchingThreshold(float f) {
        this.matchingThreshold = f;
    }

    /**
     * Convenience overload taking a double.
     *
     * @param d the matching threshold to store; narrowed to {@code float}
     */
    public void setMatchingThreshold(double d) {
        this.matchingThreshold = (float) d;
    }

    /**
     * Splits a text into normalized tokens.
     *
     * The text goes through a fixed chain: {@link StandardTokenizer}, {@link StandardFilter},
     * {@link LowerCaseFilter} and {@link ASCIIFoldingFilter}. The instance analyzer of this
     * class is not involved.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokens in their order of appearance; empty when the text yields no token
     * @throws IOException if the token stream fails while being consumed
     */
    public static List<String> getTokensFromAnalyzer(String str) throws IOException {
        StandardTokenizer standardTokenizer = new StandardTokenizer(new StringReader(str));
        ASCIIFoldingFilter tokenStream = new ASCIIFoldingFilter(new LowerCaseFilter(new StandardFilter(standardTokenizer)));
        List<String> tokens = new ArrayList<String>();
        try {
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            // Reset the outermost stream so that every filter of the chain is reset as well.
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                tokens.add(termAttribute.toString());
            }
            // Complete the consumer workflow before closing.
            tokenStream.end();
        } finally {
            // Closed even when tokenizing fails.
            tokenStream.close();
        }
        return tokens;
    }
}
