package org.talend.dataquality.semantic.index;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.FieldCacheTermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.model.ValidationMode;

/* loaded from: input_file:org/talend/dataquality/semantic/index/DictionarySearcher.class */
/**
 * Searcher over a Lucene index, backed by a {@link SearcherManager}.
 *
 * <p>Every method that needs an {@link IndexSearcher} acquires it from the manager and releases it
 * in a {@code finally} block, so a failing search never leaks a searcher reference.
 *
 * <p>NOTE(review): the constructors only log when the index cannot be opened; the manager is then
 * left {@code null} and the search methods will fail with a {@link NullPointerException}.
 * The filter cache is a plain {@link HashMap} with no synchronization.
 */
public class DictionarySearcher extends AbstractDictionarySearcher {

    public static final String UNABLE_TO_OPEN_INDEX = "Unable to open synonym index.";

    private static final Logger LOGGER = LoggerFactory.getLogger(DictionarySearcher.class);

    /** Maximum number of hits returned by {@link #findSimilarValuesInCategory(String, String)}. */
    private static final int MAX_SIMILAR_VALUES = 50;

    private SearcherManager mgr;

    /**
     * Category filters, keyed by the id of the category passed to
     * {@link #validDocumentWithCategories(String, DQCategory, Set)}.
     */
    private final Map<String, CachingWrapperFilter> categoryToCache = new HashMap<>();

    /** Filter applied by {@link #searchDocumentBySynonym(String)}; {@code null} until set. */
    private CachingWrapperFilter cachingWrapperFilterForDiscovery;

    /**
     * Opens the index stored in the given file-system directory.
     *
     * @param str path of the index directory
     */
    public DictionarySearcher(String str) {
        try {
            this.mgr = new SearcherManager(FSDirectory.open(new File(str)), (SearcherFactory) null);
        } catch (IOException e) {
            LOGGER.error(UNABLE_TO_OPEN_INDEX, e);
        }
    }

    /**
     * Opens the index located at the given URI through {@link ClassPathDirectory}.
     *
     * @param uri location of the index
     */
    public DictionarySearcher(URI uri) {
        try {
            this.mgr = new SearcherManager(ClassPathDirectory.open(uri), (SearcherFactory) null);
        } catch (IOException e) {
            LOGGER.error(UNABLE_TO_OPEN_INDEX, e);
        }
    }

    /**
     * Opens the index held by an existing Lucene directory.
     *
     * @param directory directory containing the index
     */
    public DictionarySearcher(Directory directory) {
        try {
            this.mgr = new SearcherManager(directory, (SearcherFactory) null);
        } catch (IOException e) {
            LOGGER.error(UNABLE_TO_OPEN_INDEX, e);
        }
    }

    /**
     * Restricts {@link #searchDocumentBySynonym(String)} to documents whose category id field
     * matches one of the given values.
     *
     * @param list category ids to keep
     */
    public void setCategoriesToSearch(List<String> list) {
        String[] categoryIds = list.toArray(new String[list.size()]);
        this.cachingWrapperFilterForDiscovery = new CachingWrapperFilter(
                new FieldCacheTermsFilter(AbstractDictionarySearcher.F_CATID, categoryIds));
    }

    /**
     * Searches the index for the given input using the query that matches the current search mode,
     * filtered by the categories set through {@link #setCategoriesToSearch(List)}.
     *
     * @param str the input to look up
     * @return the top hits, limited to {@code topDocLimit}
     * @throws IOException if the search fails
     */
    @Override // org.talend.dataquality.semantic.index.AbstractDictionarySearcher
    public TopDocs searchDocumentBySynonym(String str) throws IOException {
        Query query = createQueryForSearchMode(str);
        IndexSearcher searcher = this.mgr.acquire();
        try {
            return searcher.search(query, this.cachingWrapperFilterForDiscovery, this.topDocLimit);
        } finally {
            this.mgr.release(searcher);
        }
    }

    /**
     * Loads a document by its Lucene document number.
     *
     * @param i the document number
     * @return the document, or {@code null} if it could not be read
     */
    @Override // org.talend.dataquality.semantic.index.AbstractDictionarySearcher
    public Document getDocument(int i) {
        try {
            IndexSearcher searcher = this.mgr.acquire();
            try {
                return searcher.doc(i);
            } finally {
                this.mgr.release(searcher);
            }
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Checks whether the input is valid for a category, or for one of its children when a
     * non-empty children set is supplied.
     *
     * <p>Without children, the category's own validation mode is used (EXACT when it has none);
     * in SIMPLIFIED mode any hit is enough. With children, each hit is validated with the mode of
     * the child whose id equals the hit's category id field.
     *
     * @param str the input to validate
     * @param dQCategory the category whose id keys the cached filter
     * @param set child categories; may be {@code null} or empty
     * @return {@code true} if at least one hit validates the input
     * @throws IOException if the search fails
     */
    public boolean validDocumentWithCategories(String str, DQCategory dQCategory, Set<DQCategory> set) throws IOException {
        Query query = createQueryForSearchMode(str);
        boolean hasChildren = !CollectionUtils.isEmpty(set);
        IndexSearcher searcher = this.mgr.acquire();
        try {
            CachingWrapperFilter filter = getOrCreateCategoryFilter(dQCategory, set, hasChildren);
            TopDocs hits = searcher.search(query, filter, this.topDocLimit);

            ValidationMode validationMode = ValidationMode.EXACT;
            if (!hasChildren && dQCategory.getValidationMode() != null) {
                validationMode = dQCategory.getValidationMode();
                if (ValidationMode.SIMPLIFIED.equals(validationMode)) {
                    return hits.totalHits != 0;
                }
            }

            for (ScoreDoc hit : hits.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                if (hasChildren) {
                    validationMode = getChildrenValidationMode(set, doc);
                }
                if (validDocumentByValidationMode(doc, str, validationMode)) {
                    return true;
                }
            }
            return false;
        } finally {
            this.mgr.release(searcher);
        }
    }

    /** Builds the query that corresponds to the current search mode. */
    private Query createQueryForSearchMode(String input) {
        switch (this.searchMode) {
            case MATCH_SEMANTIC_KEYWORD:
                return createQueryForSemanticKeywordMatch(input);
            case MATCH_SEMANTIC_DICTIONARY:
            default:
                return createQueryForSemanticDictionaryMatch(input);
        }
    }

    /**
     * Returns the cached filter for the category, creating and caching it on first use.
     *
     * <p>NOTE(review): the cache key is the category id only, so a filter built from one children
     * set is reused for later calls with the same category even if the children differ.
     */
    private CachingWrapperFilter getOrCreateCategoryFilter(DQCategory category, Set<DQCategory> children,
            boolean hasChildren) {
        CachingWrapperFilter filter = this.categoryToCache.get(category.getId());
        if (filter != null) {
            return filter;
        }
        String[] categoryIds;
        if (hasChildren) {
            Set<String> childIds = new HashSet<>();
            for (DQCategory child : children) {
                childIds.add(child.getId());
            }
            categoryIds = childIds.toArray(new String[childIds.size()]);
        } else {
            categoryIds = new String[] { category.getId() };
        }
        filter = new CachingWrapperFilter(new FieldCacheTermsFilter(AbstractDictionarySearcher.F_CATID, categoryIds));
        this.categoryToCache.put(category.getId(), filter);
        return filter;
    }

    /**
     * Finds the validation mode of the child category the document belongs to.
     *
     * @return the child's mode, or EXACT when the child has none or no child matches
     */
    private ValidationMode getChildrenValidationMode(Set<DQCategory> set, Document document) {
        // Read the id once; Document.get returns null instead of failing when the field is absent.
        String documentCategoryId = document.get(AbstractDictionarySearcher.F_CATID);
        for (DQCategory child : set) {
            if (child.getId().equals(documentCategoryId)) {
                return child.getValidationMode() != null ? child.getValidationMode() : ValidationMode.EXACT;
            }
        }
        LOGGER.error("The document does not belong to any children category");
        return ValidationMode.EXACT;
    }

    /**
     * Checks the input against the raw values of a document under the given validation mode.
     * SIMPLIFIED always validates; otherwise a raw value must equal the input after both have
     * been transformed for the mode.
     */
    private boolean validDocumentByValidationMode(Document document, String str, ValidationMode validationMode) {
        if (ValidationMode.SIMPLIFIED.equals(validationMode)) {
            return true;
        }
        String expected = transformSringByValidationMode(str, validationMode);
        if (StringUtils.isEmpty(expected)) {
            return false;
        }
        for (String rawValue : document.getValues(AbstractDictionarySearcher.F_RAW)) {
            if (expected.equals(transformSringByValidationMode(rawValue, validationMode))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Lower-cases the value and strips its accents in EXACT_IGNORE_CASE_AND_ACCENT mode; returns
     * it unchanged otherwise. A {@code null} value is returned as is.
     */
    private String transformSringByValidationMode(String str, ValidationMode validationMode) {
        if (str != null && ValidationMode.EXACT_IGNORE_CASE_AND_ACCENT.equals(validationMode)) {
            return StringUtils.stripAccents(str.toLowerCase());
        }
        return str;
    }

    /**
     * Builds a filter that accepts documents whose word field matches one of the given values.
     *
     * @param set values of the word field to accept
     * @return a terms filter over the word field
     */
    protected Filter createFilterForSemanticTypes(Set<String> set) {
        List<Term> terms = new ArrayList<>(set.size());
        for (String word : set) {
            terms.add(new Term(AbstractDictionarySearcher.F_WORD, word));
        }
        return new TermsFilter(terms);
    }

    /**
     * Returns the first value of the word field of a document.
     *
     * @param i the document number
     * @return the word, or {@code null} if the document could not be read
     */
    public String getWordByDocNumber(int i) {
        Document document = getDocument(i);
        return document == null ? null : document.getValues(AbstractDictionarySearcher.F_WORD)[0];
    }

    /**
     * Returns all raw values of a document.
     *
     * @param i the document number
     * @return the raw values, or {@code null} if the document could not be read
     */
    public String[] getSynonymsByDocNumber(int i) {
        Document document = getDocument(i);
        return document == null ? null : document.getValues(AbstractDictionarySearcher.F_RAW);
    }

    /**
     * Returns the number of documents in the index.
     *
     * @return the document count, or {@code -1} if the index could not be read
     */
    public int getNumDocs() {
        try {
            IndexSearcher searcher = this.mgr.acquire();
            try {
                return searcher.getIndexReader().numDocs();
            } finally {
                this.mgr.release(searcher);
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            return -1;
        }
    }

    /**
     * Closes the searcher manager. Does nothing if the index was never opened.
     */
    public void close() {
        if (this.mgr == null) {
            return;
        }
        try {
            // Close the manager itself: acquiring a searcher and closing its reader would only
            // cancel out the reference just acquired and leave the index open.
            this.mgr.close();
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
    }

    /** Asks the searcher manager to refresh; failures are logged. */
    public void maybeRefreshIndex() {
        try {
            this.mgr.maybeRefresh();
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
    }

    /**
     * Finds documents whose synonym terms match the input, either as the whole analyzed phrase or
     * as any single analyzed token, optionally restricted by a word-field value.
     *
     * @param str the input to analyze and search for
     * @param str2 required value of the word field; ignored when {@code null} or empty
     * @return the top hits, at most 50
     * @throws IOException if the search fails
     */
    public TopDocs findSimilarValuesInCategory(String str, String str2) throws IOException {
        BooleanQuery combinedQuery = new BooleanQuery();
        if (StringUtils.isNotEmpty(str2)) {
            combinedQuery.add(new TermQuery(new Term(AbstractDictionarySearcher.F_WORD, str2)), BooleanClause.Occur.MUST);
        }

        List<String> tokens = getTokensFromAnalyzer(str);

        // Either the joined tokens match as one term, or any single token matches.
        BooleanQuery anyTokenQuery = new BooleanQuery();
        for (String token : tokens) {
            anyTokenQuery.add(getTermQuery(AbstractDictionarySearcher.F_SYNTERM, token, true), BooleanClause.Occur.SHOULD);
        }
        BooleanQuery valueQuery = new BooleanQuery();
        valueQuery.add(getTermQuery(AbstractDictionarySearcher.F_SYNTERM, StringUtils.join(tokens, ' '), true),
                BooleanClause.Occur.SHOULD);
        valueQuery.add(anyTokenQuery, BooleanClause.Occur.SHOULD);
        combinedQuery.add(valueQuery, BooleanClause.Occur.MUST);

        IndexSearcher searcher = this.mgr.acquire();
        try {
            return searcher.search(combinedQuery, MAX_SIMILAR_VALUES);
        } finally {
            this.mgr.release(searcher);
        }
    }

    /**
     * Lists one page of the documents whose category id field equals the given id.
     *
     * @param str the category id
     * @param i number of leading hits to skip; values {@code <= 0} start at the first hit
     * @param i2 maximum number of documents to return
     * @return the documents of the page; empty when there are no hits or the index cannot be read
     */
    public List<Document> listDocumentsByCategoryId(String str, int i, int i2) {
        try {
            IndexSearcher searcher = this.mgr.acquire();
            try {
                List<Document> documents = new ArrayList<>();
                IndexReader reader = searcher.getIndexReader();
                Query categoryQuery = new TermQuery(new Term(AbstractDictionarySearcher.F_CATID, str));

                TopDocs page;
                if (i <= 0) {
                    page = searcher.search(categoryQuery, i2);
                } else {
                    // Only the first i hits are needed to locate the hit that precedes the page.
                    TopDocs skipped = searcher.search(categoryQuery, i);
                    if (skipped.scoreDocs.length == 0) {
                        // No hit at all: there is no anchor to search after.
                        return documents;
                    }
                    ScoreDoc lastSkipped = skipped.scoreDocs[skipped.scoreDocs.length - 1];
                    page = searcher.searchAfter(lastSkipped, categoryQuery, i2);
                }

                for (ScoreDoc hit : page.scoreDocs) {
                    documents.add(reader.document(hit.doc));
                }
                return documents;
            } finally {
                this.mgr.release(searcher);
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            return Collections.emptyList();
        }
    }
}
