package org.talend.dataquality.semantic.index;

import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.record.linkage.attribute.LevenshteinMatcher;
import org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod;
import org.talend.dataquality.semantic.model.DQCategory;

/* loaded from: input_file:org/talend/dataquality/semantic/index/LuceneIndex.class */
public class LuceneIndex implements Index {
    private static final Logger LOGGER = LoggerFactory.getLogger(LuceneIndex.class);
    private final DictionarySearcher searcher;
    private final LevenshteinMatcher levenshtein;

    public DictionarySearcher getSearcher() {
        return this.searcher;
    }

    public LuceneIndex(URI uri, DictionarySearchMode dictionarySearchMode) {
        this(new DictionarySearcher(uri), dictionarySearchMode);
    }

    public LuceneIndex(Directory directory, DictionarySearchMode dictionarySearchMode) {
        this(new DictionarySearcher(directory), dictionarySearchMode);
    }

    private LuceneIndex(DictionarySearcher dictionarySearcher, DictionarySearchMode dictionarySearchMode) {
        this.levenshtein = new LevenshteinMatcher();
        this.searcher = dictionarySearcher;
        this.searcher.setTopDocLimit(20);
        this.searcher.setSearchMode(dictionarySearchMode);
        this.searcher.setMaxEdits(2);
        this.levenshtein.setTokenMethod(TokenizedResolutionMethod.NO);
    }

    @Override // org.talend.dataquality.semantic.index.Index
    public void setCategoriesToSearch(List<String> list) {
        this.searcher.setCategoriesToSearch(list);
    }

    @Override // org.talend.dataquality.semantic.index.Index
    public void initIndex() {
        this.searcher.maybeRefreshIndex();
    }

    @Override // org.talend.dataquality.semantic.index.Index
    public void closeIndex() {
        this.searcher.close();
    }

    @Override // org.talend.dataquality.semantic.index.Index
    public Set<String> findCategories(String str) {
        HashSet hashSet = new HashSet();
        try {
            for (ScoreDoc scoreDoc : this.searcher.searchDocumentBySynonym(str).scoreDocs) {
                hashSet.add(this.searcher.getDocument(scoreDoc.doc).getField(AbstractDictionarySearcher.F_CATID).stringValue());
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
        return hashSet;
    }

    @Override // org.talend.dataquality.semantic.index.Index
    public boolean validCategories(String str, DQCategory dQCategory, Set<DQCategory> set) {
        Boolean bool = false;
        try {
            bool = Boolean.valueOf(this.searcher.validDocumentWithCategories(str, dQCategory, set));
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
        return bool.booleanValue();
    }

    private static Map<String, Double> sortMapByValue(Map<String, Double> map) {
        LinkedList<Map.Entry> linkedList = new LinkedList(map.entrySet());
        Collections.sort(linkedList, new Comparator<Map.Entry<String, Double>>() { // from class: org.talend.dataquality.semantic.index.LuceneIndex.1
            @Override // java.util.Comparator
            public int compare(Map.Entry<String, Double> entry, Map.Entry<String, Double> entry2) {
                return entry2.getValue().compareTo(entry.getValue());
            }
        });
        LinkedHashMap linkedHashMap = new LinkedHashMap();
        for (Map.Entry entry : linkedList) {
            linkedHashMap.put(entry.getKey(), entry.getValue());
        }
        return linkedHashMap;
    }

    public Map<String, Double> findSimilarFieldsInCategory(String str, String str2, Double d) {
        HashMap hashMap = new HashMap();
        try {
            for (ScoreDoc scoreDoc : this.searcher.findSimilarValuesInCategory(str, str2).scoreDocs) {
                for (IndexableField indexableField : this.searcher.getDocument(scoreDoc.doc).getFields(AbstractDictionarySearcher.F_RAW)) {
                    String stringValue = indexableField.stringValue();
                    if (!hashMap.containsKey(stringValue)) {
                        double calculateOverallSimilarity = calculateOverallSimilarity(str, stringValue);
                        if (calculateOverallSimilarity >= d.doubleValue()) {
                            hashMap.put(stringValue, Double.valueOf(calculateOverallSimilarity));
                        }
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage());
        }
        return sortMapByValue(hashMap);
    }

    public String findMostSimilarFieldInCategory(String str, String str2, double d) {
        if (str == null) {
            return null;
        }
        if (str2 == null) {
            return str;
        }
        String str3 = str;
        double d2 = 0.0d;
        try {
            for (ScoreDoc scoreDoc : this.searcher.findSimilarValuesInCategory(str, str2).scoreDocs) {
                for (IndexableField indexableField : this.searcher.getDocument(scoreDoc.doc).getFields(AbstractDictionarySearcher.F_RAW)) {
                    String stringValue = indexableField.stringValue();
                    double calculateOverallSimilarity = calculateOverallSimilarity(str, stringValue);
                    if (calculateOverallSimilarity >= d && calculateOverallSimilarity > d2) {
                        d2 = calculateOverallSimilarity;
                        str3 = stringValue;
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
        return str3;
    }

    private double calculateOverallSimilarity(String str, String str2) throws IOException {
        List<String> tokensFromAnalyzer = DictionarySearcher.getTokensFromAnalyzer(str);
        List<String> tokensFromAnalyzer2 = DictionarySearcher.getTokensFromAnalyzer(str2);
        double d = 0.0d;
        for (String str3 : tokensFromAnalyzer) {
            Iterator<String> it = tokensFromAnalyzer2.iterator();
            while (it.hasNext()) {
                double matchingWeight = this.levenshtein.getMatchingWeight(str3, it.next());
                if (matchingWeight > d) {
                    d = matchingWeight;
                }
            }
        }
        return (d + this.levenshtein.getMatchingWeight(str.toLowerCase(), str2.toLowerCase())) / 2.0d;
    }
}
