/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.index;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.record.linkage.attribute.LevenshteinMatcher;
import org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod;
import org.talend.dataquality.semantic.index.DictionarySearchMode;
import org.talend.dataquality.semantic.index.DictionarySearcher;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.DQCategory;

public class LuceneIndex
implements Index,
AutoCloseable {
    private static final long serialVersionUID = -5204817779377650373L;
    private static final Logger LOGGER = LoggerFactory.getLogger(LuceneIndex.class);
    private final DictionarySearcher searcher;
    private final LevenshteinMatcher levenshtein = new LevenshteinMatcher();

    @Override
    public DictionarySearcher getSearcher() {
        return this.searcher;
    }

    public LuceneIndex(URI indexPath, DictionarySearchMode searchMode) {
        this(new DictionarySearcher(indexPath), searchMode);
    }

    public LuceneIndex(Directory directory, DictionarySearchMode searchMode) {
        this(new DictionarySearcher(directory), searchMode);
    }

    private LuceneIndex(DictionarySearcher searcher, DictionarySearchMode searchMode) {
        this.searcher = searcher;
        this.searcher.setTopDocLimit(20);
        this.searcher.setSearchMode(searchMode);
        this.searcher.setMaxEdits(2);
        this.levenshtein.setTokenMethod(TokenizedResolutionMethod.NO);
    }

    @Override
    public void setCategoriesToSearch(List<String> categoryIds) {
        this.searcher.setCategoriesToSearch(categoryIds);
    }

    @Override
    public void initIndex() {
        this.searcher.maybeRefreshIndex();
    }

    @Override
    public void closeIndex() {
        this.searcher.close();
    }

    @Override
    public void close() {
        this.closeIndex();
    }

    @Override
    public Set<String> findCategories(String data) {
        HashSet<String> foundCategorySet = new HashSet<String>();
        try {
            TopDocs docs = this.searcher.searchDocumentBySynonym(data);
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = this.searcher.getDocument(scoreDoc.doc);
                foundCategorySet.add(document.getField("catid").stringValue());
            }
        }
        catch (IOException e) {
            LOGGER.error(e.getMessage(), (Throwable)e);
        }
        return foundCategorySet;
    }

    @Override
    public boolean validCategories(String data, DQCategory semanticType, Set<DQCategory> children) {
        boolean validCategory = false;
        try {
            validCategory = this.searcher.validDocumentWithCategories(data, semanticType, children);
        }
        catch (IOException e) {
            LOGGER.error(e.getMessage(), (Throwable)e);
        }
        return validCategory;
    }

    private static Map<String, Double> sortMapByValue(Map<String, Double> unsortedMap) {
        LinkedList<Map.Entry<String, Double>> list = new LinkedList<Map.Entry<String, Double>>(unsortedMap.entrySet());
        Collections.sort(list, (o1, o2) -> ((Double)o2.getValue()).compareTo((Double)o1.getValue()));
        LinkedHashMap<String, Double> sortedMap = new LinkedHashMap<String, Double>();
        for (Map.Entry entry : list) {
            sortedMap.put((String)entry.getKey(), (Double)entry.getValue());
        }
        return sortedMap;
    }

    public Map<String, Double> findSimilarFieldsInCategory(String input, String category, Double similarity) {
        return this.findSimilarFieldsInCategories(input, Collections.singletonList(category), similarity);
    }

    @Override
    public Map<String, Double> findSimilarFieldsInCategories(String input, List<String> categories, double similarity) {
        HashMap<String, Double> similarFieldMap = new HashMap<String, Double>();
        if (input == null) {
            return similarFieldMap;
        }
        for (String category : categories) {
            try {
                TopDocs docs = this.searcher.findSimilarValuesInCategory(input, category);
                for (ScoreDoc scoreDoc : docs.scoreDocs) {
                    IndexableField[] synFields;
                    Document doc = this.searcher.getDocument(scoreDoc.doc);
                    for (IndexableField synField : synFields = doc.getFields("raw")) {
                        double distance;
                        String synFieldValue = synField.stringValue();
                        if (similarFieldMap.containsKey(synFieldValue) || !((distance = this.calculateOverallSimilarity(input, synFieldValue)) >= similarity)) continue;
                        similarFieldMap.put(synFieldValue, distance);
                    }
                }
            }
            catch (IOException e) {
                LOGGER.debug("Error when trying to search {} in categroy {}", (Object)input, (Object)category);
                LOGGER.error(e.getMessage());
            }
        }
        return LuceneIndex.sortMapByValue(similarFieldMap);
    }

    public String findMostSimilarFieldInCategory(String input, String category, double similarity) {
        return this.findMostSimilarFieldInCategories(input, Collections.singletonList(category), similarity);
    }

    @Override
    public String findMostSimilarFieldInCategories(String input, List<String> categories, double similarity) {
        if (input == null) {
            return null;
        }
        if (CollectionUtils.isEmpty(categories)) {
            return input;
        }
        String mostSimilarValue = input;
        double highestSimilarity = 0.0;
        for (String category : categories) {
            try {
                TopDocs docs = this.searcher.findSimilarValuesInCategory(input, category);
                for (ScoreDoc scoreDoc : docs.scoreDocs) {
                    IndexableField[] synFields;
                    Document doc = this.searcher.getDocument(scoreDoc.doc);
                    for (IndexableField synField : synFields = doc.getFields("raw")) {
                        String synFieldValue = synField.stringValue();
                        double currentSimilarity = this.calculateOverallSimilarity(input, synFieldValue);
                        if (!(currentSimilarity >= similarity) || !(currentSimilarity > highestSimilarity)) continue;
                        highestSimilarity = currentSimilarity;
                        mostSimilarValue = synFieldValue;
                    }
                }
            }
            catch (IOException e) {
                LOGGER.debug("Error when trying to search {} in categroy {}", (Object)input, (Object)category);
                LOGGER.error(e.getMessage(), (Throwable)e);
            }
        }
        return mostSimilarValue;
    }

    private double calculateOverallSimilarity(String input, String field) {
        List<String> inputTokens = DictionarySearcher.getTokensFromAnalyzer(input);
        List<String> fieldTokens = DictionarySearcher.getTokensFromAnalyzer(field);
        double bestTokenSimilarity = 0.0;
        for (String inputToken : inputTokens) {
            for (String fieldToken : fieldTokens) {
                double similarity = this.levenshtein.getMatchingWeight(inputToken, fieldToken);
                if (!(similarity > bestTokenSimilarity)) continue;
                bestTokenSimilarity = similarity;
            }
        }
        double fullSimilarity = this.levenshtein.getMatchingWeight(input.toLowerCase(), field.toLowerCase());
        return (bestTokenSimilarity + fullSimilarity) / 2.0;
    }
}

