package org.talend.dataquality.semantic.extraction;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.talend.dataquality.semantic.index.LuceneIndex;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/* loaded from: input_file:org/talend/dataquality/semantic/extraction/ExtractFromDictionary.class */
/**
 * Extracts the parts of a tokenized field that exactly match an entry of a dictionary-backed
 * semantic category.
 *
 * <p>Candidate phrases are built from consecutive tokens, looked up through the {@link LuceneIndex}
 * searcher, and kept when one of the returned entries is token-wise equal (ignoring case and
 * accents) to the candidate.
 */
public class ExtractFromDictionary extends ExtractFromSemanticType {
    private final LuceneIndex index;
    private static final Pattern fullSeparatorPattern = Pattern.compile("[\\p{Punct}\\s\\u00A0\\u2007\\u202F\\u3000]+");

    /**
     * Creates an extractor for the given category.
     *
     * @param dictionarySnapshot snapshot forwarded to the parent class
     * @param dQCategory category forwarded to the parent class
     */
    public ExtractFromDictionary(DictionarySnapshot dictionarySnapshot, DQCategory dQCategory) {
        super(dictionarySnapshot, dQCategory);
        // initIndex() is inherited; its result is expected to be a LuceneIndex for this extractor.
        this.index = (LuceneIndex) initIndex();
    }

    /**
     * Finds every dictionary match in the given field.
     *
     * <p>The field's own tokens are searched first. When the raw value contains an apostrophe or a
     * dot, a second pass is run on a copy of the field whose tokens are re-split on every
     * punctuation/whitespace character. Duplicate matches are dropped and first-seen order is kept.
     *
     * @param tokenizedString the field to analyse
     * @return the distinct matched parts, in discovery order
     */
    @Override // org.talend.dataquality.semantic.extraction.ExtractFromSemanticType
    public List<MatchedPart> getMatches(TokenizedString tokenizedString) {
        LinkedHashSet<MatchedPart> uniqueMatches =
                new LinkedHashSet<>(getMatchPart(tokenizedString, tokenizedString.getTokens()));
        String value = tokenizedString.getValue();
        if (value.contains("'") || value.contains(".")) {
            // Work on a separate TokenizedString so the caller's instance keeps its own tokens.
            TokenizedString retokenized = new TokenizedString(value);
            List<String> strictTokens = getTokensWithoutApostropheAndDots(tokenizedString);
            retokenized.getTokens().clear();
            retokenized.getTokens().addAll(strictTokens);
            uniqueMatches.addAll(getMatchPart(retokenized, strictTokens));
        }
        return new ArrayList<>(uniqueMatches);
    }

    /**
     * Scans the token list from left to right and, for each start position, keeps the longest run
     * of consecutive tokens that exactly matches a dictionary entry.
     *
     * @param tokenizedString the field the resulting matched parts refer to
     * @param list the tokens to scan
     * @return the matched parts found, in token order
     */
    private List<MatchedPart> getMatchPart(TokenizedString tokenizedString, List<String> list) {
        List<MatchedPart> matchedParts = new ArrayList<>();
        int tokenCount = list.size();
        int start = 0;
        while (start < tokenCount) {
            int matchStart = -1;
            int matchEnd = -1;
            String matchedEntry = null;
            List<String> phrase = new ArrayList<>();
            for (int end = start; end < tokenCount; end++) {
                phrase.add(StringUtils.stripAccents(list.get(end)));
                List<String> candidates = findMatches(phrase);
                if (candidates.isEmpty()) {
                    // No entry contains this phrase, so extending it further is not attempted.
                    break;
                }
                // exactMatchIndex sorts candidates in place; the returned index refers to that order.
                int exactIndex = exactMatchIndex(phrase, candidates);
                if (exactIndex > -1) {
                    matchedEntry = candidates.get(exactIndex);
                    matchStart = start;
                    matchEnd = end;
                }
            }
            if (matchedEntry != null) {
                matchedParts.add(new MatchedPartDict(tokenizedString, matchStart, matchEnd, matchedEntry));
                // Resume right after the matched run (the increment below moves past matchEnd).
                start = matchEnd;
            }
            start++;
        }
        return matchedParts;
    }

    /**
     * Re-splits the raw value on every punctuation or whitespace character, including the
     * apostrophes and dots.
     *
     * <p>{@link Pattern#split(CharSequence)} yields an empty leading element when the value starts
     * with a separator; that element is dropped from the returned list. The tokens of the given
     * {@code tokenizedString} are left untouched.
     *
     * @param tokenizedString the field whose raw value is re-split
     * @return a new mutable list of tokens
     */
    private List<String> getTokensWithoutApostropheAndDots(TokenizedString tokenizedString) {
        List<String> tokens = new ArrayList<>(Arrays.asList(fullSeparatorPattern.split(tokenizedString.getValue())));
        if (!tokens.isEmpty() && tokens.get(0).isEmpty()) {
            tokens.remove(0);
        }
        return tokens;
    }

    /**
     * Searches the index for entries of this category matching the space-joined phrase.
     *
     * @param list the tokens forming the phrase
     * @return whatever the searcher returns for that phrase
     */
    private List<String> findMatches(List<String> list) {
        return this.index.getSearcher().searchPhraseInSemanticCategory(this.semancticCategory.getId(), StringUtils.join(list, ' '));
    }

    /**
     * Sorts the candidates by decreasing length (in place) and returns the position of the first
     * one whose accent-stripped tokens equal the phrase tokens.
     *
     * @param list the phrase tokens
     * @param list2 the candidate entries; reordered by this call
     * @return the index of the exact match in the sorted {@code list2}, or {@code -1} if none
     */
    private int exactMatchIndex(List<String> list, List<String> list2) {
        Collections.sort(list2, Comparator.comparingInt(String::length).reversed());
        for (int i = 0; i < list2.size(); i++) {
            List<String> entryTokens = TokenizedString.tokenize(StringUtils.stripAccents(list2.get(i)));
            if (equalsIgnoreCase(entryTokens, list)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares two token lists position by position, ignoring case.
     *
     * <p>A mismatch is tolerated only on the last position, when the token from {@code list2} ends
     * with a dot and equals the token from {@code list} once that dot is removed.
     *
     * @param list the reference tokens
     * @param list2 the tokens to compare, whose last token may carry a trailing dot
     * @return {@code true} if both lists are non-null, have the same size and match as described
     */
    private boolean equalsIgnoreCase(List<String> list, List<String> list2) {
        if (list == null || list2 == null || list.size() != list2.size()) {
            return false;
        }
        int lastIndex = list.size() - 1;
        for (int i = 0; i < list.size(); i++) {
            String expected = list.get(i);
            String actual = list2.get(i);
            if (expected.equalsIgnoreCase(actual)) {
                continue;
            }
            if (i != lastIndex || !actual.endsWith(".")) {
                return false;
            }
            return expected.equalsIgnoreCase(actual.substring(0, actual.length() - 1));
        }
        return true;
    }
}
