package org.talend.dataquality.semantic.extraction;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/* loaded from: input_file:org/talend/dataquality/semantic/extraction/ExtractFromRegex.class */
/**
 * Extracts the parts of an input string that match the regular expression registered for a semantic category.
 *
 * <p>The category's pattern string is fetched once at construction time, relaxed so that it can match inside a
 * longer string (see {@link #getCleanedRegex()}) and compiled. A match is only reported when it is non-empty and
 * delimited on both sides by a separator or by an end of the input (see
 * {@link #validBounds(TokenizedString, int, int)}).
 */
public class ExtractFromRegex extends ExtractFromSemanticType {

    /**
     * Separator regex handed to {@link TokenizedString}: one or more characters that are either in the
     * {@code Punct} class (except the apostrophe, the dot and the plus sign), whitespace, or one of the space
     * characters U+00A0, U+2007, U+202F, U+3000.
     */
    private static final String SEPARATORS_WITHOUT_PLUS = "[[\\p{Punct}&&[^'.+]]\\s\\u00A0\\u2007\\u202F\\u3000]+";

    /** Compiled, un-anchored form of the category's regex. */
    private final Pattern pattern;

    /**
     * Builds the extractor and compiles the cleaned regex of the given category.
     *
     * @param dictionarySnapshot snapshot passed to the superclass; its regex classifier supplies the pattern string
     * @param dQCategory category passed to the superclass; its id selects the pattern
     */
    public ExtractFromRegex(DictionarySnapshot dictionarySnapshot, DQCategory dQCategory) {
        super(dictionarySnapshot, dQCategory);
        this.pattern = Pattern.compile(getCleanedRegex());
    }

    /**
     * Finds every non-overlapping, separator-delimited occurrence of the category pattern in {@code str}.
     *
     * @param str the input to scan
     * @return the accepted matches in order of appearance; empty when nothing matches
     */
    @Override // org.talend.dataquality.semantic.extraction.ExtractFromSemanticType
    public List<MatchedPart> getMatches(String str) {
        TokenizedString tokenizedString = new TokenizedString(str, SEPARATORS_WITHOUT_PLUS);
        List<MatchedPart> matches = new ArrayList<>();
        Matcher matcher = this.pattern.matcher(str);
        int inputLength = str.length();
        int searchFrom = 0;
        // Matcher.find(int) throws IndexOutOfBoundsException for an index greater than the input length,
        // so the bound is checked before every search.
        while (searchFrom <= inputLength && matcher.find(searchFrom)) {
            int start = matcher.start();
            int end = matcher.end();
            // A zero-length match carries no text; accepting it would also leave searchFrom unchanged
            // and make the loop spin forever, so it is treated like a rejected match.
            if (end > start && validBounds(tokenizedString, start, end)) {
                matches.add(new MatchedPartRegex(tokenizedString, start, end));
                searchFrom = end;
            } else {
                // Resume just after the rejected match's start. Every index between the previous
                // searchFrom and start would rediscover this same match (pattern matching does not
                // depend on the search index, barring the \G boundary matcher), so skipping them only
                // avoids redundant rescans.
                searchFrom = start + 1;
            }
        }
        return matches;
    }

    /**
     * Checks that the span {@code [i, i2)} is delimited on both sides.
     *
     * <p>The left side is valid when the span starts at index 0 or the preceding character is a separator.
     * The right side is valid when the span ends at the end of the value, or the following character is a
     * separator or a dot.
     *
     * @param tokenizedString the tokenized input supplying the value and the separator pattern
     * @param i inclusive start index of the match
     * @param i2 exclusive end index of the match
     * @return {@code true} when both sides are valid
     */
    private boolean validBounds(TokenizedString tokenizedString, int i, int i2) {
        String value = tokenizedString.getValue();

        if (i != 0 && !isSeparator(tokenizedString, value.substring(i - 1, i))) {
            return false;
        }
        if (i2 == value.length()) {
            return true;
        }
        String following = value.substring(i2, i2 + 1);
        return isSeparator(tokenizedString, following) || ".".equals(following);
    }

    /** Tells whether {@code text} is matched in full by the separator pattern of {@code tokenizedString}. */
    private boolean isSeparator(TokenizedString tokenizedString, String text) {
        return tokenizedString.getSeparatorPattern().matcher(text).matches();
    }

    /**
     * Returns the category's pattern string without its leading {@code ^} and without its trailing {@code $}
     * (unless that {@code $} is escaped), so that the pattern is no longer tied to the ends of the input.
     */
    private String getCleanedRegex() {
        String regex = this.dicoSnapshot.getRegexClassifier().getPatternStringByCategoryId(this.semanticCategory.getId());
        if (regex.startsWith("^")) {
            regex = regex.substring(1);
        }
        if (regex.endsWith("$") && !isLastCharEscaped(regex)) {
            regex = regex.substring(0, regex.length() - 1);
        }
        return regex;
    }

    /**
     * Tells whether the last character of {@code regex} is escaped, i.e. preceded by an odd number of
     * consecutive backslashes (an even run is made of escaped backslashes only).
     */
    private static boolean isLastCharEscaped(String regex) {
        int backslashes = 0;
        for (int idx = regex.length() - 2; idx >= 0 && regex.charAt(idx) == '\\'; idx--) {
            backslashes++;
        }
        return backslashes % 2 == 1;
    }
}
