/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.extraction;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.talend.dataquality.semantic.extraction.ExtractFromSemanticType;
import org.talend.dataquality.semantic.extraction.MatchedPart;
import org.talend.dataquality.semantic.extraction.MatchedPartRegex;
import org.talend.dataquality.semantic.extraction.TokenizedString;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;

/**
 * Extracts the parts of a field that match the regular expression associated
 * with a semantic category.
 *
 * <p>The category's pattern string is obtained from the snapshot's regex
 * classifier, stripped of its leading {@code ^} and trailing unescaped
 * {@code $} anchors, and compiled once per instance. A match is only reported
 * when it is delimited by separators (or the field boundaries) on both sides.
 */
public class ExtractFromRegex
extends ExtractFromSemanticType {
    /**
     * Separator character class used to tokenize the field: punctuation except
     * apostrophe, dot and plus, any whitespace, and the space characters
     * U+00A0, U+2007, U+202F and U+3000.
     */
    private static final String SEPARATORS_WITHOUT_PLUS = "[[\\p{Punct}&&[^'.+]]\\s\\u00A0\\u2007\\u202F\\u3000]+";

    /** Un-anchored category pattern; initialized after the superclass constructor has run. */
    private final Pattern pattern = Pattern.compile(this.getCleanedRegex());

    /**
     * Creates an extractor for the given category.
     *
     * @param snapshot dictionary snapshot, forwarded to the superclass
     * @param category semantic category, forwarded to the superclass
     */
    public ExtractFromRegex(DictionarySnapshot snapshot, DQCategory category) {
        super(snapshot, category);
    }

    /**
     * Finds every non-empty, separator-delimited occurrence of the category
     * pattern in {@code field}, scanning from left to right.
     *
     * <p>After an accepted match the scan resumes at the end of that match, so
     * reported matches never overlap. After a rejected match the scan resumes
     * one character further than the previous search position.
     *
     * @param field the text to search
     * @return the matched parts in order of appearance; empty when nothing matches
     */
    @Override
    public List<MatchedPart> getMatches(String field) {
        TokenizedString tokenizedField = new TokenizedString(field, SEPARATORS_WITHOUT_PLUS);
        List<MatchedPart> matchedParts = new ArrayList<MatchedPart>();
        Matcher matcher = this.pattern.matcher(field);
        int length = field.length();
        int curs = 0;
        // Matcher.find(int) throws IndexOutOfBoundsException for an index beyond
        // the input length, so stop once the cursor has moved past the end.
        while (curs <= length && matcher.find(curs)) {
            int start = matcher.start();
            int end = matcher.end();
            // A zero-length match carries no text and would leave the cursor
            // where it is (looping forever), so it is treated as a rejection.
            if (end > start && this.validBounds(tokenizedField, start, end)) {
                matchedParts.add(new MatchedPartRegex(tokenizedField, start, end));
                curs = end;
            } else {
                ++curs;
            }
        }
        return matchedParts;
    }

    /**
     * Checks that the range {@code [start, end)} is delimited on both sides.
     *
     * <p>The left side is valid when {@code start} is 0 or the preceding
     * character matches the separator pattern. The right side is valid when
     * {@code end} is the end of the input, or the following character matches
     * the separator pattern or is a dot.
     *
     * @param tokenizedField tokenized field providing the text and the separator pattern
     * @param start inclusive start index of the match
     * @param end exclusive end index of the match
     * @return {@code true} when both sides are valid boundaries
     */
    private boolean validBounds(TokenizedString tokenizedField, int start, int end) {
        String input = tokenizedField.getValue();
        boolean endChecked = end == input.length() || tokenizedField.getSeparatorPattern().matcher(input.substring(end, end + 1)).matches() || ".".equals(input.substring(end, end + 1));
        return (start == 0 || tokenizedField.getSeparatorPattern().matcher(input.substring(start - 1, start)).matches()) && endChecked;
    }

    /**
     * Fetches the pattern string of the category from the regex classifier and
     * removes a leading {@code ^} and a trailing {@code $}; the trailing
     * {@code $} is kept when it is escaped, i.e. a literal dollar sign.
     *
     * @return the pattern string without its outer anchors
     */
    private String getCleanedRegex() {
        String cleaned = this.dicoSnapshot.getRegexClassifier().getPatternStringByCategoryId(this.semanticCategory.getId());
        if (cleaned.startsWith("^")) {
            cleaned = cleaned.substring(1);
        }
        if (cleaned.endsWith("$") && !this.isLitteral(cleaned)) {
            cleaned = cleaned.substring(0, cleaned.length() - 1);
        }
        return cleaned;
    }

    /**
     * Tells whether the last character of {@code regex} is escaped, by counting
     * the backslashes immediately before it.
     *
     * @param regex the pattern string to inspect
     * @return {@code true} when an odd number of backslashes precedes the last character
     */
    private boolean isLitteral(String regex) {
        int position;
        // Walk left over the run of backslashes that precedes the last character.
        for (position = regex.length() - 2; position >= 0 && regex.charAt(position) == '\\'; --position) {
        }
        // (length - position) has the same parity as the backslash count.
        return (regex.length() - position) % 2 == 1;
    }
}

