package org.talend.dataquality.statistics.frequency.recognition;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.talend.daikon.pattern.character.CharPattern;
import org.talend.daikon.pattern.word.WordPattern;
import org.talend.dataquality.statistics.type.DataTypeEnum;

/* JADX WARN: Classes with same name are omitted:
  input_file:lib/dataquality-statistics-8.3.1-SNAPSHOT.jar:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer.class
 */
/* loaded from: input_file:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer.class */
public abstract class WordPatternRecognizer extends AbstractPatternRecognizer {
    /**
     * Code points treated as ideograms in addition to those accepted by
     * {@link Character#isIdeographic(int)}. Populated by the static initializer of this class.
     */
    private static final Set<Integer> ADDITIONAL_IDEOGRAMS = new HashSet<>();

    /**
     * Code points that are never treated as ideograms, whatever
     * {@link Character#isIdeographic(int)} reports. Populated by the static initializer of this class.
     */
    private static final Set<Integer> REMOVED_IDEOGRAMS = new HashSet<>();

    /* JADX WARN: Classes with same name are omitted:
      input_file:lib/dataquality-statistics-8.3.1-SNAPSHOT.jar:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$NoCase.class
     */
    /* loaded from: input_file:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$NoCase.class */
    /**
     * Recognizer variant that drives every explorer through its case-insensitive scan
     * ({@code exploreNoCase}).
     */
    static class NoCase extends WordPatternRecognizer {

        /**
         * Asks the explorer how many chars it recognizes at the given position without
         * distinguishing upper and lower case.
         *
         * @param patternExplorer explorer to run
         * @param cArr chars of the value being analysed
         * @param i index in {@code cArr} at which the scan starts
         * @return number of chars consumed by the explorer, 0 when nothing matched
         */
        @Override
        public int exploreNextPattern(PatternExplorer patternExplorer, char[] cArr, int i) {
            final int consumed = patternExplorer.exploreNoCase(cArr, i);
            return consumed;
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:lib/dataquality-statistics-8.3.1-SNAPSHOT.jar:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$PatternExplorer.class
     */
    /* loaded from: input_file:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$PatternExplorer.class */
    /**
     * Scanner for one class of characters (letters, digits, ideograms, kana, hangul...).
     *
     * <p>Each constant knows three pattern strings: the one emitted for a single character, the
     * one emitted for a run of characters, and an optional "special" one emitted when the run
     * was extended by {@link #exploreSpecial(char[], int)} (for example an upper-case letter
     * followed by lower-case letters).
     *
     * <p>The result flags of the last scan ({@code isSpecial}, {@code isIncludeSurrPair}) are
     * stored on the enum constant itself, so {@link #getPattern(int)} must be called right after
     * the scan it refers to, and the constants hold shared mutable state.
     *
     * <p>The declaration order matters: {@code recognize} tries the explorers in
     * {@code values()} order.
     */
    private enum PatternExplorer {
        ALPHABETIC(WordPattern.LOWER_CHAR.getPattern(), WordPattern.LOWER_WORD.getPattern(), WordPattern.ALPHANUMERIC.getPattern()),
        IDEOGRAPHIC(WordPattern.IDEOGRAM.getPattern(), WordPattern.IDEOGRAM_SEQUENCE.getPattern(), null),
        HIRAGANA(WordPattern.HIRAGANA.getPattern(), WordPattern.HIRAGANA_SEQUENCE.getPattern(), null),
        KATAKANA(WordPattern.KATAKANA.getPattern(), WordPattern.KATAKANA_SEQUENCE.getPattern(), null),
        HANGUL(WordPattern.HANGUL.getPattern(), WordPattern.HANGUL_SEQUENCE.getPattern(), null),
        NUMERIC(WordPattern.DIGIT.getPattern(), WordPattern.NUMBER.getPattern(), null),
        UPPER_CASE(WordPattern.UPPER_CHAR.getPattern(), WordPattern.UPPER_WORD.getPattern(), WordPattern.WORD.getPattern()),
        NOT_UPPER_CASE(WordPattern.LOWER_CHAR.getPattern(), WordPattern.LOWER_WORD.getPattern(), null);

        /**
         * Code point U+30FC (katakana-hiragana prolonged sound mark). It is accepted inside a
         * kana run but a kana run may not start with it.
         */
        private static final int PROLONGED_SOUND_MARK = 0x30FC;

        /** Pattern emitted when the scan matched exactly one character. */
        private final String patternUnit;

        /** Pattern emitted when the scan matched a run of several characters. */
        private final String patternSequence;

        /** Pattern emitted when the last scan was extended by {@link #exploreSpecial}; may be null. */
        private String specialPattern;

        /** Whether the last scan was extended by {@link #exploreSpecial}. */
        private boolean isSpecial;

        /** Whether the last scan stepped over at least one surrogate char. */
        private boolean isIncludeSurrPair;

        PatternExplorer(String unit, String sequence, String special) {
            this.patternUnit = unit;
            this.patternSequence = sequence;
            this.specialPattern = special;
        }

        /**
         * Scans, case-sensitively, the run of characters of this explorer's class that starts
         * at {@code start}.
         *
         * <p>Every case ends with a {@code break}: an explorer must only consume characters of
         * its own class, otherwise digits or letters would be reported with the pattern of
         * another class. {@code ALPHABETIC} has no case-sensitive behaviour and matches nothing.
         *
         * @param chars chars of the value being analysed
         * @param start index at which the scan starts
         * @return number of chars consumed, 0 when nothing matched
         */
        public int exploreWithCase(char[] chars, int start) {
            this.isSpecial = false;
            this.isIncludeSurrPair = false;
            int pos = start;
            switch (this) {
                case IDEOGRAPHIC:
                    while (pos < chars.length && WordPatternRecognizer.isIdeographic(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case NUMERIC:
                    while (pos < chars.length && WordPatternRecognizer.isDigit(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case UPPER_CASE:
                    while (pos < chars.length && WordPatternRecognizer.isUpper(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    // Exactly one upper-case char: try to extend it with what follows.
                    if (pos == start + 1) {
                        pos += exploreSpecial(chars, pos);
                    }
                    break;
                case NOT_UPPER_CASE:
                    while (pos < chars.length && WordPatternRecognizer.isLower(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case HANGUL:
                    while (pos < chars.length && WordPatternRecognizer.isHangul(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case HIRAGANA:
                    if (pos < chars.length && Character.codePointAt(chars, pos) != PROLONGED_SOUND_MARK) {
                        while (pos < chars.length && WordPatternRecognizer.isHiragana(Character.codePointAt(chars, pos))) {
                            pos += getCodepointSize(chars[pos]);
                        }
                    }
                    break;
                case KATAKANA:
                    if (pos < chars.length && Character.codePointAt(chars, pos) != PROLONGED_SOUND_MARK) {
                        while (pos < chars.length && WordPatternRecognizer.isKatakana(Character.codePointAt(chars, pos))) {
                            pos += getCodepointSize(chars[pos]);
                        }
                    }
                    break;
                default:
                    break;
            }
            return pos - start;
        }

        /**
         * Scans, ignoring letter case, the run of characters of this explorer's class that
         * starts at {@code start}, then tries to extend a non-empty run with
         * {@link #exploreSpecial(char[], int)}.
         *
         * <p>{@code UPPER_CASE} and {@code NOT_UPPER_CASE} have no case-insensitive behaviour
         * and match nothing.
         *
         * @param chars chars of the value being analysed
         * @param start index at which the scan starts
         * @return number of chars consumed, 0 when nothing matched
         */
        public int exploreNoCase(char[] chars, int start) {
            this.isSpecial = false;
            this.isIncludeSurrPair = false;
            int pos = start;
            switch (this) {
                case IDEOGRAPHIC:
                    while (pos < chars.length && WordPatternRecognizer.isIdeographic(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case NUMERIC:
                    while (pos < chars.length && WordPatternRecognizer.isDigit(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case HANGUL:
                    while (pos < chars.length && WordPatternRecognizer.isHangul(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                case HIRAGANA:
                    if (pos < chars.length && Character.codePointAt(chars, pos) != PROLONGED_SOUND_MARK) {
                        while (pos < chars.length && WordPatternRecognizer.isHiragana(Character.codePointAt(chars, pos))) {
                            pos += getCodepointSize(chars[pos]);
                        }
                    }
                    break;
                case KATAKANA:
                    if (pos < chars.length && Character.codePointAt(chars, pos) != PROLONGED_SOUND_MARK) {
                        while (pos < chars.length && WordPatternRecognizer.isKatakana(Character.codePointAt(chars, pos))) {
                            pos += getCodepointSize(chars[pos]);
                        }
                    }
                    break;
                case ALPHABETIC:
                    while (pos < chars.length && WordPatternRecognizer.isLetter(Character.codePointAt(chars, pos))) {
                        pos += getCodepointSize(chars[pos]);
                    }
                    break;
                default:
                    break;
            }
            int length = pos - start;
            if (length > 0) {
                length += exploreSpecial(chars, pos);
            }
            return length;
        }

        /**
         * Tries to extend an already matched run with characters that turn it into this
         * explorer's "special" pattern, and raises {@code isSpecial} when at least one char was
         * added.
         *
         * <ul>
         * <li>{@code NUMERIC}: letters and digits; the special pattern then becomes the
         * alphanumeric one.</li>
         * <li>{@code UPPER_CASE}: a case-sensitive lower-case run.</li>
         * <li>{@code NOT_UPPER_CASE}: a case-sensitive upper-case run.</li>
         * <li>{@code ALPHABETIC}: letters and digits.</li>
         * <li>other explorers: nothing.</li>
         * </ul>
         *
         * @param chars chars of the value being analysed
         * @param start index of the first char following the run matched so far
         * @return number of additional chars consumed, 0 when the run is not extended
         */
        private int exploreSpecial(char[] chars, int start) {
            int pos = start;
            switch (this) {
                case NUMERIC:
                    pos += ALPHABETIC.exploreSpecial(chars, start);
                    if (pos > start) {
                        this.specialPattern = WordPattern.ALPHANUMERIC.getPattern();
                    }
                    break;
                case UPPER_CASE:
                    pos += NOT_UPPER_CASE.exploreWithCase(chars, start);
                    break;
                case NOT_UPPER_CASE:
                    pos += UPPER_CASE.exploreWithCase(chars, start);
                    break;
                case ALPHABETIC:
                    while (pos < chars.length) {
                        final int codePoint = Character.codePointAt(chars, pos);
                        if (!WordPatternRecognizer.isLetter(codePoint) && !WordPatternRecognizer.isDigit(codePoint)) {
                            break;
                        }
                        // Step over the whole code point so a surrogate pair is never split.
                        pos += Character.charCount(codePoint);
                    }
                    break;
                default:
                    break;
            }
            final int length = pos - start;
            if (length > 0) {
                this.isSpecial = true;
            }
            return length;
        }

        /**
         * Returns the pattern describing the last scan performed by this explorer.
         *
         * @param length number of chars the last scan consumed
         * @return {@code null} when {@code length} is 0; the special pattern when the scan was
         *         extended; the unit pattern for one char, or for two chars when a surrogate
         *         char was stepped over; the sequence pattern otherwise
         */
        public String getPattern(int length) {
            if (length == 0) {
                return null;
            }
            if (this.isSpecial) {
                return this.specialPattern;
            }
            final boolean singleCodePoint = length == 1 || (length == 2 && this.isIncludeSurrPair);
            return singleCodePoint ? this.patternUnit : this.patternSequence;
        }

        /**
         * Returns how many chars to step over for the code point starting with {@code c}, and
         * records on this explorer that a surrogate was met.
         *
         * @param c char at the current scan position
         * @return 2 when {@code c} is a surrogate, 1 otherwise
         */
        private int getCodepointSize(char c) {
            if (!Character.isSurrogate(c)) {
                return 1;
            }
            this.isIncludeSurrPair = true;
            return 2;
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:lib/dataquality-statistics-8.3.1-SNAPSHOT.jar:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$WithCase.class
     */
    /* loaded from: input_file:org/talend/dataquality/statistics/frequency/recognition/WordPatternRecognizer$WithCase.class */
    /**
     * Recognizer variant that drives every explorer through its case-sensitive scan
     * ({@code exploreWithCase}).
     */
    static class WithCase extends WordPatternRecognizer {

        /**
         * Asks the explorer how many chars it recognizes at the given position while
         * distinguishing upper and lower case.
         *
         * @param patternExplorer explorer to run
         * @param cArr chars of the value being analysed
         * @param i index in {@code cArr} at which the scan starts
         * @return number of chars consumed by the explorer, 0 when nothing matched
         */
        @Override
        public int exploreNextPattern(PatternExplorer patternExplorer, char[] cArr, int i) {
            final int consumed = patternExplorer.exploreWithCase(cArr, i);
            return consumed;
        }
    }

    /**
     * Creates a recognizer backed by {@link WithCase}.
     *
     * @return a new recognizer instance
     */
    public static WordPatternRecognizer withCase() {
        final WordPatternRecognizer caseSensitive = new WithCase();
        return caseSensitive;
    }

    /**
     * Creates a recognizer backed by {@link NoCase}.
     *
     * @return a new recognizer instance
     */
    public static WordPatternRecognizer noCase() {
        final WordPatternRecognizer caseInsensitive = new NoCase();
        return caseInsensitive;
    }

    /**
     * Builds the word pattern of a value by repeatedly running every {@link PatternExplorer}
     * at the current position and concatenating the patterns they report.
     *
     * <p>An empty or null value is returned unchanged, with the result flag set to
     * {@code false}. A char that no explorer recognizes is copied verbatim into the pattern.
     *
     * @param str value to analyse
     * @param dataTypeEnum not used by this implementation
     * @return result holding a single pattern string, with the result flag set to {@code true}
     *         for a non-empty value
     */
    @Override // org.talend.dataquality.statistics.frequency.recognition.AbstractPatternRecognizer
    public RecognitionResult recognize(String str, DataTypeEnum dataTypeEnum) {
        final RecognitionResult result = new RecognitionResult();
        if (StringUtils.isEmpty(str)) {
            result.setResult(Collections.singleton(str), false);
            return result;
        }

        final char[] chars = str.toCharArray();
        final StringBuilder pattern = new StringBuilder();
        final PatternExplorer[] explorers = PatternExplorer.values();
        int cursor = 0;
        while (cursor < chars.length) {
            final int roundStart = cursor;
            // Explorers run in declaration order; each one continues where the previous stopped.
            for (PatternExplorer explorer : explorers) {
                final int consumed = exploreNextPattern(explorer, chars, cursor);
                if (consumed <= 0) {
                    continue;
                }
                pattern.append(explorer.getPattern(consumed));
                cursor += consumed;
            }
            // Nothing matched in this round: keep the char as is and move on.
            if (cursor == roundStart) {
                pattern.append(chars[roundStart]);
                cursor++;
            }
        }

        result.setResult(Collections.singleton(pattern.toString()), true);
        return result;
    }

    /**
     * Runs one explorer at the given position; subclasses choose which of the explorer's scans
     * is used.
     *
     * @param patternExplorer explorer to run
     * @param cArr chars of the value being analysed
     * @param i index in {@code cArr} at which the scan starts
     * @return number of chars consumed; {@code recognize} ignores the explorer when the value
     *         is not positive
     */
    protected abstract int exploreNextPattern(PatternExplorer patternExplorer, char[] cArr, int i);

    /**
     * Recognizes the given value and returns the pattern strings of the result.
     *
     * @param str value to analyse
     * @return pattern strings held by the recognition result
     */
    @Override // org.talend.dataquality.statistics.frequency.recognition.AbstractPatternRecognizer
    public Set<String> getValuePattern(String str) {
        final RecognitionResult recognized = recognize(str);
        return recognized.getPatternStringSet();
    }

    /**
     * Tells whether a code point has the Unicode general category "lower-case letter".
     *
     * @param i code point to test
     * @return {@code true} when {@link Character#getType(int)} is {@link Character#LOWERCASE_LETTER}
     */
    static boolean isLower(int i) {
        final int category = Character.getType(i);
        return category == Character.LOWERCASE_LETTER;
    }

    /**
     * Tells whether a code point has the Unicode general category "upper-case letter".
     *
     * @param i code point to test
     * @return {@code true} when {@link Character#getType(int)} is {@link Character#UPPERCASE_LETTER}
     */
    static boolean isUpper(int i) {
        final int category = Character.getType(i);
        return category == Character.UPPERCASE_LETTER;
    }

    /**
     * Tells whether a code point is a cased letter, i.e. accepted by {@link #isLower(int)} or
     * {@link #isUpper(int)}.
     *
     * @param i code point to test
     * @return {@code true} for a lower-case or upper-case letter
     */
    static boolean isLetter(int i) {
        if (isLower(i)) {
            return true;
        }
        return isUpper(i);
    }

    /**
     * Tells whether a code point belongs to the ideogram class used by this recognizer.
     *
     * <p>A code point listed in {@code REMOVED_IDEOGRAMS} is always rejected; otherwise it is
     * accepted when {@link Character#isIdeographic(int)} accepts it or when it is listed in
     * {@code ADDITIONAL_IDEOGRAMS}.
     *
     * @param i code point to test
     * @return {@code true} when the code point counts as an ideogram
     */
    static boolean isIdeographic(int i) {
        final Integer codePoint = Integer.valueOf(i);
        if (REMOVED_IDEOGRAMS.contains(codePoint)) {
            return false;
        }
        return Character.isIdeographic(i) || ADDITIONAL_IDEOGRAMS.contains(codePoint);
    }

    /**
     * Tells whether a code point is contained in {@code CharPattern.HANGUL}.
     *
     * @param i code point to test
     * @return {@code true} when the code point is part of the hangul set
     */
    static boolean isHangul(int i) {
        final Integer codePoint = Integer.valueOf(i);
        return CharPattern.HANGUL.contains(codePoint);
    }

    /**
     * Tells whether a code point is contained in {@code CharPattern.HIRAGANA} or is U+30FC
     * (decimal 12540), which is also accepted by {@link #isKatakana(int)}.
     *
     * @param i code point to test
     * @return {@code true} when the code point counts as hiragana
     */
    static boolean isHiragana(int i) {
        final Integer codePoint = Integer.valueOf(i);
        if (CharPattern.HIRAGANA.contains(codePoint)) {
            return true;
        }
        return i == 0x30FC;
    }

    /**
     * Tells whether a code point is contained in {@code CharPattern.HALFWIDTH_KATAKANA} or
     * {@code CharPattern.FULLWIDTH_KATAKANA}, or is U+30FC (decimal 12540), which is also
     * accepted by {@link #isHiragana(int)}.
     *
     * @param i code point to test
     * @return {@code true} when the code point counts as katakana
     */
    static boolean isKatakana(int i) {
        final Integer codePoint = Integer.valueOf(i);
        if (CharPattern.HALFWIDTH_KATAKANA.contains(codePoint)) {
            return true;
        }
        if (CharPattern.FULLWIDTH_KATAKANA.contains(codePoint)) {
            return true;
        }
        return i == 0x30FC;
    }

    /**
     * Tells whether a code point is a decimal digit, using the same criterion as
     * {@link Character#isDigit(int)}: its general category is
     * {@link Character#DECIMAL_DIGIT_NUMBER}.
     *
     * @param i code point to test
     * @return {@code true} for a decimal digit of any script
     */
    static boolean isDigit(int i) {
        final int category = Character.getType(i);
        return category == Character.DECIMAL_DIGIT_NUMBER;
    }

    static {
        for (int i = 11904; i <= 11929; i++) {
            ADDITIONAL_IDEOGRAMS.add(Integer.valueOf(i));
        }
        for (int i2 = 11931; i2 <= 12019; i2++) {
            ADDITIONAL_IDEOGRAMS.add(Integer.valueOf(i2));
        }
        for (int i3 = 12032; i3 <= 12245; i3++) {
            ADDITIONAL_IDEOGRAMS.add(Integer.valueOf(i3));
        }
        ADDITIONAL_IDEOGRAMS.add(12293);
        ADDITIONAL_IDEOGRAMS.add(12347);
        REMOVED_IDEOGRAMS.add(12294);
    }
}
