/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.statistics.frequency.recognition;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.talend.daikon.pattern.character.CharPattern;
import org.talend.daikon.pattern.word.WordPattern;
import org.talend.dataquality.statistics.frequency.recognition.AbstractPatternRecognizer;
import org.talend.dataquality.statistics.frequency.recognition.RecognitionResult;
import org.talend.dataquality.statistics.type.DataTypeEnum;

public abstract class WordPatternRecognizer
extends AbstractPatternRecognizer {
    /**
     * Code points that {@link #isIdeographic(int)} accepts in addition to those reported by
     * {@link Character#isIdeographic(int)}. Populated once in the static initializer.
     */
    private static final Set<Integer> ADDITIONAL_IDEOGRAMS;
    /**
     * Code points that {@link #isIdeographic(int)} always rejects, even when they would
     * otherwise qualify. Populated once in the static initializer.
     */
    private static final Set<Integer> REMOVED_IDEOGRAMS;

    /**
     * Creates a recognizer whose exploration step is case-sensitive, i.e. upper-case and
     * lower-case letters are explored separately.
     *
     * @return a new {@link WithCase} recognizer
     */
    public static WordPatternRecognizer withCase() {
        final WordPatternRecognizer caseSensitive = new WithCase();
        return caseSensitive;
    }

    /**
     * Creates a recognizer whose exploration step ignores letter case, i.e. upper-case and
     * lower-case letters are explored together as alphabetic characters.
     *
     * @return a new {@link NoCase} recognizer
     */
    public static WordPatternRecognizer noCase() {
        final WordPatternRecognizer caseInsensitive = new NoCase();
        return caseInsensitive;
    }

    /**
     * Computes the word pattern of a string.
     *
     * <p>The input is scanned left to right. In each round every {@link PatternExplorer} is
     * asked, in declaration order, to consume characters at the current position; whatever an
     * explorer consumes is replaced by that explorer's pattern. When a whole round consumes
     * nothing, the character at the current position is copied to the pattern unchanged and the
     * scan moves on by one {@code char}.
     *
     * @param stringToRecognize the value to analyse; {@code null} and {@code ""} are returned
     *        as-is with the result flag set to {@code false}
     * @param type not read by this implementation
     * @return a result holding exactly one pattern string, with the result flag set to
     *         {@code true} for non-empty input
     */
    @Override
    public RecognitionResult recognize(String stringToRecognize, DataTypeEnum type) {
        final RecognitionResult outcome = new RecognitionResult();
        if (StringUtils.isEmpty(stringToRecognize)) {
            outcome.setResult(Collections.singleton(stringToRecognize), false);
            return outcome;
        }

        final char[] chars = stringToRecognize.toCharArray();
        final StringBuilder pattern = new StringBuilder();
        int cursor = 0;
        while (cursor < chars.length) {
            final int roundStart = cursor;
            for (PatternExplorer explorer : PatternExplorer.values()) {
                final int consumed = this.exploreNextPattern(explorer, chars, cursor);
                if (consumed > 0) {
                    pattern.append(explorer.getPattern(consumed));
                    cursor += consumed;
                }
            }
            // No explorer matched anything in this round: keep the raw character.
            if (cursor == roundStart) {
                pattern.append(chars[roundStart]);
                cursor++;
            }
        }

        outcome.setResult(Collections.singleton(pattern.toString()), true);
        return outcome;
    }

    /**
     * Asks one explorer to consume as many characters as it can, starting at a given position.
     * Subclasses choose which exploration mode of the explorer is used.
     *
     * @param pe the explorer to run
     * @param ca the characters of the value being recognized
     * @param startingPos index in {@code ca} at which exploration starts
     * @return the number of {@code char}s consumed; {@code recognize} treats any value
     *         {@code <= 0} as "nothing consumed"
     */
    protected abstract int exploreNextPattern(PatternExplorer pe, char[] ca, int startingPos);

    /**
     * Returns the pattern strings recognized for a value.
     *
     * @param originalValue the value to analyse
     * @return the pattern string set of the recognition result for {@code originalValue}
     */
    @Override
    public Set<String> getValuePattern(String originalValue) {
        return this.recognize(originalValue).getPatternStringSet();
    }

    /**
     * Tells whether a code point has the Unicode general category "lower-case letter".
     *
     * @param codePoint the code point to test
     * @return {@code true} if {@link Character#getType(int)} is {@link Character#LOWERCASE_LETTER}
     */
    static boolean isLower(int codePoint) {
        final int category = Character.getType(codePoint);
        return category == Character.LOWERCASE_LETTER;
    }

    /**
     * Tells whether a code point has the Unicode general category "upper-case letter".
     *
     * @param codePoint the code point to test
     * @return {@code true} if {@link Character#getType(int)} is {@link Character#UPPERCASE_LETTER}
     */
    static boolean isUpper(int codePoint) {
        final int category = Character.getType(codePoint);
        return category == Character.UPPERCASE_LETTER;
    }

    /**
     * Tells whether a code point is a cased letter, i.e. accepted by {@link #isLower(int)} or
     * {@link #isUpper(int)}. Letters of other categories are not accepted.
     *
     * @param codePoint the code point to test
     * @return {@code true} for lower-case and upper-case letters
     */
    static boolean isLetter(int codePoint) {
        if (WordPatternRecognizer.isLower(codePoint)) {
            return true;
        }
        return WordPatternRecognizer.isUpper(codePoint);
    }

    /**
     * Tells whether a code point is treated as an ideograph by this recognizer.
     *
     * <p>The exclusion set wins: a code point listed in {@code REMOVED_IDEOGRAMS} is never an
     * ideograph. Otherwise it is one if the JDK says so or if it is listed in
     * {@code ADDITIONAL_IDEOGRAMS}.
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point counts as an ideograph
     */
    static boolean isIdeographic(int codePoint) {
        if (REMOVED_IDEOGRAMS.contains(codePoint)) {
            return false;
        }
        return Character.isIdeographic(codePoint) || ADDITIONAL_IDEOGRAMS.contains(codePoint);
    }

    /**
     * Tells whether a code point belongs to {@code CharPattern.HANGUL}.
     *
     * @param codePoint the code point to test
     * @return the result of the {@code HANGUL} membership check
     */
    static boolean isHangul(int codePoint) {
        final Integer boxed = Integer.valueOf(codePoint);
        return CharPattern.HANGUL.contains(boxed);
    }

    /**
     * Tells whether a code point may be part of a Hiragana run: either it belongs to
     * {@code CharPattern.HIRAGANA} or it is U+30FC (decimal 12540), which
     * {@link #isKatakana(int)} accepts as well.
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point can appear inside a Hiragana run
     */
    static boolean isHiragana(int codePoint) {
        if (CharPattern.HIRAGANA.contains(Integer.valueOf(codePoint))) {
            return true;
        }
        // U+30FC, KATAKANA-HIRAGANA PROLONGED SOUND MARK in the Unicode charts.
        return codePoint == 0x30FC;
    }

    /**
     * Tells whether a code point may be part of a Katakana run: either it belongs to
     * {@code CharPattern.HALFWIDTH_KATAKANA} or {@code CharPattern.FULLWIDTH_KATAKANA}, or it is
     * U+30FC (decimal 12540), which {@link #isHiragana(int)} accepts as well.
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point can appear inside a Katakana run
     */
    static boolean isKatakana(int codePoint) {
        if (CharPattern.HALFWIDTH_KATAKANA.contains(Integer.valueOf(codePoint))) {
            return true;
        }
        if (CharPattern.FULLWIDTH_KATAKANA.contains(Integer.valueOf(codePoint))) {
            return true;
        }
        // U+30FC, KATAKANA-HIRAGANA PROLONGED SOUND MARK in the Unicode charts.
        return codePoint == 0x30FC;
    }

    /**
     * Tells whether a code point is a decimal digit in any script.
     *
     * <p>Written as a general-category check to mirror {@code isLower}/{@code isUpper}; the
     * {@link Character#isDigit(int)} documentation defines a digit as exactly this category.
     *
     * @param codePoint the code point to test
     * @return {@code true} if {@link Character#getType(int)} is
     *         {@link Character#DECIMAL_DIGIT_NUMBER}
     */
    static boolean isDigit(int codePoint) {
        final int category = Character.getType(codePoint);
        return category == Character.DECIMAL_DIGIT_NUMBER;
    }

    static {
        int i;
        ADDITIONAL_IDEOGRAMS = new HashSet<Integer>();
        REMOVED_IDEOGRAMS = new HashSet<Integer>();
        for (i = 11904; i <= 11929; ++i) {
            ADDITIONAL_IDEOGRAMS.add(i);
        }
        for (i = 11931; i <= 12019; ++i) {
            ADDITIONAL_IDEOGRAMS.add(i);
        }
        for (i = 12032; i <= 12245; ++i) {
            ADDITIONAL_IDEOGRAMS.add(i);
        }
        ADDITIONAL_IDEOGRAMS.add(12293);
        ADDITIONAL_IDEOGRAMS.add(12347);
        REMOVED_IDEOGRAMS.add(12294);
    }

    /**
     * Recognizer variant that runs every explorer in its case-insensitive mode
     * ({@code exploreNoCase}).
     */
    static class NoCase extends WordPatternRecognizer {

        /**
         * Delegates to the explorer's case-insensitive exploration.
         *
         * @param pe the explorer to run
         * @param ca the characters of the value being recognized
         * @param startingPos index in {@code ca} at which exploration starts
         * @return the number of {@code char}s the explorer consumed
         */
        @Override
        public int exploreNextPattern(PatternExplorer pe, char[] ca, int startingPos) {
            final int consumed = pe.exploreNoCase(ca, startingPos);
            return consumed;
        }
    }

    /**
     * Recognizer variant that runs every explorer in its case-sensitive mode
     * ({@code exploreWithCase}).
     */
    static class WithCase extends WordPatternRecognizer {

        /**
         * Delegates to the explorer's case-sensitive exploration.
         *
         * @param pe the explorer to run
         * @param ca the characters of the value being recognized
         * @param startingPos index in {@code ca} at which exploration starts
         * @return the number of {@code char}s the explorer consumed
         */
        @Override
        public int exploreNextPattern(PatternExplorer pe, char[] ca, int startingPos) {
            final int consumed = pe.exploreWithCase(ca, startingPos);
            return consumed;
        }
    }

    private static enum PatternExplorer {
        // Constructor arguments are (patternUnit, patternSequence, specialPattern).
        // recognize() iterates values(), so the declaration order below is the order in which
        // explorers are tried at each position.

        // Has a case only in exploreNoCase and exploreSpecial; exploreWithCase has none for it.
        ALPHABETIC(WordPattern.LOWER_CHAR.getPattern(), WordPattern.LOWER_WORD.getPattern(), WordPattern.ALPHANUMERIC.getPattern()),
        IDEOGRAPHIC(WordPattern.IDEOGRAM.getPattern(), WordPattern.IDEOGRAM_SEQUENCE.getPattern(), null),
        HIRAGANA(WordPattern.HIRAGANA.getPattern(), WordPattern.HIRAGANA_SEQUENCE.getPattern(), null),
        KATAKANA(WordPattern.KATAKANA.getPattern(), WordPattern.KATAKANA_SEQUENCE.getPattern(), null),
        HANGUL(WordPattern.HANGUL.getPattern(), WordPattern.HANGUL_SEQUENCE.getPattern(), null),
        // specialPattern starts as null and is set by exploreSpecial when letters/digits follow.
        NUMERIC(WordPattern.DIGIT.getPattern(), WordPattern.NUMBER.getPattern(), null),
        // UPPER_CASE and NOT_UPPER_CASE have a case only in exploreWithCase and exploreSpecial.
        UPPER_CASE(WordPattern.UPPER_CHAR.getPattern(), WordPattern.UPPER_WORD.getPattern(), WordPattern.WORD.getPattern()),
        NOT_UPPER_CASE(WordPattern.LOWER_CHAR.getPattern(), WordPattern.LOWER_WORD.getPattern(), null);

        // NOTE(review): isSpecial, isIncludeSurrPair and (for NUMERIC) specialPattern are
        // rewritten during exploration, and enum constants are shared by every recognizer.
        // Presumably callers never recognize concurrently — confirm.

        /** Pattern returned by getPattern for a run of length 1, or of length 2 when a surrogate was seen. */
        private String patternUnit;
        /** Pattern returned by getPattern for any longer, non-special run. */
        private String patternSequence;
        /** Pattern returned by getPattern when the last exploration was flagged as special; may be null. */
        private String specialPattern;
        /** Set to true by exploreSpecial when it consumed at least one char; reset by each explore call. */
        private boolean isSpecial;
        /** Set to true by getCodepointSize when it sees a surrogate char; reset by each explore call. */
        private boolean isIncludeSurrPair;

        /**
         * Stores the three patterns this explorer can report.
         *
         * @param patternUnit pattern for a single-unit run
         * @param patternSequence pattern for a longer run
         * @param specialPattern pattern for a run flagged as special, or {@code null}
         */
        PatternExplorer(String patternUnit, String patternSequence, String specialPattern) {
            this.specialPattern = specialPattern;
            this.patternSequence = patternSequence;
            this.patternUnit = patternUnit;
        }

        /**
         * Case-sensitive exploration: consumes the longest run, starting at {@code start}, of
         * characters belonging to this explorer's class.
         *
         * <p>Resets {@code isSpecial} and {@code isIncludeSurrPair} before scanning. For
         * {@code UPPER_CASE}, when exactly one {@code char} was consumed the run is extended
         * through {@code exploreSpecial}, which flags the run as special if it consumes anything.
         * {@code HIRAGANA} and {@code KATAKANA} runs may contain U+30FC (12540) but never start
         * with it. Explorers without a case here (e.g. {@code ALPHABETIC}) consume nothing.
         *
         * @param ca the characters of the value being recognized
         * @param start index in {@code ca} at which exploration starts; may equal {@code ca.length}
         * @return the number of {@code char}s consumed, {@code 0} if none
         */
        private int exploreWithCase(char[] ca, int start) {
            this.isSpecial = false;
            this.isIncludeSurrPair = false;
            // Must be assigned before the switch: every case reads pos before writing it, and
            // explorers without a case must report 0 consumed chars.
            int pos = start;
            switch (this) {
                case IDEOGRAPHIC: {
                    while (pos < ca.length && WordPatternRecognizer.isIdeographic(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case NUMERIC: {
                    while (pos < ca.length && WordPatternRecognizer.isDigit(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case UPPER_CASE: {
                    while (pos < ca.length && WordPatternRecognizer.isUpper(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    // A single upper-case char may start a capitalized word: try to extend it.
                    if (pos == start + 1) {
                        pos += this.exploreSpecial(ca, pos);
                    }
                    break;
                }
                case NOT_UPPER_CASE: {
                    while (pos < ca.length && WordPatternRecognizer.isLower(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case HANGUL: {
                    while (pos < ca.length && WordPatternRecognizer.isHangul(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case HIRAGANA: {
                    // U+30FC (12540) is accepted inside a run but must not start one.
                    if (pos < ca.length && Character.codePointAt(ca, pos) != 12540) {
                        while (pos < ca.length && WordPatternRecognizer.isHiragana(Character.codePointAt(ca, pos))) {
                            pos += this.getCodepointSize(ca[pos]);
                        }
                    }
                    break;
                }
                case KATAKANA: {
                    // U+30FC (12540) is accepted inside a run but must not start one.
                    if (pos < ca.length && Character.codePointAt(ca, pos) != 12540) {
                        while (pos < ca.length && WordPatternRecognizer.isKatakana(Character.codePointAt(ca, pos))) {
                            pos += this.getCodepointSize(ca[pos]);
                        }
                    }
                    break;
                }
                default: {
                    // No case-sensitive exploration for this explorer: consume nothing.
                    break;
                }
            }
            return pos - start;
        }

        /**
         * Case-insensitive exploration: consumes the longest run, starting at {@code start}, of
         * characters belonging to this explorer's class, then tries to extend a non-empty run
         * through {@code exploreSpecial}.
         *
         * <p>Resets {@code isSpecial} and {@code isIncludeSurrPair} before scanning.
         * {@code HIRAGANA} and {@code KATAKANA} runs may contain U+30FC (12540) but never start
         * with it. Explorers without a case here ({@code UPPER_CASE}, {@code NOT_UPPER_CASE})
         * consume nothing.
         *
         * @param ca the characters of the value being recognized
         * @param start index in {@code ca} at which exploration starts; may equal {@code ca.length}
         * @return the number of {@code char}s consumed including any special extension,
         *         {@code 0} if none
         */
        private int exploreNoCase(char[] ca, int start) {
            this.isSpecial = false;
            this.isIncludeSurrPair = false;
            // Must be assigned before the switch: every case reads pos before writing it, and
            // explorers without a case must report 0 consumed chars.
            int pos = start;
            switch (this) {
                case ALPHABETIC: {
                    while (pos < ca.length && WordPatternRecognizer.isLetter(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case IDEOGRAPHIC: {
                    while (pos < ca.length && WordPatternRecognizer.isIdeographic(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case NUMERIC: {
                    while (pos < ca.length && WordPatternRecognizer.isDigit(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case HANGUL: {
                    while (pos < ca.length && WordPatternRecognizer.isHangul(Character.codePointAt(ca, pos))) {
                        pos += this.getCodepointSize(ca[pos]);
                    }
                    break;
                }
                case HIRAGANA: {
                    // U+30FC (12540) is accepted inside a run but must not start one.
                    if (pos < ca.length && Character.codePointAt(ca, pos) != 12540) {
                        while (pos < ca.length && WordPatternRecognizer.isHiragana(Character.codePointAt(ca, pos))) {
                            pos += this.getCodepointSize(ca[pos]);
                        }
                    }
                    break;
                }
                case KATAKANA: {
                    // U+30FC (12540) is accepted inside a run but must not start one.
                    if (pos < ca.length && Character.codePointAt(ca, pos) != 12540) {
                        while (pos < ca.length && WordPatternRecognizer.isKatakana(Character.codePointAt(ca, pos))) {
                            pos += this.getCodepointSize(ca[pos]);
                        }
                    }
                    break;
                }
                default: {
                    // No case-insensitive exploration for this explorer: consume nothing.
                    break;
                }
            }
            int seqLength = pos - start;
            if (seqLength > 0) {
                // Only a non-empty run can be extended into a special (e.g. alphanumeric) one.
                seqLength += this.exploreSpecial(ca, pos);
            }
            return seqLength;
        }

        /**
         * Tries to extend a run that ended just before {@code start} into this explorer's
         * "special" form, and sets {@code isSpecial} when at least one {@code char} is consumed.
         *
         * <ul>
         * <li>{@code ALPHABETIC}: consumes any mix of letters and digits.</li>
         * <li>{@code NUMERIC}: consumes the same mix via {@code ALPHABETIC}; on success its
         * {@code specialPattern} becomes the alphanumeric pattern.</li>
         * <li>{@code UPPER_CASE}: consumes what {@code NOT_UPPER_CASE.exploreWithCase} consumes.</li>
         * <li>{@code NOT_UPPER_CASE}: consumes what {@code UPPER_CASE.exploreWithCase} consumes.</li>
         * <li>Any other explorer consumes nothing.</li>
         * </ul>
         *
         * <p>Calling another constant's explore method also updates that constant's own flags.
         *
         * @param ca the characters of the value being recognized
         * @param start index in {@code ca} at which the extension starts; may equal {@code ca.length}
         * @return the number of {@code char}s consumed by the extension, {@code 0} if none
         */
        private int exploreSpecial(char[] ca, int start) {
            // Must be assigned before the switch: most cases use "pos +=", and explorers without
            // a case must report 0 consumed chars.
            int pos = start;
            switch (this) {
                case ALPHABETIC: {
                    while (pos < ca.length) {
                        int codePoint = Character.codePointAt(ca, pos);
                        if (!WordPatternRecognizer.isLetter(codePoint) && !WordPatternRecognizer.isDigit(codePoint)) {
                            break;
                        }
                        // Advance by whole code points so a surrogate pair is never split.
                        pos += Character.charCount(codePoint);
                    }
                    break;
                }
                case NUMERIC: {
                    pos += ALPHABETIC.exploreSpecial(ca, start);
                    if (pos > start) {
                        this.specialPattern = WordPattern.ALPHANUMERIC.getPattern();
                    }
                    break;
                }
                case UPPER_CASE: {
                    pos += NOT_UPPER_CASE.exploreWithCase(ca, start);
                    break;
                }
                case NOT_UPPER_CASE: {
                    pos += UPPER_CASE.exploreWithCase(ca, start);
                    break;
                }
                default: {
                    // No special form for this explorer: consume nothing.
                    break;
                }
            }
            int seqLength = pos - start;
            if (seqLength > 0) {
                this.isSpecial = true;
            }
            return seqLength;
        }

        /**
         * Picks the pattern that represents the run found by the most recent exploration.
         *
         * @param seqLength length in {@code char}s of the run that was consumed
         * @return {@code null} for an empty run; the special pattern if the run was flagged as
         *         special; the unit pattern for a run of length 1, or of length 2 when a
         *         surrogate was seen; the sequence pattern otherwise
         */
        private String getPattern(int seqLength) {
            if (seqLength == 0) {
                return null;
            }
            if (this.isSpecial) {
                return this.specialPattern;
            }
            final boolean singleUnit = seqLength == 1 || (seqLength == 2 && this.isIncludeSurrPair);
            return singleUnit ? this.patternUnit : this.patternSequence;
        }

        /**
         * Returns how many {@code char}s to skip for the character at the current position, and
         * records in {@code isIncludeSurrPair} that a surrogate was encountered.
         *
         * @param currentChar the {@code char} at the current position
         * @return {@code 2} if {@code currentChar} is a surrogate, {@code 1} otherwise
         */
        private int getCodepointSize(char currentChar) {
            if (Character.isSurrogate(currentChar)) {
                this.isIncludeSurrPair = true;
                return 2;
            }
            return 1;
        }
    }
}

