package org.apache.any23.encoding;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.detect.TextStatistics;
import org.apache.tika.utils.CharsetUtils;
import org.jsoup.nodes.Element;
import org.jsoup.select.Evaluator;
import org.jsoup.select.QueryParser;
import org.jsoup.select.Selector;
import org.rypt.f8.Utf8Statistics;

/* loaded from: input_file:org/apache/any23/encoding/EncodingUtils.class */
class EncodingUtils {
    // Per-charset tables of byte values (as passed to TextStatistics.count(int)).
    // correctVariant() consults them through hasNoneOf(): if any listed value has a
    // non-zero count in the sampled data, the corresponding charset is treated as not
    // matching. Presumably these are byte positions left unassigned by the respective
    // encoding -- TODO confirm against the published code page tables.

    // Checked for both the "ISO-8859-1" and "windows-1252" branches of correctVariant().
    private static final int[] windows1252Illegals = {129, 141, 143, 144, 157};
    // Checked for the "ISO-8859-2" and "windows-1250" branches.
    private static final int[] windows1250Illegals = {129, 131, 136, 144, 152};
    // Checked for the "ISO-8859-7" and "windows-1253" branches (as the fallback test).
    private static final int[] iso_8859_7Illegals = {174, 210, 255};
    // Checked for the "ISO-8859-7" and "windows-1253" branches (as the first test).
    private static final int[] windows1253Illegals = {129, 136, 138, 140, 141, 142, 143, 144, 152, 154, 156, 157, 158, 159, 170, 210, 255};
    // Checked for the "ISO-8859-8", "ISO-8859-8-I" and "windows-1255" branches (as the first test).
    private static final int[] windows1255Illegals = {129, 138, 140, 141, 142, 143, 144, 154, 156, 157, 158, 159, 202, 217, 218, 219, 220, 221, 222, 223, 251, 252, 255};
    // Checked for the "ISO-8859-8", "ISO-8859-8-I" and "windows-1255" branches (as the fallback test).
    private static final int[] iso_8859_8Illegals = {161, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 251, 252, 255};
    // Checked for the "ISO-8859-9" and "windows-1254" branches.
    private static final int[] windows1254Illegals = {129, 141, 142, 143, 144, 157, 158};
    // Checked for the "windows-1251" branch.
    private static final int[] windows1251Illegals = {152};
    // Checked for the "ISO-8859-6" branch.
    private static final int[] iso_8859_6Illegals = {161, 162, 163, 165, 166, 167, 168, 169, 170, 171, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 192, 219, 220, 221, 222, 223, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
    // Pre-parsed jsoup selector used by htmlCharset(): <meta> elements that either have
    // http-equiv=content-type or carry a charset attribute.
    private static final Evaluator charsetMetas = QueryParser.parse("meta[http-equiv=content-type], meta[charset]");
    // Case-insensitive. Finds "charset=" and captures (group 1) the token that follows,
    // after skipping any whitespace/quote characters; the token ends at whitespace, a
    // comma, a semicolon or a quote. Used by contentTypeCharset().
    private static final Pattern contentTypeCharsetPattern = Pattern.compile("(?i)\\bcharset\\s*=[\\s\"']*([^\\s,;\"']+)");
    // Case-insensitive, dot-matches-newline. Anchored at the very start of the input
    // (optional leading whitespace), matches an "<?xml ... encoding=" declaration and
    // captures (group 1) the encoding name made of [-_:.a-z0-9]. Used by xmlCharset().
    private static final Pattern xmlEncoding = Pattern.compile("(?is)\\A\\s*<\\?\\s*xml\\s+[^<>]*encoding\\s*=\\s*(?:['\"]\\s*)?([-_:.a-z0-9]+)");

    /* loaded from: input_file:org/apache/any23/encoding/EncodingUtils$TextStatisticsOptimizedForUtf8.class */
    /**
     * {@link TextStatistics} variant that additionally feeds every byte range it receives
     * into a {@link Utf8Statistics} instance and answers {@link #looksLikeUTF8()} from
     * that instance instead of from the superclass.
     */
    private static class TextStatisticsOptimizedForUtf8 extends TextStatistics {
        /** Secondary accumulator that sees exactly the same bytes as the superclass. */
        private final Utf8Statistics utf8Stats = new Utf8Statistics();

        /** Instances are created only by the enclosing class. */
        private TextStatisticsOptimizedForUtf8() {
        }

        /**
         * Records the given slice in the superclass statistics first, then in the
         * UTF-8 specific statistics.
         *
         * @param buffer array holding the bytes
         * @param offset index of the first byte to record
         * @param length number of bytes to record
         */
        public void addData(byte[] buffer, int offset, int length) {
            super.addData(buffer, offset, length);
            utf8Stats.write(buffer, offset, length);
        }

        /**
         * @return the verdict of the {@link Utf8Statistics} accumulator for all data
         *         added so far
         */
        public boolean looksLikeUTF8() {
            return utf8Stats.looksLikeUtf8();
        }
    }

    /**
     * Package-private no-argument constructor. It does nothing; every other member
     * visible in this file is static.
     */
    EncodingUtils() {
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Drains the stream and converts it to a string by mapping every byte to the
     * character with the same unsigned value (0-255), i.e. a one-to-one ISO-8859-1 style
     * decoding that never fails.
     *
     * <p>The stream is read until end-of-stream; it is not closed here.
     *
     * @param inputStream source of bytes
     * @return one character per byte read, in order
     * @throws IOException if reading from the stream fails
     */
    public static String iso_8859_1(InputStream inputStream) throws IOException {
        final int chunkSize = 8192;
        // available() is only used as a sizing hint for the builder.
        StringBuilder decoded = new StringBuilder(Math.max(inputStream.available(), chunkSize));
        byte[] chunk = new byte[chunkSize];
        int filled;
        while ((filled = inputStream.read(chunk)) != -1) {
            for (int pos = 0; pos < filled; pos++) {
                // Mask to undo sign extension so bytes >= 0x80 map to U+0080..U+00FF.
                decoded.append((char) (chunk[pos] & 0xFF));
            }
        }
        return decoded.toString();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Refines a declared or guessed charset using the byte counts gathered from the
     * actual data, switching between an ISO-8859-x charset and its related
     * windows-125x charset where the bytes suggest the other one.
     *
     * <p>Behaviour by {@code charset.name()}:
     * <ul>
     *   <li>ISO-8859-1 / windows-1252: prefers windows-1252 when the data allows it,
     *       otherwise ISO-8859-1 or ISO-8859-15.</li>
     *   <li>ISO-8859-2 / windows-1250 and ISO-8859-9 / windows-1254: pick between the
     *       two members of the pair.</li>
     *   <li>ISO-8859-7 / windows-1253 and ISO-8859-8(-I) / windows-1255: pick between
     *       the pair, or return {@code null} when the data contains bytes listed as
     *       illegal for both.</li>
     *   <li>windows-1251 and ISO-8859-6: returned unchanged, or {@code null} when the
     *       data contains a byte listed as illegal for them.</li>
     *   <li>Any other charset is returned unchanged.</li>
     * </ul>
     *
     * @param textStatistics byte counts of the data being decoded; not consulted when
     *                       {@code charset} is {@code null} or not one of the names above
     * @param charset        the candidate charset, may be {@code null}
     * @return the charset to use, or {@code null} if {@code charset} is {@code null} or
     *         the data rules the candidate out; where a replacement charset is looked up
     *         via {@code charset(String)} the result is {@code null} if that lookup fails
     */
    public static Charset correctVariant(TextStatistics textStatistics, Charset charset) {
        if (charset == null) {
            return null;
        }
        switch (charset.name()) {
            case "ISO-8859-1":
                // A carriage return or any byte in 0x80-0x9F, combined with no
                // windows-1252-illegal byte, selects windows-1252.
                if ((textStatistics.count('\r') != 0 || hasC1Control(textStatistics))
                        && hasNoneOf(textStatistics, windows1252Illegals)) {
                    try {
                        return forName("windows-1252");
                    } catch (Exception ignored) {
                        // windows-1252 is not available here; fall back to ISO-8859-1/15.
                    }
                }
                return iso_8859_1_or_15(textStatistics);
            case "windows-1252":
                return hasNoneOf(textStatistics, windows1252Illegals)
                        ? charset
                        : iso_8859_1_or_15(textStatistics);
            case "ISO-8859-2":
                if (hasC1Control(textStatistics) && hasNoneOf(textStatistics, windows1250Illegals)) {
                    try {
                        return forName("windows-1250");
                    } catch (Exception ignored) {
                        // windows-1250 is not available here; keep the given charset.
                    }
                }
                return charset;
            case "windows-1250":
                return hasNoneOf(textStatistics, windows1250Illegals)
                        ? charset
                        : charset("ISO-8859-2");
            case "ISO-8859-7":
                if (hasC1Control(textStatistics) && hasNoneOf(textStatistics, windows1253Illegals)) {
                    try {
                        return forName("windows-1253");
                    } catch (Exception ignored) {
                        // windows-1253 is not available here; re-check ISO-8859-7 below.
                    }
                }
                return hasNoneOf(textStatistics, iso_8859_7Illegals) ? charset : null;
            case "windows-1253":
                if (hasNoneOf(textStatistics, windows1253Illegals)) {
                    return charset;
                }
                return hasNoneOf(textStatistics, iso_8859_7Illegals) ? charset("ISO-8859-7") : null;
            case "ISO-8859-8":
            case "ISO-8859-8-I":
                if (hasC1Control(textStatistics) && hasNoneOf(textStatistics, windows1255Illegals)) {
                    try {
                        return forName("windows-1255");
                    } catch (Exception ignored) {
                        // windows-1255 is not available here; re-check ISO-8859-8 below.
                    }
                }
                return hasNoneOf(textStatistics, iso_8859_8Illegals) ? charset : null;
            case "windows-1255":
                if (hasNoneOf(textStatistics, windows1255Illegals)) {
                    return charset;
                }
                return hasNoneOf(textStatistics, iso_8859_8Illegals) ? charset("ISO-8859-8") : null;
            case "ISO-8859-9":
                if (hasC1Control(textStatistics) && hasNoneOf(textStatistics, windows1254Illegals)) {
                    try {
                        return forName("windows-1254");
                    } catch (Exception ignored) {
                        // windows-1254 is not available here; keep the given charset.
                    }
                }
                return charset;
            case "windows-1254":
                return hasNoneOf(textStatistics, windows1254Illegals)
                        ? charset
                        : charset("ISO-8859-9");
            case "windows-1251":
                return hasNoneOf(textStatistics, windows1251Illegals) ? charset : null;
            case "ISO-8859-6":
                return hasNoneOf(textStatistics, iso_8859_6Illegals) ? charset : null;
            default:
                return charset;
        }
    }

    /**
     * Chooses between ISO-8859-1 and ISO-8859-15 based on whether byte 164 (0xA4)
     * occurs in the data.
     *
     * @param textStatistics byte counts of the data
     * @return ISO-8859-15 when byte 164 occurs and that charset can be resolved through
     *         {@code forName}; ISO-8859-1 in every other case
     */
    private static Charset iso_8859_1_or_15(TextStatistics textStatistics) {
        boolean byteA4Absent = textStatistics.count(164) == 0;
        if (byteA4Absent) {
            return StandardCharsets.ISO_8859_1;
        }
        try {
            return forName("ISO-8859-15");
        } catch (Exception ignored) {
            // ISO-8859-15 could not be resolved; ISO-8859-1 is the best-effort answer.
            return StandardCharsets.ISO_8859_1;
        }
    }

    /**
     * Tells whether none of the given byte values occurs in the data.
     *
     * @param textStatistics byte counts of the data
     * @param iArr           byte values to look for
     * @return {@code false} as soon as one listed value has a non-zero count,
     *         {@code true} if all counts are zero (including for an empty array)
     */
    private static boolean hasNoneOf(TextStatistics textStatistics, int[] iArr) {
        for (int idx = 0; idx < iArr.length; idx++) {
            int occurrences = textStatistics.count(iArr[idx]);
            if (occurrences != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the data contains at least one byte in the range 0x80-0x9F
     * (128-159 inclusive).
     *
     * @param textStatistics byte counts of the data
     * @return {@code true} if any byte value in that range has a non-zero count
     */
    private static boolean hasC1Control(TextStatistics textStatistics) {
        for (int value = 0x80; value <= 0x9F; value++) {
            if (textStatistics.count(value) != 0) {
                return true;
            }
        }
        return false;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Reads the stream to its end and accumulates byte statistics over everything read.
     *
     * <p>The stream is not closed here.
     *
     * @param inputStream source of bytes
     * @return a {@code TextStatisticsOptimizedForUtf8} holding the statistics of all
     *         bytes read
     * @throws IOException if reading from the stream fails
     */
    public static TextStatistics stats(InputStream inputStream) throws IOException {
        TextStatisticsOptimizedForUtf8 collected = new TextStatisticsOptimizedForUtf8();
        byte[] chunk = new byte[8192];
        for (int n = inputStream.read(chunk); n != -1; n = inputStream.read(chunk)) {
            collected.addData(chunk, 0, n);
        }
        return collected;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Resolves a charset name through {@code CharsetUtils.forName}, retrying once with
     * every case-insensitive {@code -I} suffix (followed by a word boundary) removed
     * from the name if the first attempt fails.
     *
     * @param str charset name to resolve
     * @return the resolved charset
     * @throws Exception the exception raised by the <em>first</em> lookup when the
     *                   retry fails as well
     */
    public static Charset forName(String str) throws Exception {
        try {
            return CharsetUtils.forName(str);
        } catch (Exception firstFailure) {
            Charset viaStrippedName;
            try {
                String stripped = str.replaceAll("(?i)-I\\b", "");
                viaStrippedName = CharsetUtils.forName(stripped);
            } catch (Exception ignored) {
                // Report the failure for the name the caller actually asked for.
                throw firstFailure;
            }
            return viaStrippedName;
        }
    }

    /**
     * Lenient wrapper around {@code forName}: resolves the charset name, turning any
     * failure into {@code null}.
     *
     * @param str charset name to resolve
     * @return the resolved charset, or {@code null} if {@code forName} threw
     */
    private static Charset charset(String str) {
        Charset resolved = null;
        try {
            resolved = forName(str);
        } catch (Exception ignored) {
            // Unresolvable name: signalled to the caller as null.
        }
        return resolved;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Looks for a charset declaration in the {@code <meta>} elements selected by
     * {@code charsetMetas} under the given element.
     *
     * <p>For each selected element, in selection order, the {@code charset} attribute is
     * tried first and the {@code charset=} token inside the {@code content} attribute
     * second; each candidate is passed through {@code correctVariant}. The first
     * non-null result wins.
     *
     * @param textStatistics byte counts of the document, forwarded to {@code correctVariant}
     * @param element        root element to search
     * @return the first usable charset found, or {@code null} if none is found
     */
    public static Charset htmlCharset(TextStatistics textStatistics, Element element) {
        for (Element meta : Selector.select(charsetMetas, element)) {
            Charset fromCharsetAttr = correctVariant(textStatistics, charset(meta.attr("charset")));
            if (fromCharsetAttr != null) {
                return fromCharsetAttr;
            }
            Charset fromContentAttr =
                    correctVariant(textStatistics, contentTypeCharset(meta.attr("content")));
            if (fromContentAttr != null) {
                return fromContentAttr;
            }
        }
        return null;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Extracts the charset named by the first {@code charset=} token in a content-type
     * style value, as matched by {@code contentTypeCharsetPattern}.
     *
     * @param charSequence the text to search, may be {@code null}
     * @return the resolved charset, or {@code null} if the input is {@code null}, no
     *         {@code charset=} token is found, or the captured name cannot be resolved
     */
    public static Charset contentTypeCharset(CharSequence charSequence) {
        if (charSequence != null) {
            Matcher token = contentTypeCharsetPattern.matcher(charSequence);
            if (token.find()) {
                // charset(String) resolves via forName and yields null on any failure.
                return charset(token.group(1));
            }
        }
        return null;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Reads the encoding named in an XML declaration at the start of the given text
     * (as matched by {@code xmlEncoding}) and refines it with {@code correctVariant}.
     *
     * @param textStatistics byte counts of the document, forwarded to {@code correctVariant}
     * @param charSequence   the beginning of the document as text
     * @return the refined charset, or {@code null} if there is no matching declaration,
     *         the declared name cannot be resolved, or {@code correctVariant} rejects it
     */
    public static Charset xmlCharset(TextStatistics textStatistics, CharSequence charSequence) {
        Matcher declaration = xmlEncoding.matcher(charSequence);
        if (!declaration.find()) {
            return null;
        }
        String declaredName = declaration.group(1);
        return correctVariant(textStatistics, charset(declaredName));
    }
}
