package org.talend.dataquality.nlp;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function;
import org.talend.dataquality.nlp.toolkit.AbstractToolkit;
import scala.Tuple2;

/* loaded from: input_file:org/talend/dataquality/nlp/CRFLabeling.class */
public class CRFLabeling implements Serializable {
    private static final long serialVersionUID = 1;

    /* loaded from: input_file:org/talend/dataquality/nlp/CRFLabeling$Scores.class */
    /**
     * Accumulates the per-class counters needed to derive precision, recall and F1.
     *
     * <p>Three running counters are kept as doubles: true positives ({@code tp}),
     * predicted-true ({@code pt}) and label-true ({@code lt}). Precision is
     * {@code tp / pt} and recall is {@code tp / lt}; both use floating-point division,
     * so a zero denominator yields {@code NaN} (or an infinity) rather than throwing.
     */
    public static class Scores implements Serializable {
        private static final long serialVersionUID = 1;
        // Last precision/recall computed by getF1score(); kept as fields so the
        // serialized shape of the class does not change.
        private double precision;
        private double recall;
        private String className;
        private double tp = 0.0d;
        private double pt = 0.0d;
        private double lt = 0.0d;

        /**
         * Creates the counters for one class.
         *
         * @param j    initial true-positive count
         * @param j2   initial predicted-true count
         * @param j3   initial label-true count
         * @param str  name of the class these counters describe
         */
        public Scores(long j, long j2, long j3, String str) {
            this.tp += j;
            this.pt += j2;
            this.lt += j3;
            this.className = str;
        }

        /** @return {@code tp / pt}; {@code NaN} when both counters are zero */
        public double getPrecision() {
            return this.tp / this.pt;
        }

        /** @return {@code tp / lt}; {@code NaN} when both counters are zero */
        public double getRecall() {
            return this.tp / this.lt;
        }

        /** Adds {@code j} to the true-positive counter. */
        public void updateTruePositive(long j) {
            this.tp += j;
        }

        /** Adds {@code j} to the predicted-true counter. */
        public void updatePredictedTrue(long j) {
            this.pt += j;
        }

        /** Adds {@code j} to the label-true counter. */
        public void updateLabelTrue(long j) {
            this.lt += j;
        }

        /**
         * Computes the harmonic mean of precision and recall.
         *
         * <p>When precision and recall are both exactly zero (no true positives although
         * predictions and labels exist) the harmonic-mean formula would evaluate
         * {@code 0 / 0}; in that case {@code 0.0} is returned. If precision or recall is
         * itself undefined ({@code NaN}), the result stays {@code NaN}.
         *
         * @return the F1 score
         */
        public double getF1score() {
            this.precision = getPrecision();
            this.recall = getRecall();
            // NaN never compares equal to 0.0, so undefined inputs fall through unchanged.
            if (this.precision == 0.0d && this.recall == 0.0d) {
                return 0.0d;
            }
            return 2.0d * ((this.recall * this.precision) / (this.recall + this.precision));
        }

        /** @return the current true-positive counter */
        public double getTruePositive() {
            return this.tp;
        }

        /** @return the current predicted-true counter */
        public double getPredictedTrue() {
            return this.pt;
        }

        /** @return the current label-true counter */
        public double getLabelTrue() {
            return this.lt;
        }

        /** @return the class name supplied at construction */
        public String getClassName() {
            return this.className;
        }
    }

    /* loaded from: input_file:org/talend/dataquality/nlp/CRFLabeling$TokenNumCharPair.class */
    /**
     * Immutable triple of a token's text, an integer associated with it, and its tag.
     *
     * <p>Within this file the integer is compared against the number of alphabetic
     * characters that precede a candidate occurrence of the token in a sentence.
     */
    public static class TokenNumCharPair implements Serializable {
        private static final long serialVersionUID = 1;
        private final String token;
        private final int num;
        private final String tag;

        /**
         * @param tokenText  the token text
         * @param charCount  the integer stored alongside the token
         * @param tagName    the tag attached to the token
         */
        public TokenNumCharPair(String tokenText, int charCount, String tagName) {
            token = tokenText;
            num = charCount;
            tag = tagName;
        }

        /** @return the token text */
        public String getToken() {
            return token;
        }

        /** @return the integer stored with the token */
        public int getNum() {
            return num;
        }

        /** @return the tag attached to the token */
        public String getTag() {
            return tag;
        }
    }

    /**
     * Counts the {@code char} units of a string that are alphabetic according to
     * {@link Character#isAlphabetic(int)}.
     *
     * @param str the text to scan; must not be {@code null}
     * @return the number of alphabetic chars in {@code str}
     */
    static int countLetters(String str) {
        int letters = 0;
        final int length = str.length();
        for (int pos = 0; pos < length; pos++) {
            // Each char is tested on its own (widened to int), not as a full code point.
            if (Character.isAlphabetic(str.charAt(pos))) {
                letters++;
            }
        }
        return letters;
    }

    /**
     * Flags every span of {@code list} that lines up with at least one span of {@code list2}.
     *
     * <p>A span of {@code list} is flagged when it shares its start with a span of
     * {@code list2}, shares its end with it, or lies entirely inside it.
     *
     * @param list  the spans to flag; the result has one entry per element, in the same order
     * @param list2 the spans to compare against
     * @return a mutable list of flags, {@code true} at each index whose span matched
     */
    static List<Boolean> labelTokens(List<AbstractToolkit.TokenSpan> list, List<AbstractToolkit.TokenSpan> list2) {
        List<Boolean> flags = new ArrayList<Boolean>(Collections.nCopies(list.size(), Boolean.FALSE));
        for (AbstractToolkit.TokenSpan reference : list2) {
            int index = 0;
            for (AbstractToolkit.TokenSpan candidate : list) {
                if (alignsWith(candidate, reference)) {
                    flags.set(index, Boolean.TRUE);
                }
                index++;
            }
        }
        return flags;
    }

    /** Tells whether {@code candidate} shares a boundary with {@code reference} or lies inside it. */
    private static boolean alignsWith(AbstractToolkit.TokenSpan candidate, AbstractToolkit.TokenSpan reference) {
        if (candidate.getStart() == reference.getStart()) {
            return true;
        }
        if (candidate.getEnd() == reference.getEnd()) {
            return true;
        }
        return candidate.getStart() >= reference.getStart() && candidate.getEnd() <= reference.getEnd();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Wraps tokens of a sentence in {@code <tag>token</tag>} markers.
     *
     * <p>For every pair, all literal occurrences of the token in {@code str} are located.
     * The occurrence kept is the one whose count of preceding alphabetic characters is
     * closest to the pair's number (the first one wins on ties). A pair is ignored when
     * the token does not occur, or when even the closest occurrence is off by at least
     * the sentence length.
     *
     * <p>The chosen spans are kept ordered by start offset and replaced from right to
     * left, so inserting a tag never shifts the offsets of a span still to be processed,
     * regardless of the order in which the pairs were supplied.
     *
     * @param list the tokens to tag, each with its expected preceding-letter count and tag
     * @param str  the sentence to annotate; must not be {@code null}
     * @return the sentence with the selected occurrences wrapped in their tags
     */
    public static String labelSentence(List<TokenNumCharPair> list, String str) {
        StringBuilder labeled = new StringBuilder(str);
        // Parallel lists, kept sorted by ascending span start.
        List<AbstractToolkit.TokenSpan> chosenSpans = new ArrayList<AbstractToolkit.TokenSpan>();
        List<String> chosenTags = new ArrayList<String>();

        for (TokenNumCharPair pair : list) {
            AbstractToolkit.TokenSpan span = findClosestOccurrence(pair.getToken(), pair.getNum(), str);
            if (span == null) {
                continue;
            }
            // Stable insertion: step back only over spans that start strictly later, so
            // input that is already in sentence order is simply appended.
            int insertAt = chosenSpans.size();
            while (insertAt > 0 && chosenSpans.get(insertAt - 1).getStart() > span.getStart()) {
                insertAt--;
            }
            chosenSpans.add(insertAt, span);
            chosenTags.add(insertAt, pair.getTag());
        }

        // Right-to-left so earlier offsets remain valid while the builder grows.
        for (int k = chosenSpans.size() - 1; k >= 0; k--) {
            AbstractToolkit.TokenSpan span = chosenSpans.get(k);
            String tag = chosenTags.get(k);
            int start = span.getStart();
            int end = span.getEnd();
            labeled.replace(start, end, String.format("<%s>%s</%s>", tag, labeled.substring(start, end), tag));
        }
        return labeled.toString();
    }

    /**
     * Finds the literal occurrence of {@code token} in {@code sentence} whose number of
     * preceding alphabetic characters is closest to {@code num}.
     *
     * @return the span of the best occurrence, or {@code null} when there is no occurrence
     *         closer than the sentence length
     */
    private static AbstractToolkit.TokenSpan findClosestOccurrence(String token, int num, String sentence) {
        Matcher matcher = Pattern.compile(Pattern.quote(token)).matcher(sentence);
        AbstractToolkit.TokenSpan best = null;
        int bestDistance = sentence.length();
        while (matcher.find()) {
            int start = matcher.start();
            int distance = Math.abs(countLetters(sentence.substring(0, start)) - num);
            // Strict comparison keeps the earliest occurrence among equally close ones.
            if (distance < bestDistance) {
                best = new AbstractToolkit.TokenSpan(start, matcher.end());
                bestDistance = distance;
            }
        }
        return best;
    }

    /**
     * Builds one {@link Scores} entry per distinct value found in the pair RDD.
     *
     * <p>For each distinct value, three counts are taken: pairs whose key and value both
     * equal it (true-positive counter), keys equal to it (predicted-true counter) and
     * values equal to it (label-true counter). Each count is a separate RDD action.
     *
     * @param javaPairRDD the pairs to evaluate
     * @return the scores, in the order the distinct values were collected
     */
    public static List<Scores> getScores(JavaPairRDD<String, String> javaPairRDD) {
        List<Scores> perClass = new ArrayList<Scores>();
        List<String> classNames = javaPairRDD.values().distinct().collect();
        for (final String label : classNames) {
            Function<Tuple2<String, String>, Boolean> keyAndValueMatch = new Function<Tuple2<String, String>, Boolean>() {
                private static final long serialVersionUID = 1;

                public Boolean call(Tuple2<String, String> pair) throws Exception {
                    if (!pair._1().equals(label)) {
                        return Boolean.valueOf(false);
                    }
                    return Boolean.valueOf(pair._2().equals(label));
                }
            };
            Function<String, Boolean> isLabel = new Function<String, Boolean>() {
                private static final long serialVersionUID = 1;

                public Boolean call(String candidate) throws Exception {
                    return Boolean.valueOf(candidate.equals(label));
                }
            };

            long truePositives = javaPairRDD.filter(keyAndValueMatch).count();
            long predictedTrue = javaPairRDD.keys().filter(isLabel).count();
            long labelTrue = javaPairRDD.values().filter(isLabel).count();
            perClass.add(new Scores(truePositives, predictedTrue, labelTrue, label));
        }
        return perClass;
    }
}
