package com.datumbox.framework.utilities.text.extractors;

import com.datumbox.common.utilities.PHPfunctions;
import com.datumbox.framework.utilities.text.extractors.TextExtractor;
import com.datumbox.framework.utilities.text.tokenizers.Tokenizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.math.NumberUtils;

/* loaded from: input_file:com/datumbox/framework/utilities/text/extractors/NgramsExtractor.class */
public class NgramsExtractor extends TextExtractor<Parameters, String, Double> {
    /** Delimiter written after every word ID when IDs are concatenated into a combination key (e.g. {@code "7_3_"}). */
    private static final String SEPARATOR = "_";
    /** Word ID -> token text; rebuilt on every {@code extract} call by {@code buildInternalArrays}. */
    private Map<Integer, String> ID2word;
    /** Word ID -> number of times that token appears in the document being extracted. */
    private Map<Integer, Double> ID2occurrences;
    /** Zero-based token position in the document -> word ID, kept in document order. */
    private Map<Integer, Integer> position2ID;
    /** Total number of tokens in the document being extracted. */
    private Integer numberOfWordsInDoc;

    /* loaded from: input_file:com/datumbox/framework/utilities/text/extractors/NgramsExtractor$Parameters.class */
    /**
     * Tunable settings of the n-gram extractor.
     *
     * <p>The descriptions below state how the enclosing extractor reads each
     * value; the setters perform no validation.
     */
    public static class Parameters extends TextExtractor.Parameters {
        /** Maximum number of words a single combination may contain. */
        private int maxCombinations = 3;
        /** Minimum character length of a usable word; ignored when {@code <= 1} and for numeric tokens. */
        private int minWordLength = 1;
        /** Minimum occurrence count of a usable word and minimum score of a returned combination. */
        private int minWordOccurrence = 1;
        /** Number of token positions, counted from the starting word, that are scanned for combinations. */
        private int examinationWindowLength = 15;
        /** Maximum number of tokens allowed between two consecutive words of a combination. */
        private int maxDistanceBetweenKwds = 4;
        /** Largest combination size (in words) that may still be extended across a gap. */
        private int keepFloatPointsUntilCombination = 3;
        /** Threshold compared against {@code 0.5 * extraWords} when extending a combination across a gap. */
        private double keepFloatPointsAbove = 0.1d;

        /** @return the maximum number of words per combination */
        public int getMaxCombinations() {
            return maxCombinations;
        }

        /** @param maxCombinations the maximum number of words per combination */
        public void setMaxCombinations(int maxCombinations) {
            this.maxCombinations = maxCombinations;
        }

        /** @return the minimum length of a usable word */
        public int getMinWordLength() {
            return minWordLength;
        }

        /** @param minWordLength the minimum length of a usable word */
        public void setMinWordLength(int minWordLength) {
            this.minWordLength = minWordLength;
        }

        /** @return the minimum occurrence threshold */
        public int getMinWordOccurrence() {
            return minWordOccurrence;
        }

        /** @param minWordOccurrence the minimum occurrence threshold */
        public void setMinWordOccurrence(int minWordOccurrence) {
            this.minWordOccurrence = minWordOccurrence;
        }

        /** @return the length of the examination window in token positions */
        public int getExaminationWindowLength() {
            return examinationWindowLength;
        }

        /** @param examinationWindowLength the length of the examination window in token positions */
        public void setExaminationWindowLength(int examinationWindowLength) {
            this.examinationWindowLength = examinationWindowLength;
        }

        /** @return the maximum number of tokens between consecutive words of a combination */
        public int getMaxDistanceBetweenKwds() {
            return maxDistanceBetweenKwds;
        }

        /** @param maxDistanceBetweenKwds the maximum number of tokens between consecutive words of a combination */
        public void setMaxDistanceBetweenKwds(int maxDistanceBetweenKwds) {
            this.maxDistanceBetweenKwds = maxDistanceBetweenKwds;
        }

        /** @return the largest combination size that may still be extended across a gap */
        public int getKeepFloatPointsUntilCombination() {
            return keepFloatPointsUntilCombination;
        }

        /** @param keepFloatPointsUntilCombination the largest combination size that may still be extended across a gap */
        public void setKeepFloatPointsUntilCombination(int keepFloatPointsUntilCombination) {
            this.keepFloatPointsUntilCombination = keepFloatPointsUntilCombination;
        }

        /** @return the threshold compared against {@code 0.5 * extraWords} */
        public double getKeepFloatPointsAbove() {
            return keepFloatPointsAbove;
        }

        /** @param keepFloatPointsAbove the threshold compared against {@code 0.5 * extraWords} */
        public void setKeepFloatPointsAbove(double keepFloatPointsAbove) {
            this.keepFloatPointsAbove = keepFloatPointsAbove;
        }
    }

    /**
     * Creates an extractor that uses the given settings.
     *
     * @param parameters the extractor settings, handed to the superclass constructor
     */
    public NgramsExtractor(Parameters parameters) {
        super(parameters);
    }

    /**
     * Extracts word combinations (n-grams) from the text and scores them.
     *
     * <p>Every usable word position is taken as a starting point and all
     * combinations found in the window that follows it are collected. Each
     * occurrence of a combination adds {@code 1.0} to its score when there are
     * no extra tokens between its words, and {@code 0.5 * extraWords}
     * otherwise. Combinations whose total score is below
     * {@code minWordOccurrence} are dropped.
     *
     * <p>The per-document lookup tables are kept in instance fields that are
     * reset at the start of each call, so concurrent calls on the same
     * instance would overwrite each other's state.
     *
     * @param text the document to analyse
     * @return a map from the space-separated words of a combination (in
     *         document order) to its score
     */
    @Override
    public Map<String, Double> extract(String text) {
        final Parameters params = (Parameters) this.parameters;

        this.ID2word = new HashMap<Integer, String>();
        this.ID2occurrences = new HashMap<Integer, Double>();
        this.position2ID = new LinkedHashMap<Integer, Integer>();
        this.numberOfWordsInDoc = 0;
        buildInternalArrays(text);

        // Pass 1: accumulate a score per combination, keyed by the encoded word-ID string.
        final int maxCombinations = params.getMaxCombinations();
        Map<String, Double> scoreByIdCombination = new HashMap<String, Double>();
        for (Map.Entry<Integer, Integer> positioned : this.position2ID.entrySet()) {
            if (!useThisWord(positioned.getValue())) {
                continue;
            }
            Map<String, Integer> combinations = getCombinationsWithinWindow(positioned.getKey(), maxCombinations);
            for (Map.Entry<String, Integer> combination : combinations.entrySet()) {
                String idCombination = combination.getKey();
                // One separator per word, so (separators - 1) is the span of a gap-free combination.
                int extraWords = combination.getValue().intValue()
                        - (PHPfunctions.substr_count(idCombination, SEPARATOR) - 1);
                Double current = scoreByIdCombination.get(idCombination);
                double base = current == null ? 0.0d : current.doubleValue();
                double updated = extraWords <= 0 ? base + 1.0d : base + (0.5d * extraWords);
                scoreByIdCombination.put(idCombination, Double.valueOf(updated));
            }
        }

        // Pass 2: drop low scores and translate the ID strings back into words.
        final int minOccurrence = params.getMinWordOccurrence();
        Map<String, Double> result = new HashMap<String, Double>();
        for (Map.Entry<String, Double> scored : scoreByIdCombination.entrySet()) {
            Double score = scored.getValue();
            if (score.doubleValue() >= minOccurrence) {
                String[] ids = scored.getKey().split(SEPARATOR);
                StringBuilder phrase = new StringBuilder(ids.length * 6);
                // IDs are stored newest-first, so walk backwards to restore document order.
                for (int k = ids.length - 1; k >= 0; k--) {
                    phrase.append(this.ID2word.get(Integer.valueOf(ids[k]))).append(" ");
                }
                if (phrase.length() > 0) {
                    result.put(phrase.toString().trim(), score);
                }
            }
        }
        return result;
    }

    /**
     * Scores how often the words of a keyword appear, in order, inside a text.
     *
     * <p>Both arguments are tokenized with the configured tokenizer. Matching
     * starts from each successive position of the first keyword word; every
     * following keyword word is matched at its nearest later position. A match
     * with no extra tokens between its words adds {@code 1.0}; otherwise it
     * adds {@code 0.5 * extraWords}. Only positions of the first word are
     * consumed, so later words may take part in more than one match.
     *
     * @param keyword the word or phrase to look for
     * @param text    the text to search in
     * @return the accumulated score, or {@code 0.0} when the keyword yields no
     *         tokens or is never matched
     */
    public double numberOfOccurrences(String keyword, String text) {
        Tokenizer tokenizer = ((Parameters) this.parameters).generateTokenizer();
        List<String> keywordTokens = tokenizer.tokenize(keyword);
        int keywordLength = keywordTokens.size();
        if (keywordLength == 0) {
            return 0.0d;
        }

        // Record, for every keyword word, the positions at which it occurs in the text.
        List<String> textTokens = tokenizer.tokenize(text);
        int textLength = textTokens.size();
        Map<String, List<Integer>> positionsByWord = new LinkedHashMap<String, List<Integer>>();
        for (int position = 0; position < textLength; position++) {
            String token = textTokens.get(position);
            if (!keywordTokens.contains(token)) {
                continue;
            }
            List<Integer> positions = positionsByWord.get(token);
            if (positions == null) {
                positions = new ArrayList<Integer>();
                positionsByWord.put(token, positions);
            }
            positions.add(Integer.valueOf(position));
        }

        double points = 0.0d;
        while (!positionsByWord.isEmpty()) {
            int span = 0;
            Integer previous = null;
            for (String word : keywordTokens) {
                List<Integer> positions = positionsByWord.get(word);
                if (positions == null || positions.isEmpty()) {
                    return points;
                }
                if (previous == null) {
                    // First keyword word: consume its earliest remaining position.
                    previous = positions.remove(0);
                } else {
                    // Later keyword words: nearest position strictly after the previous word.
                    Integer next = null;
                    int smallestGap = Integer.MAX_VALUE;
                    for (Integer candidate : positions) {
                        int gap = candidate.intValue() - previous.intValue();
                        if (gap > 0 && gap <= smallestGap) {
                            smallestGap = gap;
                            next = candidate;
                        }
                    }
                    if (next == null) {
                        return points;
                    }
                    span += next.intValue() - previous.intValue();
                    previous = next;
                }
                if (positions.isEmpty()) {
                    positionsByWord.remove(word);
                }
            }
            // A gap-free match spans exactly (keywordLength - 1) positions.
            int extraWords = span - (keywordLength - 1);
            points = extraWords <= 0 ? points + 1.0d : points + (0.5d * extraWords);
        }
        return points;
    }

    /**
     * Collects every word combination that starts at the given position and
     * fits inside the examination window that follows it.
     *
     * <p>A combination is encoded as its word IDs, each followed by
     * {@code "_"}, with the most recently added word first (for example
     * {@code "9_4_"} for the words with IDs 4 then 9). The mapped value is the
     * distance, in token positions, from the starting position to the last
     * word added.
     *
     * <p>NOTE(review): with the default {@code keepFloatPointsAbove} of 0.1
     * the test {@code 0.5 * extraWords <= keepFloatPointsAbove} cannot pass
     * for any positive {@code extraWords}, so gapped combinations are never
     * extended unless the parameter is raised - confirm this is intended.
     *
     * @param seekingPosition       the token position of the first word; must
     *                              be a key of {@code position2ID}
     * @param maxWordsInCombination the maximum number of words per combination
     * @return the encoded combinations, including the single starting word
     *         with distance 0
     */
    private Map<String, Integer> getCombinationsWithinWindow(Integer seekingPosition, int maxWordsInCombination) {
        final Parameters params = (Parameters) this.parameters;
        final int start = seekingPosition.intValue();
        final int windowEnd = Math.min(start + params.getExaminationWindowLength(),
                this.numberOfWordsInDoc.intValue());

        final Integer startId = this.position2ID.get(seekingPosition);
        Map<String, Integer> combinations = new HashMap<String, Integer>();
        combinations.put(startId.toString() + SEPARATOR, 0);

        final boolean startIsNumber = NumberUtils.isNumber(this.ID2word.get(startId));
        // Position difference below which two consecutive words are close enough:
        // allows at most maxDistanceBetweenKwds tokens between them.
        final int maxPositionGap = params.getMaxDistanceBetweenKwds() + 2;
        final int keepFloatPointsUntil = params.getKeepFloatPointsUntilCombination();
        final double keepFloatPointsAbove = params.getKeepFloatPointsAbove();

        for (int position = start + 1; position < windowEnd; position++) {
            Integer wordId = this.position2ID.get(Integer.valueOf(position));
            if (wordId == null || !useThisWord(wordId)) {
                continue;
            }
            // A numeric starting word is never combined with another numeric word.
            if (startIsNumber && NumberUtils.isNumber(this.ID2word.get(wordId))) {
                continue;
            }

            final String idMarker = SEPARATOR + wordId + SEPARATOR;
            final int windowLength = position - start;

            // Extensions are gathered separately so the map is not modified while iterating it.
            Map<String, Integer> extensions = new HashMap<String, Integer>();
            for (Map.Entry<String, Integer> existing : combinations.entrySet()) {
                String combination = existing.getKey();
                // Every word ID is followed by one separator, so this counts the words.
                int wordsInCombination = PHPfunctions.substr_count(combination, SEPARATOR);
                if (wordsInCombination >= maxWordsInCombination) {
                    continue;
                }

                int extraWords = windowLength - wordsInCombination;
                boolean gapAcceptable = extraWords <= 0
                        || (wordsInCombination <= keepFloatPointsUntil
                            && 0.5d * extraWords <= keepFloatPointsAbove);
                if (!gapAcceptable) {
                    continue;
                }

                int previousWindowLength = existing.getValue().intValue();
                if (windowLength - previousWindowLength >= maxPositionGap) {
                    continue;
                }

                // When the new word does not directly follow the combination's last word,
                // extend only if that last word occurs more than once in the document.
                boolean directlyFollows = position <= start + previousWindowLength + 1;
                if (!directlyFollows) {
                    String lastAddedId = combination.substring(0, combination.indexOf(SEPARATOR));
                    if (this.ID2occurrences.get(Integer.valueOf(lastAddedId)).doubleValue() == 1.0d) {
                        continue;
                    }
                }

                // Skip when the word is already part of the combination.
                String delimited = SEPARATOR + combination;
                if (delimited.contains(idMarker)) {
                    continue;
                }
                extensions.put(wordId + delimited, Integer.valueOf(windowLength));
            }

            for (Map.Entry<String, Integer> extension : extensions.entrySet()) {
                String key = extension.getKey();
                if (!combinations.containsKey(key)) {
                    combinations.put(key, extension.getValue());
                }
            }
        }
        return combinations;
    }

    /**
     * Decides whether the word with the given ID may take part in combinations.
     *
     * <p>The word must be known. It must also be at least
     * {@code minWordLength} characters long, unless that limit is
     * {@code <= 1} or the word is numeric according to
     * {@code NumberUtils.isNumber}. Finally it must occur at least
     * {@code minWordOccurrence} times, unless that limit is {@code <= 1}.
     *
     * @param wordId the ID of the word to check
     * @return {@code true} when the word passes all filters
     */
    private boolean useThisWord(Integer wordId) {
        final String word = this.ID2word.get(wordId);
        if (word == null) {
            return false;
        }

        final Parameters params = (Parameters) this.parameters;

        final int minLength = params.getMinWordLength();
        final boolean lengthAccepted = minLength <= 1
                || word.length() >= minLength
                || NumberUtils.isNumber(word);
        if (!lengthAccepted) {
            return false;
        }

        final int minOccurrence = params.getMinWordOccurrence();
        if (minOccurrence <= 1) {
            return true;
        }
        return this.ID2occurrences.get(wordId).doubleValue() >= (double) minOccurrence;
    }

    /**
     * Tokenizes the text and fills the per-document lookup tables.
     *
     * <p>Each distinct token receives a sequential ID starting at 0, in order
     * of first appearance. The method records the ID-to-word mapping, the
     * occurrence count of every ID, the ID found at every token position and
     * the total number of tokens.
     *
     * @param text the document to index
     */
    private void buildInternalArrays(String text) {
        List<String> tokens = ((Parameters) this.parameters).generateTokenizer().tokenize(text);
        this.numberOfWordsInDoc = Integer.valueOf(tokens.size());

        Map<String, Integer> word2ID = new HashMap<String, Integer>();
        int position = 0;
        for (String token : tokens) {
            Integer wordId = word2ID.get(token);
            if (wordId == null) {
                // IDs are handed out sequentially, so the next free ID equals the number assigned so far.
                wordId = Integer.valueOf(word2ID.size());
                word2ID.put(token, wordId);
                this.ID2word.put(wordId, token);
                this.ID2occurrences.put(wordId, Double.valueOf(1.0d));
            } else {
                double seenSoFar = this.ID2occurrences.get(wordId).doubleValue();
                this.ID2occurrences.put(wordId, Double.valueOf(seenSoFar + 1.0d));
            }
            this.position2ID.put(Integer.valueOf(position), wordId);
            position++;
        }
    }
}
