package com.datumbox.framework.machinelearning.featureselection.scorebased;

import com.datumbox.common.dataobjects.Dataset;
import com.datumbox.common.dataobjects.TypeInference;
import com.datumbox.common.persistentstorage.interfaces.BigMap;
import com.datumbox.common.persistentstorage.interfaces.DatabaseConfiguration;
import com.datumbox.common.persistentstorage.interfaces.DatabaseConnector;
import com.datumbox.framework.machinelearning.common.bases.featureselection.ScoreBasedFeatureSelection;
import java.util.Iterator;
import java.util.Map;

/* loaded from: input_file:com/datumbox/framework/machinelearning/featureselection/scorebased/TFIDF.class */
/**
 * Score-based feature selection that records, for every feature, the largest
 * TF-IDF score observed in any record of the training dataset and later
 * removes from a dataset every column that has no recorded score.
 *
 * <p>The IDF of a feature is computed as {@code log10(N / df)}, where
 * {@code N} is the value returned by {@code ModelParameters.getN()}
 * (presumably the number of training records - confirm against the base
 * class) and {@code df} is the number of feature values in the dataset that
 * are non-null and non-zero for that feature.</p>
 */
public class TFIDF extends ScoreBasedFeatureSelection<TFIDF.ModelParameters, TFIDF.TrainingParameters> {

    /* loaded from: input_file:com/datumbox/framework/machinelearning/featureselection/scorebased/TFIDF$ModelParameters.class */
    /**
     * Learned state of the selector: the per-feature maximum TF-IDF scores.
     */
    public static class ModelParameters extends ScoreBasedFeatureSelection.ModelParameters {

        /** Maps each retained feature to the highest TF-IDF score seen for it. */
        @BigMap
        private Map<Object, Double> maxTFIDFfeatureScores;

        /**
         * Creates the model parameters.
         *
         * @param databaseConnector connector forwarded to the superclass constructor
         */
        protected ModelParameters(DatabaseConnector databaseConnector) {
            super(databaseConnector);
        }

        /**
         * Returns the live map of per-feature maximum TF-IDF scores.
         *
         * @return the score map held by this object (not a copy)
         */
        public Map<Object, Double> getMaxTFIDFfeatureScores() {
            return this.maxTFIDFfeatureScores;
        }

        /**
         * Replaces the map of per-feature maximum TF-IDF scores.
         *
         * @param map the new score map
         */
        protected void setMaxTFIDFfeatureScores(Map<Object, Double> map) {
            this.maxTFIDFfeatureScores = map;
        }
    }

    /* loaded from: input_file:com/datumbox/framework/machinelearning/featureselection/scorebased/TFIDF$TrainingParameters.class */
    /**
     * User-supplied settings that control how the scores are computed and
     * how many features are kept.
     */
    public static class TrainingParameters extends ScoreBasedFeatureSelection.TrainingParameters {
        /** When {@code true}, every non-zero feature value is treated as 1 when scoring. */
        private boolean binarized = false;

        /** Upper bound on the number of features to keep; {@code null} disables the limit. */
        private Integer maxFeatures = null;

        /**
         * Tells whether term frequencies are binarized.
         *
         * @return {@code true} if every non-zero value is scored as 1
         */
        public boolean isBinarized() {
            return this.binarized;
        }

        /**
         * Enables or disables binarization of term frequencies.
         *
         * @param z {@code true} to score every non-zero value as 1
         */
        public void setBinarized(boolean z) {
            this.binarized = z;
        }

        /**
         * Returns the limit on the number of retained features.
         *
         * @return the limit, or {@code null} when no limit is set
         */
        public Integer getMaxFeatures() {
            return this.maxFeatures;
        }

        /**
         * Sets the limit on the number of retained features.
         *
         * @param num the limit, or {@code null} for no limit
         */
        public void setMaxFeatures(Integer num) {
            this.maxFeatures = num;
        }
    }

    /**
     * Creates the selector.
     *
     * @param str                   forwarded unchanged to the superclass constructor
     *                              (presumably the model/database name - confirm)
     * @param databaseConfiguration storage configuration forwarded to the superclass
     */
    public TFIDF(String str, DatabaseConfiguration databaseConfiguration) {
        super(str, databaseConfiguration, ModelParameters.class, TrainingParameters.class);
    }

    /**
     * Computes the maximum TF-IDF score of every feature in {@code dataset}
     * and stores it in the model parameters. When a feature limit is
     * configured and is smaller than the number of scored features, the score
     * map is handed to {@code selectHighScoreFeatures} together with the limit.
     *
     * @param dataset the training data to score
     */
    @Override // com.datumbox.framework.machinelearning.common.bases.baseobjects.BaseTrainable
    protected void _fit(Dataset dataset) {
        ModelParameters modelParameters = (ModelParameters) this.knowledgeBase.getModelParameters();
        TrainingParameters trainingParameters = (TrainingParameters) this.knowledgeBase.getTrainingParameters();
        boolean binarized = trainingParameters.isBinarized();
        int n = modelParameters.getN().intValue();
        DatabaseConnector dbc = this.knowledgeBase.getDbc();

        Map<Object, Double> maxScores;
        Map<Object, Double> idf = dbc.getBigMap("tmp_idf", true);
        try {
            // Pass 1: count, per feature, how many values are non-null and non-zero.
            Iterator<Integer> countIds = dataset.iterator();
            while (countIds.hasNext()) {
                for (Map.Entry<Object, Object> cell : dataset.get(countIds.next()).getX().entrySet()) {
                    Double value = TypeInference.toDouble(cell.getValue());
                    if (value == null || value.doubleValue() == 0.0d) {
                        continue;
                    }
                    Object feature = cell.getKey();
                    Double seen = idf.get(feature);
                    double previous = (seen == null) ? 0.0d : seen.doubleValue();
                    idf.put(feature, Double.valueOf(previous + 1.0d));
                }
            }

            // Turn each count into an IDF weight in place. Only existing keys are
            // rewritten, so the key set of the map does not change during the loop.
            // NOTE(review): put() is kept instead of Entry.setValue() because the
            // storage-backed map's entries may not support setValue - confirm.
            for (Map.Entry<Object, Double> count : idf.entrySet()) {
                double documentFrequency = count.getValue().doubleValue();
                idf.put(count.getKey(), Double.valueOf(Math.log10(n / documentFrequency)));
            }

            // Pass 2: keep the largest non-zero TF-IDF score seen for every feature.
            maxScores = modelParameters.getMaxTFIDFfeatureScores();
            Iterator<Integer> scoreIds = dataset.iterator();
            while (scoreIds.hasNext()) {
                for (Map.Entry<Object, Object> cell : dataset.get(scoreIds.next()).getX().entrySet()) {
                    Double value = TypeInference.toDouble(cell.getValue());
                    if (value == null || value.doubleValue() == 0.0d) {
                        continue;
                    }
                    Object feature = cell.getKey();
                    double termFrequency = binarized ? 1.0d : value.doubleValue();
                    double score = termFrequency * idf.get(feature).doubleValue();
                    if (score == 0.0d) {
                        // An IDF of zero yields a zero score; such features are not recorded.
                        continue;
                    }
                    Double best = maxScores.get(feature);
                    if (best == null || best.doubleValue() < score) {
                        maxScores.put(feature, Double.valueOf(score));
                    }
                }
            }
        } finally {
            // Always release the temporary map, even when scoring fails part-way.
            dbc.dropBigMap("tmp_idf", idf);
        }

        Integer maxFeatures = trainingParameters.getMaxFeatures();
        if (maxFeatures != null && maxFeatures.intValue() < maxScores.size()) {
            ScoreBasedFeatureSelection.selectHighScoreFeatures(maxScores, maxFeatures);
        }
    }

    /**
     * Removes from {@code dataset} every column whose key has no entry in the
     * learned score map.
     *
     * @param dataset the dataset to filter in place
     */
    @Override // com.datumbox.framework.machinelearning.common.bases.featureselection.FeatureSelection
    protected void filterFeatures(Dataset dataset) {
        DatabaseConnector dbc = this.knowledgeBase.getDbc();
        Map<Object, Double> maxScores = ((ModelParameters) this.knowledgeBase.getModelParameters()).getMaxTFIDFfeatureScores();

        // The temporary map is used as a set; only its keys are read.
        Map<Object, Boolean> removedColumns = dbc.getBigMap("tmp_removedColumns", true);
        try {
            for (Object feature : dataset.getXDataTypes().keySet()) {
                if (!maxScores.containsKey(feature)) {
                    removedColumns.put(feature, true);
                }
            }
            dataset.removeColumns(removedColumns.keySet());
        } finally {
            // Always release the temporary map, even when the removal fails.
            dbc.dropBigMap("tmp_removedColumns", removedColumns);
        }
    }
}
