package com.datumbox.common.dataobjects;

import com.datumbox.common.dataobjects.TypeInference;
import com.datumbox.common.persistentstorage.interfaces.DatabaseConfiguration;
import com.datumbox.common.persistentstorage.interfaces.DatabaseConnector;
import com.datumbox.framework.utilities.text.cleaners.StringCleaner;
import com.datumbox.framework.utilities.text.extractors.TextExtractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datumbox/common/dataobjects/Dataset.class */
public final class Dataset implements Serializable, Iterable<Integer> {
    // Reserved column name "~Y"; not referenced in this file.
    // NOTE(review): presumably identifies the response variable column - confirm against callers.
    public static final String yColumnName = "~Y";
    // Reserved column name "~CONSTANT"; not referenced in this file.
    // NOTE(review): presumably identifies a constant/intercept column - confirm against callers.
    public static final String constantColumnName = "~CONSTANT";
    // Records keyed by integer id; _add() assigns the current map size as the id of a new record.
    private Map<Integer, Record> recordList;
    // Data type of the response variable; supplied by the private constructor or inferred by updateMeta().
    private TypeInference.DataType yDataType;
    // Data type of every known X column, keyed by the column key.
    private Map<Object, TypeInference.DataType> xDataTypes;
    // Name passed to DatabaseConfiguration.getConnector(); the constructor sets it to "dts".
    private String dbName;
    // Connector that provides (and later drops) the two backing maps; transient, so not serialized.
    private transient DatabaseConnector dbc;
    // Configuration used to obtain the connector and to create derived datasets; transient, so not serialized.
    private transient DatabaseConfiguration dbConf;

    /* loaded from: input_file:com/datumbox/common/dataobjects/Dataset$Builder.class */
    public static final class Builder {
        /**
         * Builds a Dataset from text files, treating every line of a file as one record.
         *
         * <p>Each map entry associates a response value (the key) with the URI of a file.
         * Every line is cleaned with {@link StringCleaner#clear}, passed to the extractor,
         * wrapped in an {@link AssociativeArray} and added with the entry key as its Y value.
         *
         * @param map                   response value to file URI
         * @param textExtractor         extractor applied to every cleaned line
         * @param databaseConfiguration configuration used to create the Dataset
         * @return the populated Dataset
         * @throws RuntimeException wrapping the {@link IOException} if a file cannot be read;
         *                          the partially built Dataset is erased first
         */
        public static Dataset parseTextFiles(Map<Object, URI> map, TextExtractor textExtractor, DatabaseConfiguration databaseConfiguration) {
            Dataset dataset = new Dataset(databaseConfiguration);
            Logger logger = LoggerFactory.getLogger(Builder.class);
            for (Map.Entry<Object, URI> entry : map.entrySet()) {
                Object key = entry.getKey();
                URI value = entry.getValue();
                logger.info("Dataset Parsing {} class", key);
                // try-with-resources closes the reader on every exit path, including when
                // extraction or insertion of a line throws.
                try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(value)), "UTF8"))) {
                    String line;
                    while ((line = bufferedReader.readLine()) != null) {
                        dataset.add(new Record(new AssociativeArray(textExtractor.extract(StringCleaner.clear(line))), key));
                    }
                } catch (IOException e) {
                    dataset.erase();
                    throw new RuntimeException(e);
                }
            }
            return dataset;
        }

        /**
         * Builds a Dataset from CSV content whose first record is the header.
         *
         * <p>Only the columns listed in {@code map} are read. The column named {@code str}
         * becomes the Y value of each record and all other listed columns become X values.
         * Rows for which {@code CSVRecord.isConsistent()} is false are skipped with a warning.
         * Records are stored with {@code _add}, so column types come from {@code map} rather
         * than being inferred from the data.
         *
         * @param reader                source of the CSV content
         * @param str                   name of the response variable column
         * @param map                   column name to data type for every column to read
         * @param c                     field delimiter
         * @param c2                    quote character
         * @param str2                  record separator
         * @param databaseConfiguration configuration used to create the Dataset
         * @return the populated Dataset
         * @throws RuntimeException wrapping the {@link IOException} if parsing fails;
         *                          the partially built Dataset is erased first
         */
        public static Dataset parseCSVFile(Reader reader, String str, Map<String, TypeInference.DataType> map, char c, char c2, String str2, DatabaseConfiguration databaseConfiguration) {
            Logger logger = LoggerFactory.getLogger(Builder.class);
            logger.info("Parsing CSV file");
            if (!map.containsKey(str)) {
                logger.warn("WARNING: The file is missing the response variable column {}.", str);
            }
            TypeInference.DataType dataType = map.get(str);
            // The X column types are every declared column except the response variable.
            Map<String, TypeInference.DataType> xColumnTypes = new HashMap<>(map);
            xColumnTypes.remove(str);
            Dataset dataset = new Dataset(databaseConfiguration, dataType, xColumnTypes);
            // try-with-resources closes the parser on every exit path, including when a
            // value fails to parse part-way through the file.
            try (CSVParser cSVParser = new CSVParser(reader, CSVFormat.RFC4180.withHeader(new String[0]).withDelimiter(c).withQuote(c2).withRecordSeparator(str2))) {
                for (CSVRecord cSVRecord : cSVParser) {
                    if (!cSVRecord.isConsistent()) {
                        logger.warn("WARNING: Skipping row {} because its size does not match the header size.", Long.valueOf(cSVRecord.getRecordNumber()));
                        continue;
                    }
                    Object y = null;
                    AssociativeArray xData = new AssociativeArray();
                    for (Map.Entry<String, TypeInference.DataType> entry : map.entrySet()) {
                        String column = entry.getKey();
                        Object parsed = TypeInference.DataType.parse(cSVRecord.get(column), entry.getValue());
                        if (str != null && str.equals(column)) {
                            y = parsed;
                        } else {
                            xData.put(column, parsed);
                        }
                    }
                    dataset._add(new Record(xData, y));
                }
                return dataset;
            } catch (IOException e) {
                dataset.erase();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates an empty Dataset backed by storage obtained from the given configuration.
     *
     * <p>A connector is requested for the database name "dts" and two maps,
     * "tmp_recordList" and "tmp_xColumnTypes", are obtained from it.
     *
     * @param databaseConfiguration configuration that supplies the connector
     */
    public Dataset(DatabaseConfiguration databaseConfiguration) {
        dbName = "dts";
        dbConf = databaseConfiguration;
        dbc = dbConf.getConnector(dbName);
        // NOTE(review): the meaning of the boolean flag is defined by DatabaseConnector - confirm there.
        recordList = dbc.getBigMap("tmp_recordList", true);
        yDataType = null;
        xDataTypes = dbc.getBigMap("tmp_xColumnTypes", true);
    }

    /**
     * Creates an empty Dataset whose column types are known in advance.
     *
     * @param databaseConfiguration configuration that supplies the connector
     * @param dataType              data type of the response variable
     * @param map                   data type of every X column, keyed by column name
     */
    private Dataset(DatabaseConfiguration databaseConfiguration, TypeInference.DataType dataType, Map<String, TypeInference.DataType> map) {
        this(databaseConfiguration);
        yDataType = dataType;
        xDataTypes.putAll(map);
    }

    /**
     * Returns the data type of the response variable.
     *
     * @return the stored Y data type, or {@code null} when none has been set or inferred
     */
    public TypeInference.DataType getYDataType() {
        return yDataType;
    }

    /**
     * Returns the data types of the X columns.
     *
     * @return an unmodifiable view of the column key to data type map
     */
    public Map<Object, TypeInference.DataType> getXDataTypes() {
        return Collections.unmodifiableMap(xDataTypes);
    }

    /**
     * Returns the number of known X columns.
     *
     * @return the size of the X column type map
     */
    public int getVariableNumber() {
        return xDataTypes.size();
    }

    /**
     * Returns the number of records stored in this Dataset.
     *
     * @return the size of the record map
     */
    public int getRecordNumber() {
        return recordList.size();
    }

    /**
     * Tells whether this Dataset holds no records.
     *
     * @return {@code true} when the record map is empty
     */
    public boolean isEmpty() {
        return recordList.isEmpty();
    }

    /**
     * Collects the value of a single X column from every record, in id order.
     *
     * @param obj key of the column to extract
     * @return a list holding one value per record
     */
    public FlatDataList extractXColumnValues(Object obj) {
        FlatDataList columnValues = new FlatDataList();
        for (Integer id : this) {
            Record record = recordList.get(id);
            columnValues.add(record.getX().get(obj));
        }
        return columnValues;
    }

    /**
     * Collects the Y value of every record, in id order.
     *
     * @return a list holding one Y value per record
     */
    public FlatDataList extractYValues() {
        FlatDataList yValues = new FlatDataList();
        for (Integer id : this) {
            Record record = recordList.get(id);
            yValues.add(record.getY());
        }
        return yValues;
    }

    /**
     * Groups the values of a single X column by the Y value of their record.
     *
     * @param obj key of the column to extract
     * @return a structure mapping each Y value to the column values of its records
     */
    public TransposeDataList extractXColumnValuesByY(Object obj) {
        TransposeDataList valuesByY = new TransposeDataList();
        for (Integer id : this) {
            Record record = recordList.get(id);
            Object y = record.getY();
            // Create the bucket the first time a Y value is seen.
            if (!valuesByY.containsKey(y)) {
                valuesByY.put(y, new FlatDataList(new ArrayList<>()));
            }
            valuesByY.get(y).add(record.getX().get(obj));
        }
        return valuesByY;
    }

    /**
     * Creates a new Dataset that contains the records with the given ids.
     *
     * <p>Records are added in the order of the id list and receive new ids in the
     * returned Dataset.
     *
     * @param flatDataList ids of the records to include; every element must be an Integer
     * @return a new Dataset created with the same database configuration
     */
    public Dataset generateNewSubset(FlatDataList flatDataList) {
        Dataset subset = new Dataset(dbConf);
        for (Iterator<Object> ids = flatDataList.iterator(); ids.hasNext(); ) {
            Integer id = (Integer) ids.next();
            subset.add(recordList.get(id));
        }
        return subset;
    }

    /**
     * Creates a new Dataset holding the same Record objects as this one, in id order.
     *
     * @return a new Dataset created with the same database configuration
     */
    public Dataset copy() {
        Dataset duplicate = new Dataset(dbConf);
        for (Integer id : this) {
            duplicate.add(recordList.get(id));
        }
        return duplicate;
    }

    /**
     * Looks up a record by id.
     *
     * @param num id of the record
     * @return the result of the record map lookup for that id
     */
    public Record get(Integer num) {
        return recordList.get(num);
    }

    /**
     * Removes the given X columns from the column metadata and from every record.
     *
     * <p>Columns that are not known to this Dataset are ignored. The supplied set is
     * never modified, so unmodifiable sets are accepted. Records are immutable here:
     * an affected record is replaced by a new Record that carries the same Y value,
     * predicted Y value and predicted probabilities.
     *
     * @param set keys of the columns to remove
     */
    public void removeColumns(Set<Object> set) {
        // Work on a private copy so the caller's set is left untouched.
        Set<Object> columnsToRemove = new HashSet<>(set);
        columnsToRemove.retainAll(this.xDataTypes.keySet());
        if (columnsToRemove.isEmpty()) {
            return;
        }
        this.xDataTypes.keySet().removeAll(columnsToRemove);
        for (Integer id : this) {
            Record record = this.recordList.get(id);
            AssociativeArray xData = record.getX().copy();
            int sizeBefore = xData.size();
            xData.keySet().removeAll(columnsToRemove);
            // Only rewrite records that actually lost a column.
            if (xData.size() != sizeBefore) {
                this.recordList.put(id, new Record(xData, record.getY(), record.getYPredicted(), record.getYPredictedProbabilities()));
            }
        }
    }

    /**
     * Registers the data types of a record's columns that are not yet known.
     *
     * <p>Types already stored are never overwritten; the Y type is only inferred
     * while it is still {@code null}.
     *
     * @param record record whose X columns and Y value are inspected
     */
    private void updateMeta(Record record) {
        for (Map.Entry<Object, Object> column : record.getX().entrySet()) {
            Object columnKey = column.getKey();
            if (xDataTypes.containsKey(columnKey)) {
                continue;
            }
            xDataTypes.put(columnKey, TypeInference.getDataType(column.getValue()));
        }
        if (yDataType == null) {
            yDataType = TypeInference.getDataType(record.getY());
        }
    }

    /**
     * Discards all stored type metadata and rebuilds it from the current records.
     */
    public void recalculateMeta() {
        yDataType = null;
        xDataTypes.clear();
        for (Integer id : this) {
            updateMeta(recordList.get(id));
        }
    }

    /**
     * Appends a record and updates the type metadata with its columns.
     *
     * @param record record to store
     * @return the id assigned to the record
     */
    public Integer add(Record record) {
        Integer assignedId = _add(record);
        updateMeta(record);
        return assignedId;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Appends a record without touching the type metadata.
     *
     * <p>The new id is the number of records stored before the insertion.
     *
     * @param record record to store
     * @return the id assigned to the record
     */
    public Integer _add(Record record) {
        Integer nextId = Integer.valueOf(recordList.size());
        recordList.put(nextId, record);
        return nextId;
    }

    /**
     * Replaces the record stored under an existing id and updates the type metadata.
     *
     * @param num    id of the record to replace
     * @param record replacement record
     * @return the same id that was passed in
     * @throws IndexOutOfBoundsException if no record is stored under {@code num}
     */
    public Integer set(Integer num, Record record) {
        _set(num, record);
        updateMeta(record);
        return num;
    }

    /**
     * Replaces the record stored under an existing id without touching the type metadata.
     *
     * @param num    id of the record to replace
     * @param record replacement record
     * @throws IndexOutOfBoundsException if no record is stored under {@code num}
     */
    public void _set(Integer num, Record record) {
        if (recordList.containsKey(num)) {
            recordList.put(num, record);
            return;
        }
        throw new IndexOutOfBoundsException();
    }

    /**
     * Drops the backing storage of this Dataset and clears its in-memory references.
     *
     * <p>After this call the record map and the column type map are {@code null}, so any
     * method that dereferences them will fail with a NullPointerException.
     */
    public void erase() {
        // Drop both maps by the same names under which the constructor obtained them.
        this.dbc.dropBigMap("tmp_xColumnTypes", this.xDataTypes);
        this.dbc.dropBigMap("tmp_recordList", this.recordList);
        // The connector calls are order-dependent: maps first, then the database, then close.
        this.dbc.dropDatabase();
        this.dbc.close();
        // Clear the references so the dropped maps cannot be used by accident.
        this.yDataType = null;
        this.xDataTypes = null;
        this.recordList = null;
    }

    /**
     * Returns an iterator over the record ids {@code 0 .. n-1}, where {@code n} is the
     * number of records at the moment the iterator is created.
     *
     * <p>Records added afterwards are not visited. Removal is not supported.
     *
     * @return an iterator over the record ids
     */
    @Override // java.lang.Iterable
    public Iterator<Integer> iterator() {
        return new Iterator<Integer>() {
            // Id that the next call to next() will return.
            private int counter = 0;
            // Snapshot of the record count taken when the iterator was created.
            private final int n = Dataset.this.recordList.size();

            @Override // java.util.Iterator
            public boolean hasNext() {
                return this.counter < this.n;
            }

            @Override // java.util.Iterator
            public Integer next() {
                // Honour the Iterator contract instead of handing out ids past the end.
                if (this.counter >= this.n) {
                    throw new NoSuchElementException();
                }
                return Integer.valueOf(this.counter++);
            }

            @Override // java.util.Iterator
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }
}
