/*
 * Decompiled with CFR 0.152.
 */
package com.mongodb.hadoop.splitter;

import com.mongodb.AggregationOutput;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.CommandResult;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoException;
import com.mongodb.hadoop.input.MongoInputSplit;
import com.mongodb.hadoop.splitter.MongoCollectionSplitter;
import com.mongodb.hadoop.splitter.SplitFailedException;
import com.mongodb.hadoop.util.MongoConfigUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * Splitter that derives split boundaries from a random sample of the input
 * collection.
 *
 * <p>The collection statistics ({@code collstats}) are used to estimate how many
 * documents fit into one split of the configured size. A {@code $sample}
 * aggregation then draws a number of split-key values, the samples are sorted
 * by the split key, and every {@code samplesPerSplit}-th sample becomes a
 * boundary between two consecutive splits.
 */
public class SampleSplitter
extends MongoCollectionSplitter {
    /** Configuration key for the number of samples drawn per split. */
    public static final String SAMPLES_PER_SPLIT = "mongo.input.splits.samples_per_split";
    /** Value exposed as the default number of samples per split. */
    public static final int DEFAULT_SAMPLES_PER_SPLIT = 10;
    private static final Log LOG = LogFactory.getLog(SampleSplitter.class);

    public SampleSplitter() {
    }

    public SampleSplitter(Configuration conf) {
        super(conf);
    }

    /**
     * Calculates the input splits for the configured input collection.
     *
     * <p>A single unbounded split is returned when the collection reports no
     * documents, or when it holds fewer documents than fit into one split.
     *
     * @return the list of splits covering the collection; empty splits are
     *         filtered out when that option is enabled in the configuration
     * @throws SplitFailedException if the split size or samples-per-split
     *         setting is not positive, if {@code collstats} fails, or if the
     *         sampling aggregation fails
     */
    @Override
    public List<InputSplit> calculateSplits() throws SplitFailedException {
        Configuration conf = this.getConfiguration();
        long splitSizeMB = MongoConfigUtil.getSplitSize(conf);
        long samplesPerSplit = MongoConfigUtil.getSamplesPerSplit(conf);
        // Reject unusable settings up front: a zero samplesPerSplit would otherwise
        // surface as an ArithmeticException from the modulo in the sampling loop.
        if (splitSizeMB <= 0L) {
            throw new SplitFailedException("Split size must be positive, but was " + splitSizeMB);
        }
        if (samplesPerSplit <= 0L) {
            throw new SplitFailedException("Samples per split must be positive, but was " + samplesPerSplit);
        }
        DBObject splitKey = MongoConfigUtil.getInputSplitKey(conf);
        DBCollection inputCollection = MongoConfigUtil.getInputCollection(conf);
        CommandResult result = inputCollection.getDB().command(new BasicDBObject("collstats", inputCollection.getName()));
        if (!result.ok()) {
            throw new SplitFailedException("Could not execute command 'collstats': " + result.getErrorMessage());
        }

        // Read the statistics as longs so that very large collections are not
        // truncated, and tolerate a missing or zero avgObjSize instead of
        // failing with a NullPointerException or a division by zero.
        long count = result.getLong("count", 0L);
        long avgObjSize = result.getLong("avgObjSize", 0L);
        if (count <= 0L || avgObjSize <= 0L) {
            LOG.warn("Input collection reports no documents; creating a single split.");
            return this.singleSplit();
        }

        // At least one document per split, even when the average document is
        // larger than the configured split size.
        long numDocsPerSplit = Math.max(1L, splitSizeMB * 1024L * 1024L / avgObjSize);
        if (count < numDocsPerSplit) {
            LOG.warn("Not enough documents for more than one split! Consider setting mongo.input.split_size to a lower value.");
            return this.singleSplit();
        }

        // Ceiling division without going through floating point.
        long numSplits = count / numDocsPerSplit + (count % numDocsPerSplit == 0L ? 0L : 1L);
        // Saturate instead of overflowing when the product does not fit into an int.
        int totalSamples = numSplits > (long)Integer.MAX_VALUE / samplesPerSplit
                ? Integer.MAX_VALUE
                : (int)(numSplits * samplesPerSplit);

        DBObject[] pipeline = new DBObject[]{
            new BasicDBObjectBuilder().push("$sample").add("size", totalSamples).get(),
            new BasicDBObject("$project", splitKey),
            new BasicDBObject("$sort", splitKey)
        };
        AggregationOutput aggregationOutput;
        try {
            aggregationOutput = inputCollection.aggregate(Arrays.asList(pipeline));
        }
        catch (MongoException e) {
            throw new SplitFailedException("Failed to aggregate sample documents. Note that this Splitter implementation is incompatible with MongoDB versions prior to 3.2.", e);
        }

        // The capacity is only a hint; cap it so an extreme split count cannot
        // trigger an enormous up-front allocation.
        int initialCapacity = (int)Math.min(numSplits + 1L, 1024L);
        List<InputSplit> splits = new ArrayList<InputSplit>(initialCapacity);
        BasicDBObject previousKey = null;
        int i = 0;
        for (DBObject sample : aggregationOutput.results()) {
            // Every samplesPerSplit-th sorted sample (starting with the first)
            // closes the current split and opens the next one.
            if ((long)i++ % samplesPerSplit != 0L) {
                continue;
            }
            BasicDBObject bdbo = (BasicDBObject)sample;
            splits.add(this.createSplitFromBounds(previousKey, bdbo));
            previousKey = bdbo;
        }
        // Final split: from the last boundary to the end of the key range.
        splits.add(this.createSplitFromBounds(previousKey, null));

        if (MongoConfigUtil.isFilterEmptySplitsEnabled(conf)) {
            return SampleSplitter.filterEmptySplits(splits);
        }
        return splits;
    }

    /** Builds the result used when the whole collection goes into one unbounded split. */
    private List<InputSplit> singleSplit() throws SplitFailedException {
        MongoInputSplit split = this.createSplitFromBounds(null, null);
        return Collections.<InputSplit>singletonList(split);
    }
}

