package com.marklogic.contentpump;

import com.marklogic.mapreduce.DocumentURIWithSourceInfo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/* loaded from: input_file:com/marklogic/contentpump/CombineDocumentInputFormat.class */
/**
 * Input format that groups the per-file splits produced by
 * {@link FileAndDirectoryInputFormat#getSplits(JobContext)} into
 * {@link CombineDocumentSplit}s, so that several input files can be handled
 * through a single combined split.
 *
 * @param <VALUE> value type produced by the record reader
 */
public class CombineDocumentInputFormat<VALUE> extends FileAndDirectoryInputFormat<DocumentURIWithSourceInfo, VALUE> {
    public static final Log LOG = LogFactory.getLog(CombineDocumentInputFormat.class);

    /**
     * Individual files are never split by this format.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean isSplitable(JobContext jobContext, Path path) {
        return false;
    }

    /**
     * Creates the reader used to consume a combined split.
     *
     * @return a new {@link CombineDocumentReader}
     */
    @Override
    public RecordReader<DocumentURIWithSourceInfo, VALUE> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new CombineDocumentReader();
    }

    /**
     * Collects the file splits returned by the superclass into combined splits.
     *
     * <p>Zero-length files are skipped. A non-empty file is appended to the
     * current combined split when either the resulting length stays below the
     * computed split size for that file, or the current combined split is still
     * shorter than the minimum split size. Otherwise the current combined split
     * is emitted and a new one is started with the file.
     *
     * @param jobContext job context supplying the configuration and split-size settings
     * @return the combined splits, in the order they were built
     * @throws IOException if the superclass fails to list splits or a file status cannot be read
     * @throws RuntimeException wrapping an {@link InterruptedException} raised while
     *         inspecting or extending a combined split
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(jobContext));
        long maxSize = getMaxSplitSize(jobContext);

        List<InputSplit> fileSplits = super.getSplits(jobContext);
        List<InputSplit> combinedSplits = new ArrayList<>();
        CombineDocumentSplit current = null;

        for (InputSplit split : fileSplits) {
            // Every split is treated as a file-backed split; anything else fails the cast.
            FileSplit fileSplit = (FileSplit) split;
            Path path = fileSplit.getPath();
            FileStatus status = path.getFileSystem(jobContext.getConfiguration()).getFileStatus(path);
            long length = status.getLen();
            long splitSize = computeSplitSize(status.getBlockSize(), minSize, maxSize);

            if (length == 0) {
                // Empty files contribute nothing to any combined split.
                continue;
            }
            if (current == null) {
                current = new CombineDocumentSplit();
            }
            try {
                long currentLength = current.getLength();
                boolean fits = currentLength + length < splitSize;
                boolean belowMinimum = currentLength < minSize;
                if (!fits && !belowMinimum) {
                    // Current combined split is full: emit it and start a fresh one.
                    combinedSplits.add(current);
                    current = new CombineDocumentSplit();
                }
                current.addSplit(fileSplit);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can observe it.
                Thread.currentThread().interrupt();
                LOG.error("Interrupted while combining input splits", e);
                throw new RuntimeException(e);
            }
        }

        if (current != null) {
            combinedSplits.add(current);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Total # of splits: " + fileSplits.size());
            LOG.debug("Total # of combined splits: " + combinedSplits.size());
        }
        return combinedSplits;
    }
}
