package com.marklogic.contentpump;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.InvalidInputException;
import org.apache.hadoop.mapreduce.security.TokenCache;

/* loaded from: input_file:com/marklogic/contentpump/FileAndDirectoryInputFormat.class */
/**
 * A {@link FileInputFormat} whose input paths may resolve to files as well as
 * directories.
 *
 * <p>{@link #getSplits(JobContext)} first creates splits for every entry returned by
 * {@link #listStatus(JobContext)} and then replaces each split that points at a
 * directory with one split per (filtered) child of that directory, for as long as the
 * number of splits stays below {@link #SPLIT_COUNT_LIMIT}.
 *
 * @param <K> key type produced by the record reader
 * @param <V> value type produced by the record reader
 */
public abstract class FileAndDirectoryInputFormat<K, V> extends FileInputFormat<K, V> {
    /** A file tail is cut into a further split only while it exceeds this multiple of the split size. */
    private static final double SPLIT_SLOP = 1.1d;
    public static final Log LOG = LogFactory.getLog(FileAndDirectoryInputFormat.class);
    /** Directory expansion in {@link #getSplits(JobContext)} stops once this many splits exist. */
    protected static int SPLIT_COUNT_LIMIT = 1000000;
    /** Rejects every path whose last name component starts with {@code "_"} or {@code "."}. */
    public static final PathFilter hiddenFileFilter = new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    /** A {@link PathFilter} that accepts a path only if every wrapped filter accepts it. */
    public static class MultiPathFilter implements PathFilter {
        private final List<PathFilter> filters;

        /**
         * @param list the filters to combine; the list is used as-is (not copied)
         */
        public MultiPathFilter(List<PathFilter> list) {
            this.filters = list;
        }

        /**
         * @param path the path to test
         * @return {@code false} as soon as one filter rejects {@code path}, otherwise {@code true}
         */
        public boolean accept(Path path) {
            for (PathFilter filter : this.filters) {
                if (!filter.accept(path)) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Decides whether files may be cut into several splits.
     *
     * @param jobContext the job whose configuration is consulted
     * @param path       the file in question (not inspected by this implementation)
     * @return {@code true} only when {@code ConfigConstants.CONF_SPLIT_INPUT} is set and
     *         {@code ConfigConstants.INPUT_COMPRESSED} is not
     */
    protected boolean isSplitable(JobContext jobContext, Path path) {
        Configuration configuration = jobContext.getConfiguration();
        return configuration.getBoolean(ConfigConstants.CONF_SPLIT_INPUT, false) && !configuration.getBoolean(ConfigConstants.INPUT_COMPRESSED, false);
    }

    /**
     * Builds the splits for the job: one or more splits per listed entry, followed by
     * the expansion of directory splits into splits for their children.
     *
     * @param jobContext the job being planned
     * @return the generated splits
     * @throws IOException if listing fails, if no input matches (the
     *         {@link InvalidInputException} is wrapped with the configured input
     *         directory and file pattern), or if a directory on HDFS reports a
     *         non-zero length
     */
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        Configuration configuration = jobContext.getConfiguration();
        try {
            List<FileStatus> entries = listStatus(jobContext);
            long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(jobContext));
            long maxSize = getMaxSplitSize(jobContext);
            for (FileStatus entry : entries) {
                addSplitsForEntry(splits, entry, jobContext, minSize, maxSize);
            }
            expandDirectorySplits(splits, jobContext);
            return splits;
        } catch (InvalidInputException e) {
            throw new IOException("No input files found with the specified input path " + configuration.get(ConfigConstants.CONF_INPUT_DIRECTORY) + " and input file pattern " + configuration.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*"), e);
        }
    }

    /** Appends the split(s) covering a single listed entry (file or directory) to {@code splits}. */
    private void addSplitsForEntry(List<InputSplit> splits, FileStatus entry, JobContext jobContext, long minSize, long maxSize) throws IOException {
        Path path = entry.getPath();
        FileSystem fileSystem = path.getFileSystem(jobContext.getConfiguration());
        long length = entry.getLen();
        BlockLocation[] blocks = null;
        // Block locations are skipped only for directories on HDFS, which must report length 0.
        if (!entry.isDirectory() || !(fileSystem instanceof DistributedFileSystem)) {
            blocks = fileSystem.getFileBlockLocations(entry, 0L, length);
        } else if (length != 0) {
            throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString());
        }

        if (length == 0) {
            // Zero-length entry: a single empty split without host information.
            splits.add(new FileSplit(path, 0L, length, new String[0]));
            return;
        }
        if (!isSplitable(jobContext, path)) {
            splits.add(new FileSplit(path, 0L, length, blocks[0].getHosts()));
            return;
        }

        long splitSize = computeSplitSize(entry.getBlockSize(), minSize, maxSize);
        long remaining = length;
        // Floating-point division is required: with integer division the quotient is
        // truncated and the loop would only split once the tail reached twice the
        // split size instead of SPLIT_SLOP times the split size.
        while ((double) remaining / splitSize > SPLIT_SLOP) {
            long offset = length - remaining;
            splits.add(new FileSplit(path, offset, splitSize, blocks[getBlockIndex(blocks, offset)].getHosts()));
            remaining -= splitSize;
        }
        if (remaining != 0) {
            splits.add(new FileSplit(path, length - remaining, remaining, blocks[blocks.length - 1].getHosts()));
        }
    }

    /**
     * Replaces, in place, every split whose path is a directory with one whole-file
     * split per filtered child. Children are appended at the end of the list, so nested
     * directories are expanded when the scan reaches them. A directory is left untouched
     * when expanding it would reach {@link #SPLIT_COUNT_LIMIT}.
     */
    private void expandDirectorySplits(List<InputSplit> splits, JobContext jobContext) throws IOException {
        Configuration configuration = jobContext.getConfiguration();
        PathFilter filter = buildInputFilter(jobContext);
        int index = 0;
        while (index < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
            Path path = ((FileSplit) splits.get(index)).getPath();
            FileSystem fileSystem = path.getFileSystem(configuration);
            if (!fileSystem.getFileStatus(path).isDirectory()) {
                index++;
                continue;
            }
            FileStatus[] children = fileSystem.listStatus(path, filter);
            if (children.length + index >= SPLIT_COUNT_LIMIT) {
                index++;
                continue;
            }
            // Do not advance: after removal the next split has moved into this slot.
            splits.remove(index);
            for (FileStatus child : children) {
                splits.add(new FileSplit(child.getPath(), 0L, child.getLen(), (String[]) null));
            }
        }
    }

    /** Combines {@link #hiddenFileFilter} with the job's input path filter, if one is configured. */
    private static PathFilter buildInputFilter(JobContext jobContext) {
        List<PathFilter> filters = new ArrayList<PathFilter>();
        filters.add(hiddenFileFilter);
        PathFilter jobFilter = getInputPathFilter(jobContext);
        if (jobFilter != null) {
            filters.add(jobFilter);
        }
        return new MultiPathFilter(filters);
    }

    /**
     * Lists the input entries of the job after obtaining namenode tokens for the
     * input paths.
     *
     * @param jobContext the job being planned
     * @return the matched entries; may contain directories (see {@code simpleListStatus})
     * @throws IOException if the job has no input paths or listing fails
     */
    protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
        Path[] inputPaths = getInputPaths(jobContext);
        if (inputPaths.length == 0) {
            throw new IOException("No input paths specified in job");
        }
        TokenCache.obtainTokensForNamenodes(jobContext.getCredentials(), inputPaths, jobContext.getConfiguration());
        boolean recursive = getInputDirRecursive(jobContext);
        List<FileStatus> result = simpleListStatus(jobContext, inputPaths, buildInputFilter(jobContext), recursive);
        LOG.info("Total input paths to process : " + result.size());
        return result;
    }

    /**
     * Globs every input path and collects the matches. A matched directory contributes
     * its filtered children; child directories are descended into only when {@code z}
     * is {@code true}, otherwise they are returned as directory entries.
     *
     * @param jobContext the job whose configuration resolves the file systems
     * @param pathArr    the input paths (glob patterns allowed)
     * @param pathFilter filter applied to glob matches and directory listings
     * @param z          whether to recurse into sub-directories
     * @return all collected entries
     * @throws IOException an {@link InvalidInputException} aggregating every path that
     *         does not exist, matches nothing, or is rejected as an illegal argument
     */
    private List<FileStatus> simpleListStatus(JobContext jobContext, Path[] pathArr, PathFilter pathFilter, boolean z) throws IOException {
        List<FileStatus> result = new ArrayList<FileStatus>();
        List<IOException> errors = new ArrayList<IOException>();
        Configuration configuration = jobContext.getConfiguration();
        for (Path path : pathArr) {
            FileSystem fileSystem = path.getFileSystem(configuration);
            try {
                FileStatus[] matches = fileSystem.globStatus(path, pathFilter);
                if (matches == null) {
                    errors.add(new IOException("Input path does not exist: " + path));
                    continue;
                }
                if (matches.length == 0) {
                    errors.add(new IOException("Input Pattern " + path + " matches 0 files"));
                    continue;
                }
                for (FileStatus match : matches) {
                    if (!match.isDirectory()) {
                        result.add(match);
                        continue;
                    }
                    for (FileStatus child : fileSystem.listStatus(match.getPath(), pathFilter)) {
                        if (z && child.isDirectory()) {
                            simpleAddInputPathRecursively(result, fileSystem, child.getPath(), pathFilter);
                        } else {
                            result.add(child);
                        }
                    }
                }
            } catch (IllegalArgumentException e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                errors.add(new IOException(e.getMessage(), e));
            }
        }
        if (errors.isEmpty()) {
            return result;
        }
        throw new InvalidInputException(errors);
    }

    /**
     * Adds every non-directory entry found beneath {@code path} to {@code list},
     * descending into sub-directories.
     *
     * @param list       receives the files that are found
     * @param fileSystem the file system that owns {@code path}
     * @param path       the directory to walk
     * @param pathFilter filter applied to each directory listing
     * @throws IOException if a listing fails
     */
    protected void simpleAddInputPathRecursively(List<FileStatus> list, FileSystem fileSystem, Path path, PathFilter pathFilter) throws IOException {
        for (FileStatus fileStatus : fileSystem.listStatus(path, pathFilter)) {
            if (fileStatus.isDirectory()) {
                simpleAddInputPathRecursively(list, fileSystem, fileStatus.getPath(), pathFilter);
            } else {
                list.add(fileStatus);
            }
        }
    }
}
