package org.apache.parquet.tools.command;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.metadata.FileMetaData;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.hadoop.util.HiddenFileFilter;
import org.apache.parquet.tools.Main;

/* loaded from: input_file:org/apache/parquet/tools/command/MergeCommand.class */
/**
 * Command that concatenates several Parquet files into a single output file.
 *
 * <p>The trailing command-line argument is the destination file; every argument before it is an
 * input. When exactly one input is given it must be a directory, whose non-hidden entries become
 * the inputs. Input files are appended to the output one after another via
 * {@link ParquetFileWriter#appendFile}, and a warning is printed for every input whose length is
 * below {@link #TOO_SMALL_FILE_THRESHOLD}.
 */
public class MergeCommand extends ArgsOnlyCommand {
    public static final String[] USAGE = {"<input> [<input> ...] <output>", "where <input> is the source parquet files/directory to be merged", "   <output> is the destination parquet file"};

    /** Upper bound on the number of input arguments. */
    private static final int MAX_FILE_NUM = 100;

    /** Inputs shorter than this many bytes (64 MiB) trigger a "too small" warning. */
    private static final long TOO_SMALL_FILE_THRESHOLD = 64L * 1024 * 1024;

    private final Configuration conf;

    /** Creates the command with a default Hadoop {@link Configuration}. */
    public MergeCommand() {
        // Presumably (min, max) argument counts: one input plus the output at minimum,
        // MAX_FILE_NUM inputs plus the output at maximum — confirm against ArgsOnlyCommand.
        super(2, MAX_FILE_NUM + 1);
        this.conf = new Configuration();
    }

    @Override // org.apache.parquet.tools.command.Command
    public String[] getUsageDescription() {
        return USAGE;
    }

    @Override // org.apache.parquet.tools.command.Command
    public String getCommandDescription() {
        return "Merges multiple Parquet files into one. The command doesn't merge row groups, just places one after the other. When used to merge many small files, the resulting file will still contain small row groups, which usually leads to bad query performance.";
    }

    /**
     * Merges the input files named on the command line into the output file.
     *
     * @param commandLine parsed command line; its last argument is the output path and all
     *     preceding arguments are inputs
     * @throws IllegalArgumentException if fewer than two input files are resolved or one of them
     *     is a directory
     * @throws Exception if reading the inputs or writing the output fails
     */
    @Override // org.apache.parquet.tools.command.ArgsOnlyCommand, org.apache.parquet.tools.command.Command
    public void execute(CommandLine commandLine) throws Exception {
        List<String> args = commandLine.getArgList();
        int outputIndex = args.size() - 1;
        List<Path> inputFiles = getInputFiles(args.subList(0, outputIndex));
        Path outputFile = new Path(args.get(outputIndex));

        FileMetaData mergedMeta = mergedMetadata(inputFiles);
        PrintWriter out = new PrintWriter(Main.out, true);

        ParquetFileWriter writer = new ParquetFileWriter(this.conf, mergedMeta.getSchema(), outputFile, ParquetFileWriter.Mode.CREATE);
        writer.start();

        boolean tooSmallFilesMerged = false;
        for (Path input : inputFiles) {
            // Look the length up once; the warning reuses the same value.
            long length = input.getFileSystem(this.conf).getFileStatus(input).getLen();
            if (length < TOO_SMALL_FILE_THRESHOLD) {
                out.format("Warning: file %s is too small, length: %d\n", input, length);
                tooSmallFilesMerged = true;
            }
            writer.appendFile(HadoopInputFile.fromPath(input, this.conf));
        }

        if (tooSmallFilesMerged) {
            out.println("Warning: you merged too small files. Although the size of the merged file is bigger, it STILL contains small row groups, thus you don't have the advantage of big row groups, which usually leads to bad query performance!");
        }
        writer.end(mergedMeta.getKeyValueMetaData());
    }

    /** Returns the file-level metadata produced by merging the metadata of {@code list}. */
    private FileMetaData mergedMetadata(List<Path> list) throws IOException {
        return ParquetFileWriter.mergeMetadataFiles(list, this.conf).getFileMetaData();
    }

    /**
     * Resolves the input arguments to the list of files to merge and validates it.
     *
     * <p>A single argument is only accepted when it is a directory, in which case its non-hidden
     * entries are used. Several arguments are each taken as a file path.
     *
     * @throws IllegalArgumentException if validation by {@link #checkParquetFiles} fails, which
     *     includes the case of a single argument that is not a directory
     */
    private List<Path> getInputFiles(List<String> list) throws IOException {
        List<Path> inputFiles = null;
        if (list.size() == 1) {
            Path candidate = new Path(list.get(0));
            FileStatus status = candidate.getFileSystem(this.conf).getFileStatus(candidate);
            if (status.isDirectory()) {
                inputFiles = getInputFilesFromDirectory(status);
            }
            // A lone non-directory argument leaves inputFiles null and is rejected below.
        } else {
            inputFiles = parseInputFiles(list);
        }
        checkParquetFiles(inputFiles);
        return inputFiles;
    }

    /**
     * Verifies that there are at least two inputs and that none of them is a directory.
     *
     * @throws IllegalArgumentException if either condition is violated
     */
    private void checkParquetFiles(List<Path> list) throws IOException {
        if (list == null || list.size() <= 1) {
            throw new IllegalArgumentException("Not enough files to merge");
        }
        for (Path file : list) {
            if (file.getFileSystem(this.conf).getFileStatus(file).isDirectory()) {
                throw new IllegalArgumentException("Illegal parquet file: " + file.toUri());
            }
        }
    }

    /** Lists the entries of the given directory that pass {@link HiddenFileFilter#INSTANCE}. */
    private List<Path> getInputFilesFromDirectory(FileStatus fileStatus) throws IOException {
        Path directory = fileStatus.getPath();
        FileStatus[] entries = directory.getFileSystem(this.conf).listStatus(directory, HiddenFileFilter.INSTANCE);
        List<Path> files = new ArrayList<>(entries.length);
        for (FileStatus entry : entries) {
            files.add(entry.getPath());
        }
        return files;
    }

    /** Converts each input string to a {@link Path}, preserving order. */
    private List<Path> parseInputFiles(List<String> list) {
        List<Path> files = new ArrayList<>(list.size());
        for (String name : list) {
            files.add(new Path(name));
        }
        return files;
    }
}
