package com.marklogic.contentpump;

import com.marklogic.contentpump.utilities.CSVParserFormatter;
import com.marklogic.contentpump.utilities.FileIterator;
import com.marklogic.contentpump.utilities.IdGenerator;
import com.marklogic.mapreduce.CompressionCodec;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/* loaded from: input_file:com/marklogic/contentpump/CompressedDelimitedTextReader.class */
/**
 * Record reader for delimited text stored inside ZIP or GZIP compressed input.
 *
 * <p>The codec is read from {@link ConfigConstants#CONF_INPUT_COMPRESSION_CODEC}
 * (defaulting to ZIP). For ZIP input every non-empty archive entry is parsed as
 * its own delimited-text document; for GZIP input the whole decompressed stream
 * is parsed as one document. When the input split is a directory, the files it
 * yields are consumed one after another.
 */
public class CompressedDelimitedTextReader extends DelimitedTextReader<Text> {
    public static final Log LOG = LogFactory.getLog(CompressedDelimitedTextReader.class);

    /** Decompressing stream over the current input file; {@code null} when no file is open. */
    private InputStream zipIn;
    /** ZIP entry currently being parsed (ZIP codec only). */
    private ZipEntry currZipEntry;
    /** Codec selected by the most recent successful {@link #initStream(InputSplit)}. */
    private CompressionCodec codec;

    /** Creates a reader and flags it as operating on compressed input. */
    public CompressedDelimitedTextReader() {
        this.compressed = true;
    }

    /**
     * Reads the job configuration, resolves the input file (taking the first
     * file from a {@link FileIterator} when the split is a directory) and opens
     * the decompressing stream over it.
     *
     * @param inputSplit         split to read; cast to {@link FileSplit}
     * @param taskAttemptContext task context supplying the configuration
     * @throws IOException          if the file system or the input cannot be accessed
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        initConfig(taskAttemptContext);
        initDocType();
        initDelimConf();
        setFile(((FileSplit) inputSplit).getPath());
        this.fs = this.file.getFileSystem(taskAttemptContext.getConfiguration());
        if (this.fs.getFileStatus(this.file).isDirectory()) {
            this.iterator = new FileIterator((FileSplit) inputSplit, taskAttemptContext);
            inputSplit = (InputSplit) this.iterator.next();
        }
        initStream(inputSplit);
    }

    /**
     * Opens the file behind {@code inputSplit} and wraps it in the decompressing
     * stream matching the configured codec. Does nothing when {@code openFile}
     * returns {@code null}.
     *
     * @param inputSplit split whose file should be opened
     * @throws IOException                   if the file cannot be opened or the
     *                                       compressed header cannot be read
     * @throws UnsupportedOperationException if the configured codec is neither ZIP nor GZIP
     */
    protected void initStream(InputSplit inputSplit) throws IOException {
        FSDataInputStream rawIn = openFile(inputSplit, false);
        if (rawIn == null) {
            return;
        }
        boolean wrapped = false;
        try {
            String codecName = this.conf.get(ConfigConstants.CONF_INPUT_COMPRESSION_CODEC, CompressionCodec.ZIP.toString());
            if (codecName.equalsIgnoreCase(CompressionCodec.ZIP.toString())) {
                this.zipIn = new ZipInputStream(rawIn);
                this.codec = CompressionCodec.ZIP;
            } else if (codecName.equalsIgnoreCase(CompressionCodec.GZIP.toString())) {
                // The GZIPInputStream constructor reads the header and may throw.
                this.zipIn = new GZIPInputStream(rawIn);
                this.codec = CompressionCodec.GZIP;
            } else {
                throw new UnsupportedOperationException("Unsupported codec: " + codecName);
            }
            wrapped = true;
        } finally {
            if (!wrapped) {
                // Nothing took ownership of the raw stream, so release it here.
                try {
                    rawIn.close();
                } catch (IOException ignored) {
                    // Keep the original failure; a secondary close error adds nothing.
                }
            }
        }
        if (this.uriName == null) {
            this.generateId = this.conf.getBoolean(ConfigConstants.CONF_INPUT_GENERATE_URI, false);
            if (this.generateId) {
                this.idGen = new IdGenerator(this.file.toUri().getPath() + "-" + ((FileSplit) inputSplit).getStart());
            } else {
                this.uriId = 0;
            }
        }
    }

    /**
     * Advances to the next record, moving on to the next ZIP entry and/or the
     * next input file once the current one is exhausted.
     *
     * @return {@code true} if a record was read, {@code false} at end of input
     * @throws IOException                   if reading the compressed input fails
     * @throws InterruptedException          declared by the overridden method
     * @throws UnsupportedOperationException if the active codec is neither ZIP nor GZIP
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (this.zipIn == null) {
            this.hasNext = false;
            return false;
        }
        if (this.codec.equals(CompressionCodec.ZIP)) {
            // Drain the entry that is already open before looking for the next one.
            if (this.instream != null && super.nextKeyValue()) {
                return true;
            }
            return nextKeyValueInZip();
        }
        if (!this.codec.equals(CompressionCodec.GZIP)) {
            throw new UnsupportedOperationException("Unsupported codec: " + this.codec.name());
        }
        if (this.instream == null) {
            resetCsvParserOn(this.zipIn);
        }
        while (true) {
            if (super.nextKeyValue()) {
                return true;
            }
            // Current GZIP file is exhausted; continue with the next file, if any.
            if (this.iterator == null || !this.iterator.hasNext()) {
                this.hasNext = false;
                return false;
            }
            close();
            initStream((InputSplit) this.iterator.next());
            if (this.zipIn == null) {
                // initStream could not open another file.
                this.hasNext = false;
                return false;
            }
            // NOTE(review): unlike the ZIP path, `fields` is not reset between GZIP
            // files here (kept as in the original) - confirm whether each file
            // carries its own header row.
            resetCsvParserOn(this.zipIn);
        }
    }

    /**
     * Walks forward through ZIP entries (and, when an archive is exhausted,
     * through the remaining input files) until a record can be read.
     *
     * @return {@code true} if a record was read, {@code false} at end of input
     */
    private boolean nextKeyValueInZip() throws IOException, InterruptedException {
        while (this.zipIn != null) {
            ZipInputStream zipStream = (ZipInputStream) this.zipIn;
            this.currZipEntry = zipStream.getNextEntry();
            if (this.currZipEntry == null) {
                // End of this archive: switch to the next file or stop.
                if (this.iterator == null || !this.iterator.hasNext()) {
                    break;
                }
                close();
                initStream((InputSplit) this.iterator.next());
                continue;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("ZipEntry: " + this.currZipEntry.getName());
            }
            if (this.currZipEntry.getSize() == 0) {
                continue;
            }
            this.subId = this.currZipEntry.getName();
            resetCsvParserOn(zipStream);
            // Each entry is a separate document, so drop the previous entry's field names.
            this.fields = null;
            if (super.nextKeyValue()) {
                return true;
            }
        }
        this.hasNext = false;
        return false;
    }

    /**
     * Points {@code instream}, {@code parser} and {@code parserIterator} at a
     * fresh CSV parser over {@code in}, using the platform default charset when
     * no encoding has been configured.
     */
    private void resetCsvParserOn(InputStream in) throws IOException {
        if (this.encoding == null) {
            this.instream = new InputStreamReader(in);
        } else {
            this.instream = new InputStreamReader(in, this.encoding);
        }
        this.parser = new CSVParser(this.instream, CSVParserFormatter.getFormat(this.delimiter, '"', true, true));
        this.parserIterator = this.parser.iterator();
    }

    /**
     * Closes the parent's resources and the decompressing stream. The stream
     * reference is cleared so that a later call cannot read from a closed stream.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            InputStream in = this.zipIn;
            if (in != null) {
                this.zipIn = null;
                in.close();
            }
        }
    }

    /**
     * Reports coarse progress: 0 while {@code hasNext} is set, 1 once it is cleared.
     *
     * @return {@code 0.0f} or {@code 1.0f}
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return this.hasNext ? 0.0f : 1.0f;
    }
}
