package com.marklogic.contentpump;

import com.marklogic.contentpump.utilities.CSVParserFormatter;
import com.marklogic.contentpump.utilities.DocBuilder;
import com.marklogic.contentpump.utilities.EncodingUtil;
import com.marklogic.contentpump.utilities.FileIterator;
import com.marklogic.contentpump.utilities.IdGenerator;
import com.marklogic.contentpump.utilities.JSONDocBuilder;
import com.marklogic.contentpump.utilities.XMLDocBuilder;
import com.marklogic.mapreduce.MarkLogicConstants;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/* loaded from: input_file:com/marklogic/contentpump/DelimitedTextReader.class */
/**
 * Record reader that parses delimited text with a {@link CSVParser} and turns
 * each data record into a document through a {@link DocBuilder}.
 *
 * <p>The first record read is kept as the list of field (column) names; each
 * later record is paired with those names and passed to the document builder.
 * The key of each record is either taken from the column whose name matches
 * the configured URI id, taken from the first column when no URI id is
 * configured, or produced by an {@link IdGenerator} when URI generation is
 * enabled in the configuration.
 *
 * <p>When the input path is a directory, the files supplied by the
 * {@code FileIterator} are read one after another.
 */
public class DelimitedTextReader<VALUEIN> extends ImportRecordReader<VALUEIN> {
    public static final Log LOG = LogFactory.getLog(DelimitedTextReader.class);
    /** Quote character handed to the CSV format as the encapsulator. */
    public static final char encapsulator = '\"';
    /** Parser error text that marks a malformed record which is skipped rather than fatal. */
    private static final String INVALID_CHAR_MESSAGE = "invalid char between encapsulated token and delimiter";

    /** Field names taken from the first record; {@code null} until that record has been read. */
    protected String[] fields;
    /** Single-character field delimiter read from the configuration. */
    protected char delimiter;
    protected CSVParser parser;
    protected InputStreamReader instream;
    protected FSDataInputStream fileIn;
    /** Name of the column used as the URI id, or {@code null} when none is configured. */
    protected String uriName;
    protected long bytesRead;
    /** Whether keys are produced by {@link #idGen} instead of being read from a column. */
    protected boolean generateId;
    protected IdGenerator idGen;
    protected boolean compressed;
    protected DocBuilder docBuilder;
    protected Iterator<CSVRecord> parserIterator;
    protected boolean hasNext = true;
    protected long fileLen = Long.MAX_VALUE;
    /** Index of the column used as the key; -1 until it has been determined. */
    protected int uriId = -1;
    /** Line number assigned to the previously processed record. */
    private int prevLineNumber = 1;

    /**
     * Closes the character stream of the file currently being read, if one is open.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (this.instream != null) {
            this.instream.close();
        }
    }

    /**
     * Reports progress as the fraction of {@link #fileLen} covered by {@link #bytesRead}.
     *
     * @return a value between 0 and 1; 1 when the recorded length is not positive
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (this.fileLen <= 0) {
            // Nothing to read; also avoids dividing by zero for an empty split.
            return 1.0f;
        }
        // Divide in floating point: dividing the two longs first truncates to 0.
        return Math.min(1.0f, (float) this.bytesRead / (float) this.fileLen);
    }

    /**
     * Reads the configuration, selects the document builder, and opens the
     * parser on the split. If the split's path is a directory, a
     * {@code FileIterator} is created and parsing starts with its first entry.
     *
     * @param inputSplit the split to read; cast to {@link FileSplit}
     * @param taskAttemptContext the task context supplying the configuration
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        initConfig(taskAttemptContext);
        initDocType();
        initDelimConf();
        setFile(((FileSplit) inputSplit).getPath());
        this.fs = this.file.getFileSystem(taskAttemptContext.getConfiguration());
        if (this.fs.getFileStatus(this.file).isDirectory()) {
            this.iterator = new FileIterator((FileSplit) inputSplit, taskAttemptContext);
            inputSplit = (InputSplit) this.iterator.next();
        }
        initParser(inputSplit);
    }

    /**
     * Opens the file behind the split and creates the CSV parser and its
     * record iterator. Does nothing further when {@code openFile} returns
     * {@code null}, leaving any previously created parser in place.
     *
     * @param inputSplit the split to open; cast to {@link FileSplit} when an id generator is created
     */
    protected void initParser(InputSplit inputSplit) throws IOException, InterruptedException {
        this.fileIn = openFile(inputSplit, true);
        if (this.fileIn == null) {
            return;
        }
        this.instream = new InputStreamReader((InputStream) this.fileIn, this.encoding);
        this.bytesRead = 0L;
        this.fileLen = inputSplit.getLength();
        if (this.uriName == null) {
            // No URI column configured: either generate ids or fall back to column 0.
            this.generateId = this.conf.getBoolean(ConfigConstants.CONF_INPUT_GENERATE_URI, false);
            if (this.generateId) {
                String idSeed = this.file.toUri().getPath() + "-" + ((FileSplit) inputSplit).getStart();
                this.idGen = new IdGenerator(idSeed);
            } else {
                this.uriId = 0;
            }
        }
        this.parser = new CSVParser(this.instream, CSVParserFormatter.getFormat(this.delimiter, encapsulator, true, true));
        this.parserIterator = this.parser.iterator();
    }

    /**
     * Reads the delimiter and the URI id column name from the configuration
     * and initializes the document builder.
     *
     * @throws UnsupportedOperationException if the configured delimiter is not exactly one character
     */
    public void initDelimConf() {
        String configured = this.conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
        if (configured.length() != 1) {
            throw new UnsupportedOperationException("Invalid delimiter: " + configured);
        }
        this.delimiter = configured.charAt(0);
        this.uriName = this.conf.get(ConfigConstants.CONF_INPUT_URI_ID, (String) null);
        this.docBuilder.init(this.conf);
    }

    /**
     * Reads the next record from the parser and returns its values.
     *
     * @return the values of the next record
     * @throws IOException if the record yields fewer values than its reported size
     */
    public String[] getLine() throws IOException {
        return getLine(getRecordLine());
    }

    /**
     * Copies the values of a record into a new array.
     *
     * @param cSVRecord the record to copy
     * @return an array holding the record's values in order
     * @throws IOException if the record yields fewer values than its reported size
     */
    public String[] getLine(CSVRecord cSVRecord) throws IOException {
        Iterator<String> values = cSVRecord.iterator();
        int size = cSVRecord.size();
        String[] result = new String[size];
        for (int i = 0; i < size; i++) {
            if (!values.hasNext()) {
                throw new IOException("Record size doesn't match the real size");
            }
            result[i] = values.next();
        }
        return result;
    }

    /**
     * Returns the next record from the parser's iterator.
     *
     * @return the next record
     */
    public CSVRecord getRecordLine() {
        return this.parserIterator.next();
    }

    /**
     * Advances to the next record and populates the current key and value.
     *
     * <p>The first record read is stored as the field names and is not emitted
     * itself. A record whose value count differs from the number of field
     * names, or that the document builder rejects, is reported through
     * {@code setSkipKey} and still returns {@code true}. A parser failure whose
     * message contains the "invalid char between encapsulated token and
     * delimiter" text is reported the same way.
     *
     * @return {@code true} when a record (possibly a skipped one) was consumed;
     *         {@code false} when there is no more input or the file was skipped
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (this.parser == null || this.parserIterator == null) {
            return false;
        }
        try {
            if (!this.parserIterator.hasNext()) {
                return advanceToNextFile() && nextKeyValue();
            }
            String[] line = getLine();
            if (this.fields == null) {
                // First record: remember it as the field names.
                this.fields = line;
                if (Charset.defaultCharset().equals(Charset.forName("UTF-8"))) {
                    // NOTE(review): presumably strips a UTF-8 byte order mark from the first field name - confirm in EncodingUtil.
                    EncodingUtil.handleBOMUTF8(this.fields, 0);
                }
                // The key column is already settled when ids are generated or column 0 is used.
                boolean keyColumnKnown = this.generateId || this.uriId == 0;
                for (int i = 0; i < this.fields.length && !keyColumnKnown; i++) {
                    if (this.fields[i].equals(this.uriName)) {
                        this.uriId = i;
                        keyColumnKnown = true;
                    }
                }
                if (!keyColumnKnown) {
                    LOG.error("Skipped file: " + this.file.toUri() + ", reason: uri_id " + this.uriName + " is not found");
                    this.parser = null;
                    return false;
                }
                try {
                    this.docBuilder.configFields(this.conf, this.fields);
                    if (!this.parserIterator.hasNext()) {
                        return advanceToNextFile() && nextKeyValue();
                    }
                    line = getLine();
                } catch (IllegalArgumentException configError) {
                    LOG.error("Skipped file: " + this.file.toUri() + ", reason: " + configError.getMessage());
                    this.parser = null;
                    return false;
                }
            }
            int currentLineNumber = (int) this.parser.getCurrentLineNumber();
            if (currentLineNumber == this.prevLineNumber) {
                // The parser reported the same line as for the previous record; move one line forward.
                currentLineNumber++;
            }
            this.prevLineNumber = currentLineNumber;
            if (line.length != this.fields.length) {
                setSkipKey(currentLineNumber, 0, "number of fields does not match number of columns");
                return true;
            }
            this.docBuilder.newDoc();
            for (int col = 0; col < this.fields.length; col++) {
                if (this.fields[col].equals("")) {
                    // Columns with an empty name are left out of the document.
                    continue;
                }
                if (!this.generateId && this.uriId == col && setKey(line[col], currentLineNumber, 0, true)) {
                    return true;
                }
                try {
                    this.docBuilder.put(this.fields[col], line[col]);
                } catch (Exception putError) {
                    setSkipKey(currentLineNumber, 0, putError.getMessage());
                    return true;
                }
            }
            this.docBuilder.build();
            if (this.generateId && setKey(this.idGen.incrementAndGet(), currentLineNumber, 0, true)) {
                return true;
            }
            if (this.value instanceof Text) {
                ((Text) this.value).set(this.docBuilder.getDoc());
            } else {
                ((Text) ((ContentWithFileNameWritable) this.value).getValue()).set(this.docBuilder.getDoc());
            }
            return true;
        } catch (RuntimeException parseError) {
            String message = parseError.getMessage();
            // A null message cannot be the tolerated parser error; rethrow the original exception.
            if (message == null || !message.contains(INVALID_CHAR_MESSAGE)) {
                throw parseError;
            }
            setSkipKey((int) this.parser.getCurrentLineNumber(), 0, INVALID_CHAR_MESSAGE);
            if (this.parserIterator.hasNext()) {
                this.parserIterator.next();
            }
            return true;
        }
    }

    /**
     * Switches to the next file of the file iterator once the current one is
     * exhausted. When the input is compressed, or no further file exists,
     * {@link #bytesRead} is set to {@link #fileLen} instead.
     *
     * @return {@code true} if a parser was initialized on the next file, {@code false} if input is finished
     */
    private boolean advanceToNextFile() throws IOException, InterruptedException {
        if (this.compressed || this.iterator == null || !this.iterator.hasNext()) {
            this.bytesRead = this.fileLen;
            return false;
        }
        close();
        initParser(this.iterator.next());
        return true;
    }

    /**
     * Joins the values with the configured delimiter.
     *
     * @param strArr the values to join
     * @return the joined line, or an empty string when {@code strArr} is empty
     */
    protected String convertToLine(String[] strArr) {
        if (strArr.length == 0) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < strArr.length; i++) {
            if (i > 0) {
                joined.append(this.delimiter);
            }
            joined.append(strArr[i]);
        }
        return joined.toString();
    }

    /**
     * Selects the document builder from the configured content type: an
     * {@link XMLDocBuilder} for the default content type, otherwise a
     * {@link JSONDocBuilder}.
     */
    public void initDocType() {
        String contentType = this.conf.get(MarkLogicConstants.CONTENT_TYPE, MarkLogicConstants.DEFAULT_CONTENT_TYPE);
        if (contentType.equals(MarkLogicConstants.DEFAULT_CONTENT_TYPE)) {
            this.docBuilder = new XMLDocBuilder();
        } else {
            this.docBuilder = new JSONDocBuilder();
        }
    }
}
