package org.apache.pig.piggybank.storage;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.bzip2r.Bzip2TextInputFormat;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/* loaded from: input_file:org/apache/pig/piggybank/storage/XMLLoader.class */
public class XMLLoader extends LoadFunc {
    private String loadLocation;
    protected RecordReader in = null;
    private String identifier;

    /* loaded from: input_file:org/apache/pig/piggybank/storage/XMLLoader$XMLRecordReader.class */
    public static class XMLRecordReader extends RecordReader<LongWritable, Text> {
        protected final RecordReader<LongWritable, Text> wrapped;
        private static final String XMLTagNameRegExp = "[a-zA-Z\\_][0-9a-zA-Z\\-_]+";
        private Pattern identifiersPattern;
        private LongWritable key;
        private Text value;
        private long bufferPos;
        private String buffer;
        private long originalEnd;
        private boolean terminated;

        public XMLRecordReader(RecordReader<LongWritable, Text> recordReader) {
            this.wrapped = recordReader;
        }

        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            this.key = new LongWritable();
            this.value = new Text();
            if (!(inputSplit instanceof FileSplit)) {
                throw new RuntimeException("Cannot override a split of type'" + inputSplit.getClass() + "'");
            }
            FileSplit fileSplit = (FileSplit) inputSplit;
            this.originalEnd = fileSplit.getStart() + fileSplit.getLength();
            Path path = fileSplit.getPath();
            this.wrapped.initialize(new FileSplit(path, fileSplit.getStart(), Math.min(fileSplit.getLength() * 10, path.getFileSystem(taskAttemptContext.getConfiguration()).getFileStatus(path).getLen() - fileSplit.getStart()), fileSplit.getLocations()), taskAttemptContext);
        }

        public void setXMLIdentifier(String str) {
            if (!str.matches(XMLTagNameRegExp)) {
                throw new RuntimeException("XML tag identifier '" + str + "' does not match the regular expression /" + XMLTagNameRegExp + "/");
            }
            this.identifiersPattern = Pattern.compile("(" + ("<\\s*" + str + "\\s*[^>]*/>") + ")|(" + ("<\\s*" + str + "(?:\\s*|\\s+(?:[^/>]*|[^>]*[^>/]))>") + ")|(" + ("</\\s*" + str + "\\s*>") + ")");
        }

        public void close() throws IOException {
            this.wrapped.close();
        }

        public boolean equals(Object obj) {
            return this.wrapped.equals(obj);
        }

        /* renamed from: getCurrentKey, reason: merged with bridge method [inline-methods] */
        public LongWritable m140getCurrentKey() throws IOException, InterruptedException {
            return this.key;
        }

        /* renamed from: getCurrentValue, reason: merged with bridge method [inline-methods] */
        public Text m139getCurrentValue() throws IOException, InterruptedException {
            return this.value;
        }

        public float getProgress() throws IOException, InterruptedException {
            return Math.max(1.0f, this.wrapped.getProgress() * 10.0f);
        }

        public int hashCode() {
            return this.wrapped.hashCode();
        }

        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.terminated) {
                return false;
            }
            int i = 0;
            StringBuffer stringBuffer = new StringBuffer();
            int i2 = 0;
            while (true) {
                try {
                    if (this.buffer != null && this.buffer.length() != 0) {
                        Matcher matcher = this.identifiersPattern.matcher(this.buffer);
                        while (matcher.find()) {
                            int start = matcher.start();
                            int end = matcher.end();
                            String group = matcher.group(1);
                            if (group != null) {
                                this.value = new Text(group);
                                this.key.set(this.bufferPos + matcher.start(1));
                                this.bufferPos += matcher.end(1);
                                this.buffer = this.buffer.substring(end);
                                return true;
                            }
                            if (matcher.group(2) != null) {
                                if (i == 0) {
                                    i2 = start;
                                    this.key.set(this.bufferPos + start);
                                }
                                i++;
                            } else {
                                if (matcher.group(3) == null) {
                                    throw new RuntimeException("Invalid match '" + matcher.group() + "' in string '" + this.buffer + "'");
                                }
                                if (i > 0) {
                                    i--;
                                    if (i == 0) {
                                        if (stringBuffer.length() == 0) {
                                            this.value = new Text(this.buffer.substring(i2, end));
                                        } else {
                                            stringBuffer.append((CharSequence) this.buffer, i2, end);
                                            this.value = new Text(stringBuffer.toString());
                                        }
                                        this.buffer = this.buffer.substring(end);
                                        this.bufferPos += end;
                                        return true;
                                    }
                                } else {
                                    continue;
                                }
                            }
                        }
                        if (i > 0) {
                            stringBuffer.append((CharSequence) this.buffer, i2, this.buffer.length());
                        }
                        this.buffer = null;
                    } else {
                        if (!this.wrapped.nextKeyValue()) {
                            return false;
                        }
                        if (this.bufferPos >= this.originalEnd && i == 0) {
                            this.terminated = true;
                            return false;
                        }
                        this.bufferPos = ((LongWritable) this.wrapped.getCurrentKey()).get();
                        this.buffer = ((Text) this.wrapped.getCurrentValue()).toString();
                    }
                } catch (InterruptedException e) {
                    throw new IOException("Error getting input");
                }
            }
        }

        public String toString() {
            return this.wrapped.toString();
        }
    }

    /**
     * Creates a loader that extracts the XML elements with the given tag name.
     *
     * @param str the tag name; it is stored as-is and handed to each record reader this loader creates
     */
    public XMLLoader(String str) {
        identifier = str;
    }

    /**
     * Remembers the record reader that {@link #getNext()} will pull records from.
     *
     * @param recordReader the reader to read from
     * @param pigSplit     the split being processed; not used by this loader
     */
    @Override
    public void prepareToRead(RecordReader recordReader, PigSplit pigSplit) throws IOException {
        in = recordReader;
    }

    /**
     * Reads the next record from the current record reader and wraps it in a tuple.
     *
     * @return a single-field tuple holding the record's text, or {@code null} when the reader has
     *         no more records
     * @throws IOException if reading fails, or if the thread is interrupted while reading (the
     *                     interrupt flag is restored and the {@link InterruptedException} is the cause)
     */
    @Override
    public Tuple getNext() throws IOException {
        try {
            if (!this.in.nextKeyValue()) {
                return null;
            }
            return createTuple(this.in.getCurrentValue().toString());
        } catch (InterruptedException e) {
            // Returning null here would look like a normal end of input and silently drop the
            // remaining records, so surface the interruption as a failure instead.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while reading the next XML element", e);
        }
    }

    /**
     * Builds a tuple with a single field containing the given text as a {@link DataByteArray}.
     *
     * @param str the text to wrap
     * @return a new one-field tuple
     */
    public Tuple createTuple(String str) {
        TupleFactory tupleFactory = TupleFactory.getInstance();
        DataByteArray field = new DataByteArray(str);
        return tupleFactory.newTuple(field);
    }

    /**
     * Returns the input format used to read the location set by {@link #setLocation(String, Job)}.
     *
     * <p>Locations ending in {@code .bz2} or {@code .bz} are read with a
     * {@link Bzip2TextInputFormat}, everything else with a {@link PigTextInputFormat}. In both
     * cases the record reader produced by the format is wrapped in an {@link XMLRecordReader}
     * configured with this loader's tag identifier.
     */
    @Override
    public InputFormat getInputFormat() throws IOException {
        boolean bzipLocation = this.loadLocation.endsWith(".bz2") || this.loadLocation.endsWith(".bz");
        if (bzipLocation) {
            return new Bzip2TextInputFormat() {
                @Override
                public RecordReader<LongWritable, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
                    try {
                        return XMLLoader.this.wrapInXmlReader(super.createRecordReader(inputSplit, taskAttemptContext));
                    } catch (IOException e) {
                        throw new RuntimeException("Cannot create input split", e);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("Cannot create input split", e);
                    }
                }
            };
        }
        return new PigTextInputFormat() {
            @Override
            public RecordReader<LongWritable, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
                return XMLLoader.this.wrapInXmlReader(super.createRecordReader(inputSplit, taskAttemptContext));
            }
        };
    }

    /** Wraps a line reader in an {@link XMLRecordReader} set up with this loader's tag identifier. */
    private XMLRecordReader wrapInXmlReader(RecordReader<LongWritable, Text> lineReader) {
        XMLRecordReader xmlReader = new XMLRecordReader(lineReader);
        xmlReader.setXMLIdentifier(this.identifier);
        return xmlReader;
    }

    /**
     * Records the load location and registers it as the input path of the job.
     *
     * @param str the location to load from; also consulted by {@link #getInputFormat()} to pick
     *            the input format
     * @param job the job whose input paths are set
     */
    @Override
    public void setLocation(String str, Job job) throws IOException {
        loadLocation = str;
        FileInputFormat.setInputPaths(job, str);
    }
}
