package org.apache.pig.builtin;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.primitives.Longs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.pig.Expression;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadMetadata;
import org.apache.pig.LoadPushDown;
import org.apache.pig.PigWarning;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigFileInputFormat;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.avro.AvroArrayReader;
import org.apache.pig.impl.util.avro.AvroRecordReader;
import org.apache.pig.impl.util.avro.AvroRecordWriter;
import org.apache.pig.impl.util.avro.AvroStorageSchemaConversionUtilities;
import org.apache.pig.impl.util.avro.AvroTupleWrapper;
import org.apache.pig.scripting.ScriptEngine;

/* loaded from: input_file:org/apache/pig/builtin/AvroStorage.class */
public class AvroStorage extends LoadFunc implements StoreFuncInterface, LoadMetadata, LoadPushDown {
    /**
     * Record name used when checkSchema generates an output schema. Initialised to "record" by the
     * constructor and replaced by the first constructor argument when that argument does not parse
     * as an Avro schema.
     */
    private String schemaName;
    /** Namespace passed to the schema conversion in checkSchema; set from the "namespace" option. */
    private String schemaNameSpace;
    /** Set from the 'r' option; forwarded to the Avro-to-ResourceSchema conversion in getSchema. */
    protected boolean allowRecursive;
    /** Set from the 'd' option; forwarded to the ResourceSchema-to-Avro conversion in checkSchema. */
    protected boolean doubleColonsToDoubleUnderscores;
    /** Avro schema currently held by this instance; shared by the input and output schema setters. */
    protected Schema schema;
    /** Logger obtained for the runtime class in the constructor. */
    protected final Log log;
    /** Signature used to select this instance's UDF properties; null until one of the signature setters runs. */
    protected String udfContextSignature;
    /**
     * Filter that rejects any path whose final name component starts with
     * {@code JobControlCompiler.PIG_MAP_SEPARATOR} or {@code ScriptEngine.NAMESPACE_SEPARATOR}
     * and accepts everything else.
     * NOTE(review): these constants presumably stand for the "_" and "." hidden-file prefixes;
     * confirm against their declarations.
     */
    protected static final PathFilter VISIBLE_FILES = new PathFilter() { // from class: org.apache.pig.builtin.AvroStorage.1
        public boolean accept(Path path) {
            String fileName = path.getName();
            boolean hidden = fileName.startsWith(JobControlCompiler.PIG_MAP_SEPARATOR)
                    || fileName.startsWith(ScriptEngine.NAMESPACE_SEPARATOR);
            return !hidden;
        }
    };
    /** UDF property key under which setOutputAvroSchema stores the output schema text. */
    public static final String OUTPUT_AVRO_SCHEMA = "org.apache.pig.builtin.AvroStorage.output.schema";
    /** Writer handed over in prepareToWrite and used by putNext. */
    private RecordWriter<NullWritable, Object> writer;
    /** UDF property key under which setInputAvroSchema stores the input schema text. */
    public static final String INPUT_AVRO_SCHEMA = "org.apache.pig.builtin.AvroStorage.input.schema";
    /** Reader handed over in prepareToRead and used by getNext. */
    private RecordReader reader;
    /** Split handed over in prepareToRead; it is only stored by the code in this class. */
    PigSplit split;
    /** Field list received by the most recent pushProjection call. */
    protected LoadPushDown.RequiredFieldList requiredFieldList;

    /** Creates an instance with no schema argument and no options by delegating to the two-argument constructor. */
    public AvroStorage() {
        this(null, null);
    }

    /**
     * Creates an instance from a schema argument only, with no option string.
     *
     * @param str Avro schema text, or a record name when the text does not parse as a schema
     */
    public AvroStorage(String str) {
        this(str, null);
    }

    /**
     * Creates an instance from a schema argument and a space-separated option string.
     *
     * <p>When {@code str} is non-empty it is first parsed as an Avro schema and used as both the
     * input and output schema; if parsing fails with a {@link SchemaParseException} the text is
     * kept as the record name for generated output schemas instead.
     *
     * <p>Recognised options: {@code -n/--namespace}, {@code -f/--schemafile},
     * {@code -e/--examplefile}, {@code -r/--allowrecursive} and {@code -d/--doublecolons}.
     * A missing schema file is only logged, because the file may exist on the front end but not
     * on the back end.
     *
     * @param str Avro schema text or record name; may be null or empty
     * @param str2 option string split on single spaces; may be null
     * @throws RuntimeException wrapping a ParseException for invalid options, or an IOException
     *     raised while reading the schema file or the example data file
     */
    public AvroStorage(String str, String str2) {
        this.schemaName = "record";
        this.schemaNameSpace = null;
        this.allowRecursive = false;
        this.doubleColonsToDoubleUnderscores = false;
        this.log = LogFactory.getLog(getClass());
        this.udfContextSignature = null;
        if (str != null && str.length() > 0) {
            try {
                Schema parse = new Schema.Parser().parse(str);
                setInputAvroSchema(parse);
                setOutputAvroSchema(parse);
            } catch (SchemaParseException e) {
                // Not a schema definition: treat the argument as the output record name.
                this.schemaName = str;
            }
        }
        if (str2 != null) {
            // Named "args" rather than "split" so it does not shadow the PigSplit field.
            String[] args = str2.split(" ");
            Options options = new Options();
            try {
                GnuParser gnuParser = new GnuParser();
                options.addOption("n", "namespace", true, "Namespace for an automatically generated output schema");
                options.addOption("f", "schemafile", true, "Specifies URL for avro schema file from which to read the input or output schema");
                options.addOption("e", "examplefile", true, "Specifies URL for avro data file from which to copy the output schema");
                options.addOption("r", "allowrecursive", false, "Option to allow recursive schema definitions (default is false)");
                options.addOption("d", "doublecolons", false, "Option to translate Pig schema names with double colons to names with double underscores (default is false)");
                CommandLine parse2 = gnuParser.parse(options, args);
                this.schemaNameSpace = parse2.getOptionValue("namespace", (String) null);
                this.allowRecursive = parse2.hasOption('r');
                this.doubleColonsToDoubleUnderscores = parse2.hasOption('d');
                if (parse2.hasOption('f')) {
                    try {
                        Path path = new Path(parse2.getOptionValue('f'));
                        InputStream schemaStream = FileSystem.get(path.toUri(), new Configuration()).open(path);
                        Schema parse3;
                        try {
                            parse3 = new Schema.Parser().parse(schemaStream);
                        } finally {
                            // The Avro parser leaves the stream open, so release it here.
                            schemaStream.close();
                        }
                        setInputAvroSchema(parse3);
                        setOutputAvroSchema(parse3);
                    } catch (FileNotFoundException e2) {
                        System.err.printf("file not found exception\n", new Object[0]);
                        this.log.warn("Schema file not found when instantiating AvroStorage. (If the schema was described in a local file on the front end, and this message is in the back end log, you can ignore this mesasge.)", e2);
                    }
                } else if (parse2.hasOption('e')) {
                    setOutputAvroSchema(getAvroSchema(parse2.getOptionValue('e'), new Job(new Configuration())));
                }
            } catch (ParseException e3) {
                this.log.error("Exception in AvroStorage", e3);
                this.log.error("AvroStorage called with arguments " + str + ", " + str2);
                warn("ParseException in AvroStorage", PigWarning.UDF_WARNING_1);
                new HelpFormatter().printHelp("AvroStorage(',', '[options]')", options);
                throw new RuntimeException(e3);
            } catch (IOException e4) {
                this.log.warn("Exception in AvroStorage", e4);
                this.log.warn("AvroStorage called with arguments " + str + ", " + str2);
                warn("IOException in AvroStorage", PigWarning.UDF_WARNING_1);
                throw new RuntimeException(e4);
            }
        }
    }

    /**
     * Records the signature used to look up this instance's UDF properties, then forwards it to
     * the superclass.
     *
     * @param str the UDF context signature
     */
    @Override // org.apache.pig.LoadFunc
    public final void setUDFContextSignature(String str) {
        this.udfContextSignature = str;
        super.setUDFContextSignature(str);
    }

    /**
     * Returns the UDF properties for {@code AvroStorage}, scoped by the stored context signature
     * when one has been set.
     *
     * @return the properties object obtained from the UDF context
     */
    protected final Properties getProperties() {
        if (udfContextSignature == null) {
            return getProperties(AvroStorage.class, null);
        }
        return getProperties(AvroStorage.class, udfContextSignature);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Looks up UDF properties for the given class in the current UDF context.
     *
     * @param cls class whose properties are requested
     * @param str signature narrowing the lookup; when null only the class is used as the key
     * @return the properties object returned by the UDF context
     */
    public final Properties getProperties(Class cls, String str) {
        UDFContext context = UDFContext.getUDFContext();
        if (str == null) {
            return context.getUDFProperties(cls);
        }
        return context.getUDFProperties(cls, new String[]{str});
    }

    /**
     * Returns the Pig schema for the data at the given location. If no Avro schema is held yet,
     * one is read from the location and stored as the input schema first.
     *
     * @param str load location
     * @param job job supplying the configuration
     * @return the held Avro schema converted to a ResourceSchema
     * @throws IOException if the Avro schema cannot be read from the location
     */
    @Override // org.apache.pig.LoadMetadata
    public final ResourceSchema getSchema(String str, Job job) throws IOException {
        if (schema == null) {
            Schema discovered = getAvroSchema(str, job);
            setInputAvroSchema(discovered);
        }
        Boolean recursionAllowed = Boolean.valueOf(allowRecursive);
        return AvroStorageSchemaConversionUtilities.avroSchemaToResourceSchema(schema, recursionAllowed);
    }

    /**
     * Splits a location string into its individual paths and reads an Avro schema from them.
     *
     * @param str location string, split with {@code getPathStrings}
     * @param job job supplying the configuration
     * @return the schema found by {@link #getAvroSchema(Path[], Job)}
     * @throws IOException if no schema can be read
     */
    protected final Schema getAvroSchema(String str, Job job) throws IOException {
        String[] locations = getPathStrings(str);
        Path[] paths = new Path[locations.length];
        int next = 0;
        for (String location : locations) {
            paths[next++] = new Path(location);
        }
        return getAvroSchema(paths, job);
    }

    /**
     * Reads the Avro schema from a data file found under the given paths.
     *
     * <p>Each path is expanded as a glob on the file system of the first path. The matches are
     * then searched with {@code depthFirstSearchForFile}, and the schema is taken from the header
     * of the file that search returns.
     *
     * @param pathArr paths or glob patterns to search; must contain at least one entry
     * @param job job supplying the Hadoop configuration
     * @return the schema stored in the selected Avro data file
     * @throws IOException if no paths are given, a path does not exist, nothing matches, no file
     *     is found under the matches, or the file cannot be read
     */
    public Schema getAvroSchema(Path[] pathArr, Job job) throws IOException {
        if (pathArr == null || pathArr.length == 0) {
            throw new IOException("No paths were given to read an avro schema from.");
        }
        FileSystem fileSystem = FileSystem.get(pathArr[0].toUri(), job.getConfiguration());
        List<FileStatus> matches = new ArrayList<FileStatus>();
        for (Path path : pathArr) {
            FileStatus[] globbed = fileSystem.globStatus(path);
            if (globbed == null) {
                // globStatus reports a missing non-glob path as null rather than an empty array.
                throw new IOException("Path " + path + " does not exist.");
            }
            matches.addAll(Arrays.asList(globbed));
        }
        if (matches.isEmpty()) {
            throw new IOException("No path matches pattern " + Arrays.toString(pathArr));
        }
        FileStatus[] fileStatusArr = matches.toArray(new FileStatus[matches.size()]);
        Path dataFile = depthFirstSearchForFile(fileStatusArr, fileSystem);
        if (dataFile == null) {
            throw new IOException("No path matches pattern " + Arrays.toString(pathArr));
        }
        InputStream in = fileSystem.open(dataFile);
        try {
            DataFileStream<Object> avroStream = new DataFileStream<Object>(in, new GenericDatumReader<Object>());
            return avroStream.getSchema();
        } finally {
            // Closing the underlying stream also covers the case where the header is invalid
            // and the DataFileStream constructor throws.
            in.close();
        }
    }

    /**
     * Returns the path itself when it is a file; otherwise searches its visible children.
     *
     * @param fileStatus entry to inspect
     * @param fileSystem file system the entry belongs to
     * @return path of a file at or below the entry, or null if the search finds none
     * @throws IOException if the file system cannot be queried
     */
    private Path depthFirstSearchForFile(FileStatus fileStatus, FileSystem fileSystem) throws IOException {
        Path candidate = fileStatus.getPath();
        if (fileSystem.isFile(candidate)) {
            return candidate;
        }
        FileStatus[] children = fileSystem.listStatus(candidate, VISIBLE_FILES);
        return depthFirstSearchForFile(children, fileSystem);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Searches the given entries for a file, visiting the most recently modified entry first.
     * The array is sorted in place by descending modification time.
     *
     * @param fileStatusArr entries to search; reordered by this call
     * @param fileSystem file system the entries belong to
     * @return path of the first file found, or null if none of the entries leads to a file
     * @throws IOException if the file system cannot be queried
     */
    public Path depthFirstSearchForFile(FileStatus[] fileStatusArr, FileSystem fileSystem) throws IOException {
        Comparator<FileStatus> newestFirst = new Comparator<FileStatus>() {
            @Override // java.util.Comparator
            public int compare(FileStatus left, FileStatus right) {
                return Longs.compare(right.getModificationTime(), left.getModificationTime());
            }
        };
        Arrays.sort(fileStatusArr, newestFirst);
        for (int i = 0; i < fileStatusArr.length; i++) {
            Path found = depthFirstSearchForFile(fileStatusArr[i], fileSystem);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Always returns null; this loader supplies no statistics.
     *
     * @param str load location (unused)
     * @param job job (unused)
     * @return null
     */
    @Override // org.apache.pig.LoadMetadata
    public final ResourceStatistics getStatistics(String str, Job job) throws IOException {
        return null;
    }

    /**
     * Always returns null; this loader reports no partition keys.
     *
     * @param str load location (unused)
     * @param job job (unused)
     * @return null
     */
    @Override // org.apache.pig.LoadMetadata
    public final String[] getPartitionKeys(String str, Job job) throws IOException {
        return null;
    }

    /**
     * Does nothing; the supplied filter expression is ignored.
     *
     * @param expression partition filter (unused)
     */
    @Override // org.apache.pig.LoadMetadata
    public void setPartitionFilter(Expression expression) throws IOException {
    }

    /**
     * Resolves a store location against a base path by delegating to
     * {@code LoadFunc.getAbsolutePath}.
     *
     * @param str store location as given by the caller
     * @param path base path to resolve against
     * @return the value returned by {@code LoadFunc.getAbsolutePath}
     * @throws IOException if the delegate fails
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final String relToAbsPathForStoreLocation(String str, Path path) throws IOException {
        return LoadFunc.getAbsolutePath(str, path);
    }

    /**
     * Returns a file output format whose record writer is an {@code AvroRecordWriter} targeting
     * the task's default work file with the {@code AvroOutputFormat.EXT} extension.
     *
     * @return a new output format instance
     */
    @Override // org.apache.pig.StoreFuncInterface
    public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {
        return new FileOutputFormat<NullWritable, Object>() { // from class: org.apache.pig.builtin.AvroStorage.1AvroStorageOutputFormat
            public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
                Path workFile = getDefaultWorkFile(taskAttemptContext, AvroOutputFormat.EXT);
                Configuration conf = taskAttemptContext.getConfiguration();
                return new AvroRecordWriter(workFile, conf);
            }
        };
    }

    /**
     * Sets the job's output path to the given store location.
     *
     * @param str store location
     * @param job job whose output path is set
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void setStoreLocation(String str, Job job) throws IOException {
        Path outputPath = new Path(str);
        FileOutputFormat.setOutputPath(job, outputPath);
    }

    /**
     * Converts the Pig schema of the data being stored into an Avro schema and records it as the
     * output schema. The record name is the configured schema name, or "pig_output" when that
     * name is null or empty.
     *
     * @param resourceSchema Pig schema of the data to store
     * @throws IOException if the schema is null or cannot be converted
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void checkSchema(ResourceSchema resourceSchema) throws IOException {
        if (resourceSchema == null) {
            throw new IOException("checkSchema: called with null ResourceSchema");
        }
        String recordName;
        if (schemaName == null || schemaName.length() == 0) {
            recordName = "pig_output";
        } else {
            recordName = schemaName;
        }
        Schema converted = AvroStorageSchemaConversionUtilities.resourceSchemaToAvroSchema(resourceSchema, recordName, schemaNameSpace, Maps.newHashMap(), Boolean.valueOf(doubleColonsToDoubleUnderscores));
        if (converted == null) {
            throw new IOException("checkSchema: could not translate ResourceSchema to Avro Schema");
        }
        setOutputAvroSchema(converted);
    }

    /**
     * Stores the schema on this instance and writes its text form to the UDF properties under
     * {@code OUTPUT_AVRO_SCHEMA}.
     *
     * @param schema output schema; dereferenced, so it must not be null
     */
    protected final void setOutputAvroSchema(Schema schema) {
        this.schema = schema;
        getProperties().setProperty(OUTPUT_AVRO_SCHEMA, schema.toString());
    }

    /**
     * Returns the held schema, first restoring it from the {@code OUTPUT_AVRO_SCHEMA} UDF
     * property when none is held and the property is present.
     *
     * @return the schema, or null if none is held and the property is absent
     */
    protected final Schema getOutputAvroSchema() {
        if (schema == null) {
            String serialized = getProperties().getProperty(OUTPUT_AVRO_SCHEMA);
            if (serialized != null) {
                schema = new Schema.Parser().parse(serialized);
            }
        }
        return schema;
    }

    /**
     * Keeps the supplied writer and hands it the output schema. The writer is cast to
     * {@code AvroRecordWriter}.
     *
     * @param recordWriter writer to use for subsequent putNext calls
     * @throws IOException if no UDF context signature has been set
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void prepareToWrite(RecordWriter recordWriter) throws IOException {
        if (udfContextSignature == null) {
            throw new IOException(getClass().toString() + ".prepareToWrite called without setting udf context signature");
        }
        writer = recordWriter;
        AvroRecordWriter avroWriter = (AvroRecordWriter) writer;
        avroWriter.prepareToWrite(getOutputAvroSchema());
    }

    /**
     * Writes one tuple through the writer supplied to prepareToWrite, using a null key.
     *
     * @param tuple tuple to write
     * @throws IOException if the write fails, or wrapping an InterruptedException raised by the
     *     writer (the thread's interrupt flag is restored before rethrowing)
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void putNext(Tuple tuple) throws IOException {
        try {
            this.writer.write(null, tuple);
        } catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            this.log.error("InterruptedException in putNext", e);
            throw new IOException(e);
        }
    }

    /**
     * Records the signature used to look up this instance's UDF properties on the store side,
     * then forwards it to the superclass signature setter.
     *
     * @param str the UDF context signature
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void setStoreFuncUDFContextSignature(String str) {
        this.udfContextSignature = str;
        super.setUDFContextSignature(str);
    }

    /**
     * Delegates failure cleanup to {@code StoreFunc.cleanupOnFailureImpl}.
     *
     * @param str store location
     * @param job the failed job
     * @throws IOException if the delegate fails
     */
    @Override // org.apache.pig.StoreFuncInterface
    public final void cleanupOnFailure(String str, Job job) throws IOException {
        StoreFunc.cleanupOnFailureImpl(str, job);
    }

    /**
     * Registers the input paths on the job and makes sure an Avro schema is held. The schema is
     * taken, in order, from the instance field, from the input-schema UDF property, or from a
     * data file at the location. Only a schema read from the location is written back to the
     * UDF properties.
     *
     * @param str load location
     * @param job job to configure
     * @throws IOException if no schema can be determined
     */
    @Override // org.apache.pig.LoadFunc
    public void setLocation(String str, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, str);
        if (schema != null) {
            return;
        }
        schema = getInputAvroSchema();
        if (schema != null) {
            return;
        }
        schema = getAvroSchema(str, job);
        if (schema == null) {
            throw new IOException("Could not determine avro schema for location " + str);
        }
        setInputAvroSchema(schema);
    }

    /**
     * Stores the schema on this instance and writes its text form to the UDF properties under
     * {@code INPUT_AVRO_SCHEMA}.
     *
     * @param schema input schema; dereferenced, so it must not be null
     */
    protected final void setInputAvroSchema(Schema schema) {
        this.schema = schema;
        getProperties().setProperty(INPUT_AVRO_SCHEMA, schema.toString());
    }

    /**
     * Returns the held schema, first restoring it from the {@code INPUT_AVRO_SCHEMA} UDF
     * property when none is held and the property is present.
     *
     * @return the schema, or null if none is held and the property is absent
     */
    public final Schema getInputAvroSchema() {
        if (schema == null) {
            String serialized = getProperties().getProperty(INPUT_AVRO_SCHEMA);
            if (serialized != null) {
                schema = new Schema.Parser().parse(serialized);
            }
        }
        return schema;
    }

    /**
     * Returns an input format whose record reader is chosen from the input schema: an
     * {@code AvroArrayReader} when the schema type is ARRAY, otherwise an
     * {@code AvroRecordReader}. The reader is initialised with the split before it is returned,
     * and the task status is set to the split's string form.
     *
     * @return a new input format instance
     */
    @Override // org.apache.pig.LoadFunc
    public InputFormat<NullWritable, GenericData.Record> getInputFormat() throws IOException {
        return new PigFileInputFormat<NullWritable, GenericData.Record>() { // from class: org.apache.pig.builtin.AvroStorage.3
            public RecordReader<NullWritable, GenericData.Record> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
                Schema readSchema = AvroStorage.this.getInputAvroSchema();
                RecordReader avroReader;
                if (readSchema.getType() == Schema.Type.ARRAY) {
                    avroReader = new AvroArrayReader(readSchema);
                } else {
                    avroReader = new AvroRecordReader(readSchema);
                }
                avroReader.initialize(inputSplit, taskAttemptContext);
                taskAttemptContext.setStatus(inputSplit.toString());
                return avroReader;
            }
        };
    }

    /**
     * Stores the reader and split that subsequent getNext calls will use.
     *
     * @param recordReader reader to pull records from
     * @param pigSplit split being read
     */
    @Override // org.apache.pig.LoadFunc
    public final void prepareToRead(RecordReader recordReader, PigSplit pigSplit) throws IOException {
        this.reader = recordReader;
        this.split = pigSplit;
    }

    /**
     * Reads the next record from the reader supplied to prepareToRead and wraps it as a tuple.
     *
     * @return the next record wrapped in an {@code AvroTupleWrapper}, or null when the reader
     *     has no more records
     * @throws IOException if reading fails, or wrapping an InterruptedException raised by the
     *     reader (the thread's interrupt flag is restored before rethrowing)
     */
    @Override // org.apache.pig.LoadFunc
    public final Tuple getNext() throws IOException {
        try {
            if (this.reader.nextKeyValue()) {
                return new AvroTupleWrapper((GenericData.Record) this.reader.getCurrentValue());
            }
            return null;
        } catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new IOException("Wrapped Interrupted Exception", e);
        }
    }

    /**
     * Does nothing; no cleanup is performed after a successful store.
     *
     * @param str store location (unused)
     * @param job the completed job (unused)
     */
    @Override // org.apache.pig.StoreFuncInterface
    public void cleanupOnSuccess(String str, Job job) throws IOException {
    }

    /**
     * Reports the push-down features this loader supports.
     *
     * @return a new list containing only {@code OperatorSet.PROJECTION}
     */
    @Override // org.apache.pig.LoadPushDown
    public List<LoadPushDown.OperatorSet> getFeatures() {
        return Lists.newArrayList(LoadPushDown.OperatorSet.PROJECTION);
    }

    /**
     * Narrows the held schema to the requested fields. When a projected schema can be derived it
     * replaces the held schema and is written to the input-schema UDF property; otherwise a
     * warning is logged and the schema is left unchanged.
     *
     * @param requiredFieldList fields requested by the plan; always stored on this instance
     * @return a response carrying true when the projection was applied, false otherwise
     */
    @Override // org.apache.pig.LoadPushDown
    public LoadPushDown.RequiredFieldResponse pushProjection(LoadPushDown.RequiredFieldList requiredFieldList) throws FrontendException {
        this.requiredFieldList = requiredFieldList;
        Schema projected = AvroStorageSchemaConversionUtilities.newSchemaFromRequiredFieldList(schema, requiredFieldList);
        if (projected == null) {
            log.warn("could not select fields subset " + requiredFieldList + "\n");
            warn("could not select fields subset", PigWarning.UDF_WARNING_2);
            return new LoadPushDown.RequiredFieldResponse(false);
        }
        schema = projected;
        setInputAvroSchema(schema);
        return new LoadPushDown.RequiredFieldResponse(true);
    }
}
