/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.statistics;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.common.inference.AvroAnalyzer;
import org.talend.dataquality.common.util.AvroUtils;
import org.talend.dataquality.semantic.index.Index;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.recognizer.CategoryRecognizer;
import org.talend.dataquality.semantic.recognizer.DefaultCategoryRecognizer;
import org.talend.dataquality.semantic.snapshot.DictionarySnapshot;
import org.talend.dataquality.semantic.statistics.SemanticType;
import org.talend.dataquality.semantic.statistics.model.AvroSemanticCategory;

/**
 * Avro analyzer that runs semantic category recognition on every leaf value of the analyzed records.
 *
 * <p>For each record it produces a result record, built on a schema derived from the analyzed schema,
 * in which every leaf value is replaced by a {@link #SEM_DISCOVERY_SCHEMA} record whose
 * {@value #DISCOVERY_MATCHING_FIELD_NAME} field holds the categories recognized for that value.
 * One {@link DefaultCategoryRecognizer} is kept per item identifier (as built by
 * {@code AvroUtils.itemId}), and {@link #getResult()} attaches the frequencies accumulated by these
 * recognizers to the schema as the {@value #DISCOVERY_AGGREGATE_PROP_NAME} property.
 *
 * <p>NOTE(review): instances keep mutable state in plain {@link HashMap}s and lazily assigned fields;
 * nothing in this class synchronizes access to them.
 */
public class AvroSemanticAnalyzer implements AvroAnalyzer {

    private static final Logger LOGGER = LoggerFactory.getLogger(AvroSemanticAnalyzer.class);

    /** Name of the schema property under which {@link #getResult()} stores the aggregated discovery. */
    public static final String DISCOVERY_AGGREGATE_PROP_NAME = "talend.component.semanticType";

    /** Key of the recognizer total in the aggregated discovery map. */
    public static final String DISCOVERY_TOTAL_FIELD_NAME = "total";

    /** Key (and result record field name) holding the matching categories. */
    public static final String DISCOVERY_MATCHING_FIELD_NAME = "matching";

    /** Key of the first matching category in the aggregated discovery map. */
    public static final String DISCOVERY_TOP_RANKED_FIELD_NAME = "topRanked";

    /** Key of the category id inside one matching-category entry. */
    public static final String DISCOVERY_MATCHING_ID_PROP_NAME = "id";

    /** Key of the category name inside one matching-category entry. */
    public static final String DISCOVERY_MATCHING_NAME_PROP_NAME = "name";

    /** Key of the category count inside one matching-category entry. */
    public static final String DISCOVERY_MATCHING_TOTAL_PROP_NAME = "total";

    /** Key of the category score inside one matching-category entry. */
    public static final String DISCOVERY_MATCHING_SCORE_PROP_NAME = "frequency";

    /** Key of the category level inside one matching-category entry. */
    public static final String DISCOVERY_MATCHING_DEPTH_PROP_NAME = "depth";

    private static final String SEM_DISCOVERY_SCHEMA_DEF = "{\"type\": \"record\",\"name\": \"discovery_metadata\", \"namespace\": \"org.talend.dataquality\",\"fields\":[{ \"type\":\"string\", \"name\":\"matching\"}, { \"type\":\"int\", \"name\":\"total\"}]}";

    /** Schema of the record that replaces every leaf value in the per-record analysis result. */
    public static final Schema SEM_DISCOVERY_SCHEMA = new Schema.Parser().parse(SEM_DISCOVERY_SCHEMA_DEF);

    /** Default-configured instance used only to resolve union branches; reused instead of allocating one per value. */
    private static final GenericData UNION_RESOLVER = new GenericData();

    /**
     * Only consulted (never populated) by this class: {@link #init()} clears it and the union handling
     * of {@link #getResult()} checks its keys. Presumably filled by subclasses — confirm.
     */
    protected final Map<String, SemanticType> discoveryResults = new HashMap<>();

    private final DictionarySnapshot dictionarySnapshot;

    /** One recognizer per item identifier, created on first use. */
    private final Map<String, DefaultCategoryRecognizer> categoryRecognizers;

    private Schema semanticSchema;
    private Schema globalDiscoverySchema;
    private Schema recordDiscoverySchema;

    /**
     * Creates an analyzer backed by the given dictionary snapshot.
     *
     * @param dictionarySnapshot the snapshot handed to every category recognizer; must not be null
     * @throws NullPointerException if {@code dictionarySnapshot} is null
     */
    public AvroSemanticAnalyzer(DictionarySnapshot dictionarySnapshot) {
        this.dictionarySnapshot = Objects.requireNonNull(dictionarySnapshot, "Dictionary dictionarySnapshot is null.");
        this.categoryRecognizers = new HashMap<>();
    }

    /**
     * Resets the accumulated state (discovery results and recognizers) and, when the snapshot has a
     * custom data dictionary, initializes its index. The schemas are left untouched.
     */
    public void init() {
        this.discoveryResults.clear();
        this.categoryRecognizers.clear();
        Index customDict = this.dictionarySnapshot.getCustomDataDict();
        if (customDict != null) {
            customDict.initIndex();
        }
    }

    /**
     * Resets the analyzer like {@link #init()} and derives the result schemas from {@code schema},
     * after cleaning the {@value #DISCOVERY_AGGREGATE_PROP_NAME} property from it and dereferencing it.
     *
     * @param schema the schema of the records that will be analyzed
     */
    public void init(Schema schema) {
        this.init();
        Schema cleanSchema = AvroUtils.cleanSchema(schema, Collections.singletonList(DISCOVERY_AGGREGATE_PROP_NAME));
        this.initResultSchema(AvroUtils.dereferencing(cleanSchema));
    }

    /** Stores the semantic schema and builds the global and per-record discovery schemas from it. */
    private void initResultSchema(Schema schema) {
        this.semanticSchema = schema;
        this.globalDiscoverySchema = AvroUtils.copySchema(this.semanticSchema);
        this.recordDiscoverySchema = AvroUtils.createRecordSemanticSchema(this.semanticSchema, SEM_DISCOVERY_SCHEMA);
    }

    /**
     * Analyzes one record, feeding its leaf values to the recognizers. The per-record result is discarded.
     *
     * @param record the record to analyze; a null record is ignored
     * @return always {@code true}
     */
    public boolean analyze(IndexedRecord record) {
        this.analyzeRecord(record);
        return true;
    }

    /**
     * Lazily analyzes a stream of records.
     *
     * @param records the records to analyze
     * @return a sequential stream of per-record results; a null input record yields a null element
     */
    public Stream<IndexedRecord> analyze(Stream<IndexedRecord> records) {
        return records.sequential().map(this::analyzeRecord);
    }

    /**
     * Analyzes one top-level record. When no schema was initialized yet, the record's own schema is
     * used as-is to build the result schemas.
     *
     * @return the result record, or null when {@code record} is null
     */
    private IndexedRecord analyzeRecord(IndexedRecord record) {
        if (record == null) {
            return null;
        }
        if (this.semanticSchema == null) {
            this.initResultSchema(record.getSchema());
        }
        GenericData.Record resultRecord = new GenericData.Record(this.recordDiscoverySchema);
        this.analyzeRecord("", record, resultRecord, this.semanticSchema);
        return resultRecord;
    }

    /**
     * Analyzes every field of {@code record} and stores the outcome in the same-named field of
     * {@code resultRecord}. Fields absent from the result or semantic schema are logged and skipped.
     *
     * @param id             identifier of the enclosing item; empty for the top-level record
     * @param record         the record to analyze
     * @param resultRecord   the record receiving the analysis of each field
     * @param semanticSchema the semantic schema matching {@code record}
     */
    private void analyzeRecord(String id, IndexedRecord record, GenericRecord resultRecord, Schema semanticSchema) {
        for (Schema.Field field : record.getSchema().getFields()) {
            Schema.Field fieldResultSchema = resultRecord.getSchema().getField(field.name());
            if (fieldResultSchema == null) {
                LOGGER.error("{} field is missing from result record schema.", field.name());
                continue;
            }
            Schema.Field fieldSemanticSchema = semanticSchema.getField(field.name());
            if (fieldSemanticSchema == null) {
                LOGGER.error("{} field is missing from semantic schema.", field.name());
                continue;
            }
            String itemId = AvroUtils.itemId(id, field.name());
            Object semRecord = this.analyzeItem(itemId, record.get(field.pos()), field.schema(),
                    fieldResultSchema.schema(), fieldSemanticSchema.schema());
            resultRecord.put(field.name(), semRecord);
        }
    }

    /**
     * Recursively analyzes one value according to its schema type.
     *
     * <p>Records, arrays and maps are traversed; array elements and map values keep the identifier of
     * their container. For unions the branch matching the value is resolved first and its name is
     * appended to the identifier. Every other type is treated as a leaf.
     *
     * @param itemId              identifier of the item, used as the recognizer key for leaves
     * @param item                the value to analyze
     * @param itemSchema          the schema of {@code item}
     * @param resultSchema        the schema of the result to build for {@code item}
     * @param fieldSemanticSchema the semantic schema matching {@code itemSchema}
     * @return a record, array or map mirroring {@code item}, with leaves replaced by
     *         {@link #SEM_DISCOVERY_SCHEMA} records
     * @throws IllegalStateException if the schema type is not handled
     */
    private Object analyzeItem(String itemId, Object item, Schema itemSchema, Schema resultSchema, Schema fieldSemanticSchema) {
        switch (itemSchema.getType()) {
            case RECORD: {
                GenericData.Record resultRecord = new GenericData.Record(resultSchema);
                this.analyzeRecord(itemId, (GenericRecord) item, resultRecord, fieldSemanticSchema);
                return resultRecord;
            }
            case ARRAY: {
                List<Object> resultArray = ((List<?>) item).stream()
                        .map(element -> this.analyzeItem(itemId, element, itemSchema.getElementType(),
                                resultSchema.getElementType(), fieldSemanticSchema.getElementType()))
                        .collect(Collectors.toList());
                return new GenericData.Array<Object>(resultSchema, resultArray);
            }
            case MAP: {
                // Plain loop rather than Collectors.toMap: keys are unique by construction and
                // analyzeItem never returns null, so the resulting HashMap is the same.
                Map<Object, Object> resultMap = new HashMap<>();
                for (Map.Entry<?, ?> entry : ((Map<?, ?>) item).entrySet()) {
                    resultMap.put(entry.getKey(), this.analyzeItem(itemId, entry.getValue(), itemSchema.getValueType(),
                            resultSchema.getValueType(), fieldSemanticSchema.getValueType()));
                }
                return resultMap;
            }
            case UNION: {
                int typeIdx = UNION_RESOLVER.resolveUnion(itemSchema, item);
                Schema realItemSchema = itemSchema.getTypes().get(typeIdx);
                // The result union is searched by type name; when no branch carries that name the
                // leaf discovery schema is used instead.
                Schema realResultSchema = resultSchema.getTypes().stream()
                        .filter(type -> type.getName().equals(realItemSchema.getName()))
                        .findFirst()
                        .orElse(SEM_DISCOVERY_SCHEMA);
                // The semantic union is addressed by position, i.e. it is expected to list its
                // branches in the same order as the item union.
                Schema realSemanticSchema = fieldSemanticSchema.getTypes().get(typeIdx);
                return this.analyzeItem(AvroUtils.itemId(itemId, realItemSchema.getName()), item, realItemSchema,
                        realResultSchema, realSemanticSchema);
            }
            case ENUM:
            case FIXED:
            case STRING:
            case BYTES:
            case INT:
            case LONG:
            case FLOAT:
            case DOUBLE:
            case BOOLEAN:
            case NULL: {
                // Only the "matching" field is filled in; "total" is left unset here.
                GenericData.Record semRecord = new GenericData.Record(SEM_DISCOVERY_SCHEMA);
                semRecord.put(DISCOVERY_MATCHING_FIELD_NAME, this.analyzeLeafValue(itemId, item));
                return semRecord;
            }
        }
        throw new IllegalStateException("Unexpected Avro Schema.Type: " + itemSchema.getType());
    }

    /**
     * Runs the recognizer registered for {@code itemId} (created on first use) on the string form of
     * the value; a null value is processed as the empty string.
     *
     * @param itemId   the recognizer key
     * @param objValue the leaf value, possibly null
     * @return the list of {@link AvroSemanticCategory} built from the recognized category names
     */
    private Object analyzeLeafValue(String itemId, Object objValue) {
        String value = objValue == null ? "" : objValue.toString();
        DefaultCategoryRecognizer recognizer = this.categoryRecognizers.computeIfAbsent(itemId,
                key -> new DefaultCategoryRecognizer(this.dictionarySnapshot));
        String[] categoriesName = recognizer.process(value);
        return Arrays.stream(categoriesName).map(categoryName -> {
            DQCategory cat = this.dictionarySnapshot.getCategoryMetadataByName(categoryName);
            return new AvroSemanticCategory(cat.getId(), cat.getName());
        }).collect(Collectors.toList());
    }

    /**
     * Attaches the aggregated discovery to the global discovery schema and returns it.
     *
     * <p>The returned schema is the analyzer's own instance, modified in place on each call. A failure
     * to add the property (an {@link AvroRuntimeException}) is logged and does not abort the call.
     *
     * @return the global discovery schema carrying the {@value #DISCOVERY_AGGREGATE_PROP_NAME} properties
     * @throws IllegalStateException if no schema was initialized and no record was analyzed yet
     */
    public Schema getResult() {
        if (this.semanticSchema == null) {
            throw new IllegalStateException("The analyzer should be initialized first.");
        }
        for (Schema.Field field : this.globalDiscoverySchema.getFields()) {
            this.updateDiscovery(field.schema(), field.name());
        }
        return this.globalDiscoverySchema;
    }

    /**
     * Walks {@code schema} with the same identifier scheme as the record analysis and adds the
     * {@value #DISCOVERY_AGGREGATE_PROP_NAME} property to every leaf type, and to a union when
     * {@link #discoveryResults} has an entry for it.
     *
     * @param schema    the schema (or sub-schema) to annotate
     * @param fieldName the item identifier of {@code schema}
     */
    private void updateDiscovery(Schema schema, String fieldName) {
        switch (schema.getType()) {
            case RECORD: {
                for (Schema.Field field : schema.getFields()) {
                    this.updateDiscovery(field.schema(), AvroUtils.itemId(fieldName, field.name()));
                }
                break;
            }
            case ARRAY: {
                this.updateDiscovery(schema.getElementType(), fieldName);
                break;
            }
            case MAP: {
                this.updateDiscovery(schema.getValueType(), fieldName);
                break;
            }
            case UNION: {
                if (this.discoveryResults.containsKey(fieldName)) {
                    try {
                        schema.addProp(DISCOVERY_AGGREGATE_PROP_NAME, this.getGlobalFrequencies(fieldName));
                    } catch (AvroRuntimeException e) {
                        LOGGER.error("Failed to add prop to field {}.", schema.getName(), e);
                    }
                }
                for (Schema unionSchema : schema.getTypes()) {
                    this.updateDiscovery(unionSchema, AvroUtils.itemId(fieldName, unionSchema.getName()));
                }
                break;
            }
            case ENUM:
            case FIXED:
            case STRING:
            case BYTES:
            case INT:
            case LONG:
            case FLOAT:
            case DOUBLE:
            case BOOLEAN:
            case NULL: {
                try {
                    schema.addProp(DISCOVERY_AGGREGATE_PROP_NAME, this.getGlobalFrequencies(fieldName));
                } catch (AvroRuntimeException e) {
                    LOGGER.error("Failed to add prop to referenced type {}. The analyzer is not supporting schema with referenced types.",
                            schema.getName(), e);
                }
                break;
            }
        }
    }

    /**
     * Builds the aggregated discovery of one item from its recognizer.
     *
     * @param fieldName the item identifier
     * @return an empty map when no recognizer exists for {@code fieldName}; otherwise a map with the
     *         matching categories (those with a non-empty name), the recognizer total and, when at
     *         least one category matches, the first of them as top ranked
     */
    private Map<String, Object> getGlobalFrequencies(String fieldName) {
        DefaultCategoryRecognizer recognizer = this.categoryRecognizers.get(fieldName);
        if (recognizer == null) {
            return Collections.emptyMap();
        }
        List<Map<String, Object>> categories = recognizer.getResult(fieldName, 0.1f).stream()
                .filter(categoryFrequency -> StringUtils.isNotEmpty(categoryFrequency.getCategoryName()))
                .map(categoryFrequency -> {
                    Map<String, Object> category = new HashMap<>();
                    category.put(DISCOVERY_MATCHING_ID_PROP_NAME, categoryFrequency.getCategoryId());
                    category.put(DISCOVERY_MATCHING_NAME_PROP_NAME, categoryFrequency.getCategoryName());
                    category.put(DISCOVERY_MATCHING_TOTAL_PROP_NAME, categoryFrequency.getCount());
                    category.put(DISCOVERY_MATCHING_SCORE_PROP_NAME, Float.valueOf(categoryFrequency.getScore()));
                    category.put(DISCOVERY_MATCHING_DEPTH_PROP_NAME, categoryFrequency.getCategoryLevel());
                    return category;
                })
                .collect(Collectors.toList());
        Map<String, Object> result = new HashMap<>();
        result.put(DISCOVERY_MATCHING_FIELD_NAME, categories);
        result.put(DISCOVERY_TOTAL_FIELD_NAME, recognizer.getTotal());
        if (!categories.isEmpty()) {
            // The top-ranked entry is simply the first one in the order returned by the recognizer.
            result.put(DISCOVERY_TOP_RANKED_FIELD_NAME, categories.get(0));
        }
        return result;
    }

    /**
     * Returns the outcome of {@link #getResult()} wrapped in a single-element immutable list.
     *
     * @return a list containing only the global discovery schema
     * @throws IllegalStateException if no schema was initialized and no record was analyzed yet
     */
    public List<Schema> getResults() {
        return Collections.singletonList(this.getResult());
    }

    /** Ends every category recognizer created so far. */
    public void close() {
        this.categoryRecognizers.values().forEach(CategoryRecognizer::end);
    }
}

