/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.semantic.api;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
import org.talend.dataquality.semantic.classifier.custom.UserDefinedCategory;
import org.talend.dataquality.semantic.classifier.custom.UserDefinedRegexValidator;
import org.talend.dataquality.semantic.filter.impl.CharSequenceFilter;
import org.talend.dataquality.semantic.index.DictionarySearcher;
import org.talend.dataquality.semantic.model.CategoryType;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.model.DQDocument;
import org.talend.dataquality.semantic.model.DQFilter;
import org.talend.dataquality.semantic.model.DQRegEx;
import org.talend.dataquality.semantic.model.DQValidator;
import org.talend.dataquality.semantic.model.ValidationMode;

/**
 * Static helpers that convert between the semantic dictionary model objects
 * ({@link DQCategory}, {@link DQDocument}) and Lucene {@link Document}s, plus a
 * few related utilities (index copying, regex classifier construction).
 *
 * <p>The class only exposes static members and cannot be instantiated.
 */
public class DictionaryUtils {
    /** Field type configured as indexed, not stored, with norms omitted. */
    public static final FieldType FIELD_TYPE_SYN = new FieldType();
    /** Field type configured as stored but not indexed; used for the {@code "raw"} field. */
    public static final FieldType FIELD_TYPE_RAW_VALUE = new FieldType();

    static {
        FIELD_TYPE_SYN.setStored(false);
        FIELD_TYPE_SYN.setIndexed(true);
        FIELD_TYPE_SYN.setOmitNorms(true);
        FIELD_TYPE_SYN.freeze();

        FIELD_TYPE_RAW_VALUE.setIndexed(false);
        FIELD_TYPE_RAW_VALUE.setStored(true);
        FIELD_TYPE_RAW_VALUE.freeze();
    }

    private DictionaryUtils() {
    }

    /**
     * Builds the Lucene document for one dictionary entry.
     *
     * <p>The document always carries the stored fields {@code "docid"},
     * {@code "catid"} and {@code "word"} (the trimmed {@code word}). For every
     * usable value it additionally carries one non-stored {@code "synterm"}
     * field (the tokens returned by
     * {@link DictionarySearcher#getTokensFromAnalyzer} joined with a single
     * space) and one stored {@code "raw"} field holding the trimmed value.
     *
     * <p>A value is skipped when it is {@code null}, when it contains an ISO
     * control character (a message is printed to standard output in that
     * case), or when it is empty after trimming.
     *
     * @param docId  identifier stored in the {@code "docid"} field
     * @param catId  category identifier stored in the {@code "catid"} field
     * @param word   category name; must not be {@code null}
     * @param values candidate values of the entry; must not be {@code null},
     *               but may contain {@code null} elements
     * @return the populated document
     */
    public static Document generateDocument(String docId, String catId, String word, Set<String> values) {
        // Trim first so that a null word fails before anything else is built.
        String trimmedWord = word.trim();

        Document doc = new Document();
        doc.add(new StringField("docid", docId, Field.Store.YES));
        doc.add(new StringField("catid", catId, Field.Store.YES));
        doc.add(new StringField("word", trimmedWord, Field.Store.YES));

        for (String value : values) {
            if (value == null) {
                continue;
            }
            if (containsControlChars(value)) {
                System.out.println("The value [" + value + "] contains at least one ISO control character and is not added to the index of " + word + ".");
                continue;
            }
            String trimmedValue = value.trim();
            if (trimmedValue.isEmpty()) {
                continue;
            }
            List<String> tokens = DictionarySearcher.getTokensFromAnalyzer(trimmedValue);
            doc.add(new StringField("synterm", StringUtils.join(tokens, ' '), Field.Store.NO));
            doc.add(new Field("raw", trimmedValue, FIELD_TYPE_RAW_VALUE));
        }
        return doc;
    }

    /**
     * Tells whether the given string contains at least one character for which
     * {@link Character#isISOControl(char)} is true.
     */
    private static boolean containsControlChars(String value) {
        // Index-based scan avoids the array copy made by toCharArray().
        for (int i = 0; i < value.length(); i++) {
            if (Character.isISOControl(value.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the string value of the first field named {@code fieldName}, or
     * {@code fallback} when the document has no such field.
     */
    private static String stringValueOrDefault(Document doc, String fieldName, String fallback) {
        IndexableField field = doc.getField(fieldName);
        return field == null ? fallback : field.stringValue();
    }

    /**
     * Returns the first field named {@code fieldName} parsed with
     * {@link Boolean#valueOf(String)}, or {@code null} when the field is absent.
     */
    private static Boolean booleanValueOrNull(Document doc, String fieldName) {
        IndexableField field = doc.getField(fieldName);
        return field == null ? null : Boolean.valueOf(field.stringValue());
    }

    /**
     * Rebuilds a {@link DQCategory} from a document using the field layout
     * written by {@link #categoryToDocument(DQCategory)}.
     *
     * <p>The fields {@code "catid"}, {@code "name"}, {@code "type"} and
     * {@code "completeness"} are read unconditionally, so a document lacking
     * one of them causes a {@link NullPointerException}. {@code "label"} and
     * {@code "description"} default to the empty string; {@code "modified"},
     * {@code "deleted"} and {@code "validationMode"} are only set when present.
     * Each {@code "child"} field becomes a child category carrying only an id.
     *
     * @param doc the category document
     * @return the reconstructed category
     */
    public static DQCategory categoryFromDocument(Document doc) {
        DQCategory category = new DQCategory();
        category.setId(doc.getField("catid").stringValue());
        category.setName(doc.getField("name").stringValue());
        category.setLabel(stringValueOrDefault(doc, "label", ""));
        category.setType(CategoryType.valueOf(doc.getField("type").stringValue()));
        category.setCompleteness(Boolean.valueOf(doc.getField("completeness").stringValue()));

        Boolean modified = booleanValueOrNull(doc, "modified");
        if (modified != null) {
            category.setModified(modified);
        }
        Boolean deleted = booleanValueOrNull(doc, "deleted");
        if (deleted != null) {
            category.setDeleted(deleted);
        }

        category.setDescription(stringValueOrDefault(doc, "description", ""));

        IndexableField validationModeField = doc.getField("validationMode");
        if (validationModeField != null) {
            category.setValidationMode(ValidationMode.valueOf(validationModeField.stringValue()));
        }

        IndexableField[] childFields = doc.getFields("child");
        if (childFields != null) {
            List<DQCategory> children = new ArrayList<>(childFields.length);
            for (IndexableField childField : childFields) {
                DQCategory child = new DQCategory();
                child.setId(childField.stringValue());
                children.add(child);
            }
            category.setChildren(children);
        }
        return category;
    }

    /**
     * Serializes a {@link DQCategory} into a Lucene document.
     *
     * <p>All fields are stored. {@code "label"} falls back to the category name
     * and {@code "description"} to the empty string when the corresponding
     * property is {@code null}; {@code "modified"}, {@code "deleted"} and
     * {@code "validationMode"} are written only when non-null; one
     * {@code "child"} field holding the child id is written per child.
     *
     * @param category the category to serialize; its id, name and type are
     *                 dereferenced without a null check
     * @return the document describing the category
     */
    public static Document categoryToDocument(DQCategory category) {
        String label = category.getLabel() == null ? category.getName() : category.getLabel();
        String description = category.getDescription() == null ? "" : category.getDescription();

        Document doc = new Document();
        doc.add(new StringField("catid", category.getId(), Field.Store.YES));
        doc.add(new StringField("name", category.getName(), Field.Store.YES));
        doc.add(new TextField("label", label, Field.Store.YES));
        doc.add(new StringField("type", category.getType().name(), Field.Store.YES));
        doc.add(new StringField("completeness", String.valueOf(category.getCompleteness()), Field.Store.YES));
        doc.add(new TextField("description", description, Field.Store.YES));

        if (category.getModified() != null) {
            doc.add(new StringField("modified", String.valueOf(category.getModified()), Field.Store.YES));
        }
        if (category.getDeleted() != null) {
            doc.add(new StringField("deleted", String.valueOf(category.getDeleted()), Field.Store.YES));
        }
        if (category.getValidationMode() != null) {
            doc.add(new StringField("validationMode", category.getValidationMode().name(), Field.Store.YES));
        }
        if (!CollectionUtils.isEmpty(category.getChildren())) {
            for (DQCategory child : category.getChildren()) {
                doc.add(new StringField("child", child.getId(), Field.Store.YES));
            }
        }
        return doc;
    }

    /**
     * Converts a {@link DQDocument} into its Lucene representation by
     * delegating to {@link #generateDocument(String, String, String, Set)} with
     * the document id, the category id, the category name and the values.
     *
     * @param doc the dictionary entry; its category must not be {@code null}
     * @return the Lucene document for the entry
     */
    public static Document dqDocumentToLuceneDocument(DQDocument doc) {
        DQCategory category = doc.getCategory();
        return generateDocument(doc.getId(), category.getId(), category.getName(), doc.getValues());
    }

    /**
     * Rebuilds a {@link DQDocument} from a Lucene document, taking the category
     * id from the {@code "catid"} field and the category name from the
     * {@code "word"} field.
     *
     * @param doc a document containing {@code "catid"}, {@code "word"} and
     *            {@code "docid"} fields
     * @return the reconstructed dictionary entry
     */
    public static DQDocument dictionaryEntryFromDocument(Document doc) {
        String catId = doc.getField("catid").stringValue();
        String catName = doc.getField("word").stringValue();
        return dictionaryEntryFromDocument(doc, catId, catName);
    }

    /**
     * Rebuilds a {@link DQDocument} from a Lucene document for an already known
     * category.
     *
     * <p>The category is looked up through
     * {@link CategoryRegistryManager#getCategoryMetadataById}; when that lookup
     * returns {@code null}, a new category carrying only the given id and name
     * is used instead. The entry values are the string values of the
     * {@code "raw"} fields, de-duplicated in encounter order.
     *
     * @param doc          a document containing a {@code "docid"} field
     * @param knownCatId   id of the category the entry belongs to
     * @param knownCatName category name used when the registry lookup fails
     * @return the reconstructed dictionary entry
     */
    public static DQDocument dictionaryEntryFromDocument(Document doc, String knownCatId, String knownCatName) {
        DQCategory category = CategoryRegistryManager.getInstance().getCategoryMetadataById(knownCatId);
        if (category == null) {
            category = new DQCategory();
            category.setId(knownCatId);
            category.setName(knownCatName);
        }

        DQDocument entry = new DQDocument();
        entry.setCategory(category);
        entry.setId(doc.getField("docid").stringValue());

        Set<String> rawValues = new LinkedHashSet<>();
        for (IndexableField rawField : doc.getFields("raw")) {
            rawValues.add(rawField.stringValue());
        }
        entry.setValues(rawValues);
        return entry;
    }

    /**
     * Copies the index held by {@code srcDir} into a file-system index located
     * in {@code destFolder} and commits it.
     *
     * <p>The analyzer, the destination directory and the writer are all
     * released when the method returns, whether it completes normally or not.
     *
     * @param srcDir     directory of the index to copy
     * @param destFolder folder receiving the copied index
     * @throws IOException if opening, writing or closing the destination index fails
     */
    static void rewriteIndex(Directory srcDir, File destFolder) throws IOException {
        // The analyzer is declared first so that it is closed last, after the
        // writer that references it; previously it was never closed at all.
        try (Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
             FSDirectory destDir = FSDirectory.open(destFolder);
             IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig(Version.LATEST, analyzer))) {
            writer.addIndexes(srcDir);
            writer.commit();
        }
    }

    /**
     * Builds a {@link UserDefinedCategory} from the regular-expression
     * definition attached to a category.
     *
     * <p>Name, label, id and description are copied from the category and the
     * main category from its {@link DQRegEx}. The filter and the validator are
     * only set when the regex definition provides them.
     *
     * @param category a category whose {@code getRegEx()} is non-null; a
     *                 {@code null} regex definition causes a
     *                 {@link NullPointerException}
     * @return the user-defined category equivalent to the regex definition
     */
    public static final UserDefinedCategory regexClassifierfromDQCategory(DQCategory category) {
        DQRegEx regExDefinition = category.getRegEx();
        DQFilter filterDefinition = regExDefinition.getFilter();
        DQValidator validatorDefinition = regExDefinition.getValidator();

        UserDefinedCategory classifier = new UserDefinedCategory(category.getName(), category.getLabel());
        classifier.setId(category.getId());
        classifier.setDescription(category.getDescription());
        classifier.setMainCategory(regExDefinition.getMainCategory());

        if (filterDefinition != null) {
            CharSequenceFilter filter = new CharSequenceFilter();
            filter.setFilterParam(filterDefinition.getFilterParam());
            filter.setFilterType(CharSequenceFilter.CharSequenceFilterType.valueOf(filterDefinition.getFilterType()));
            classifier.setFilter(filter);
        }
        if (validatorDefinition != null) {
            UserDefinedRegexValidator validator = new UserDefinedRegexValidator();
            validator.setPatternString(validatorDefinition.getPatternString());
            validator.setSubValidatorClassName(validatorDefinition.getSubValidatorClassName());
            classifier.setValidator(validator);
        }
        return classifier;
    }
}

