/*
 * Decompiled with CFR 0.152.
 */
package org.talend.dataquality.parsing.model;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jcrfsuite.CrfTrainer;
import com.github.jcrfsuite.util.Pair;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.talend.dataquality.parsing.core.FeaturesGenerator;
import org.talend.dataquality.parsing.model.TaggedObject;
import third_party.org.chokkan.crfsuite.ItemSequence;
import third_party.org.chokkan.crfsuite.StringList;

/**
 * Trains a CRF model from training sets of {@link TaggedObject} instances.
 *
 * <p>Each training instance supplies a token sequence (turned into CRF features by the
 * configured {@link FeaturesGenerator}) and the matching tag sequence (used as labels).
 */
public class ParserModelTrainer {
    /**
     * Feature generator used by this trainer. Held per instance so that creating another
     * trainer with a different generator does not affect this one.
     */
    private final FeaturesGenerator featuresGenerator;

    /**
     * Creates a trainer that derives CRF features with the given generator.
     *
     * @param featuresGenerator generator used to turn tokens into an {@link ItemSequence}
     */
    public ParserModelTrainer(FeaturesGenerator featuresGenerator) {
        this.featuresGenerator = featuresGenerator;
    }

    /**
     * Deserializes a list of {@link TaggedObject} from the given file with a Jackson
     * {@link ObjectMapper}.
     *
     * @param fileName path of the training set file
     * @param encoding charset used to decode the file
     * @return the tagged objects read from the file
     * @throws IOException if the file cannot be opened, read or parsed
     */
    public static List<TaggedObject> readTrainingSetFromFile(String fileName, Charset encoding) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        // Close the stream ourselves rather than depending on the mapper's auto-close
        // behaviour; the stream is declared separately so it is released even when the
        // reader cannot be constructed.
        try (InputStream in = new FileInputStream(fileName);
                Reader reader = new InputStreamReader(in, encoding)) {
            return mapper.readValue(reader, new TypeReference<List<TaggedObject>>() {});
        }
    }

    /**
     * Converts tagged objects into parallel lists of feature sequences and label sequences.
     *
     * @param instanceList training instances to convert
     * @return a pair whose {@code first} holds one {@link ItemSequence} per instance and whose
     *         {@code second} holds the corresponding {@link StringList} of tags, in the same order
     */
    private Pair<List<ItemSequence>, List<StringList>> prepareTrainingInstances(List<TaggedObject> instanceList) {
        List<ItemSequence> xseqs = new ArrayList<>();
        List<StringList> yseqs = new ArrayList<>();
        for (TaggedObject instance : instanceList) {
            xseqs.add(this.featuresGenerator.getFeatureSequenceFromTokens(instance.getTokens()));

            StringList yseq = new StringList();
            instance.getTags().forEach(yseq::add);
            yseqs.add(yseq);
        }
        return new Pair<>(xseqs, yseqs);
    }

    /**
     * Trains a model from a single training set file.
     *
     * @param trainingSetFilePath path of the training set file
     * @param modelOutputFilePath model output path handed to {@link CrfTrainer}
     * @param trainingSetEncoding charset used to decode the training set file
     * @throws IOException if the training set cannot be read
     */
    public void crfTrain(String trainingSetFilePath, String modelOutputFilePath, Charset trainingSetEncoding) throws IOException {
        this.crfTrain(Collections.singleton(trainingSetFilePath), modelOutputFilePath, trainingSetEncoding);
    }

    /**
     * Trains a model from the concatenation of several training set files.
     *
     * <p>Files are read in the iteration order of {@code trainingSetFilePaths}; all of their
     * instances are merged into one training set before training.
     *
     * @param trainingSetFilePaths paths of the training set files
     * @param modelOutputFilePath model output path handed to {@link CrfTrainer}
     * @param trainingSetEncoding charset used to decode every training set file
     * @throws IOException if any training set cannot be read
     */
    public void crfTrain(Collection<String> trainingSetFilePaths, String modelOutputFilePath, Charset trainingSetEncoding) throws IOException {
        List<TaggedObject> taggedObjectList = new ArrayList<>();
        for (String path : trainingSetFilePaths) {
            taggedObjectList.addAll(ParserModelTrainer.readTrainingSetFromFile(path, trainingSetEncoding));
        }
        Pair<List<ItemSequence>, List<StringList>> trainingInstances = this.prepareTrainingInstances(taggedObjectList);
        CrfTrainer.train(trainingInstances.first, trainingInstances.second, modelOutputFilePath);
    }
}

