package org.talend.dataquality.parsing.model;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jcrfsuite.CrfTrainer;
import com.github.jcrfsuite.util.Pair;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.talend.dataquality.parsing.core.FeaturesGenerator;
import third_party.org.chokkan.crfsuite.ItemSequence;
import third_party.org.chokkan.crfsuite.StringList;

/* loaded from: input_file:org/talend/dataquality/parsing/model/ParserModelTrainer.class */
/**
 * Trains a CRF model from tagged training examples stored in files.
 *
 * <p>Each training file is deserialized with Jackson into a list of {@link TaggedObject}s. For
 * every example, the tokens are converted into a feature sequence by the configured
 * {@link FeaturesGenerator} and the tags are copied into a {@link StringList}; both lists are
 * then handed to {@link CrfTrainer#train}.
 */
public class ParserModelTrainer {
    /**
     * Feature generator used by this trainer. Held per instance so that several trainers
     * configured with different generators do not overwrite each other's configuration.
     */
    private final FeaturesGenerator featuresGenerator;

    /**
     * Creates a trainer that builds feature sequences with the given generator.
     *
     * @param featuresGenerator2 generator used to turn tokens into feature sequences
     */
    public ParserModelTrainer(FeaturesGenerator featuresGenerator2) {
        this.featuresGenerator = featuresGenerator2;
    }

    /**
     * Reads a list of tagged training examples from a file.
     *
     * @param str path of the file to read
     * @param charset charset used to decode the file content
     * @return the examples deserialized from the file
     * @throws IOException if the file cannot be opened, read or parsed
     */
    public static List<TaggedObject> readTrainingSetFromFile(String str, Charset charset) throws IOException {
        // Both resources are declared separately so the file handle is released even when
        // the reader cannot be constructed or parsing fails half-way.
        try (FileInputStream input = new FileInputStream(str);
                InputStreamReader reader = new InputStreamReader(input, charset)) {
            return new ObjectMapper().readValue(reader, new TypeReference<List<TaggedObject>>() {
            });
        }
    }

    /**
     * Converts tagged examples into the two parallel lists passed to the trainer.
     *
     * @param list tagged examples to convert
     * @return a pair whose first element holds one feature sequence per example and whose
     *         second element holds the matching tag list, in the same order
     */
    private Pair<List<ItemSequence>, List<StringList>> prepareTrainingInstances(List<TaggedObject> list) {
        List<ItemSequence> featureSequences = new ArrayList<>(list.size());
        List<StringList> labelSequences = new ArrayList<>(list.size());
        for (TaggedObject taggedObject : list) {
            featureSequences.add(featuresGenerator.getFeatureSequenceFromTokens(taggedObject.getTokens()));
            StringList labels = new StringList();
            taggedObject.getTags().forEach(labels::add);
            labelSequences.add(labels);
        }
        return new Pair<>(featureSequences, labelSequences);
    }

    /**
     * Trains from a single training file.
     *
     * @param str path of the training file
     * @param str2 value forwarded as the last argument of {@link CrfTrainer#train}
     *        (presumably the model output path; confirm against the jcrfsuite API)
     * @param charset charset used to decode the training file
     * @throws IOException if the training file cannot be read or parsed
     */
    public void crfTrain(String str, String str2, Charset charset) throws IOException {
        crfTrain(Collections.singleton(str), str2, charset);
    }

    /**
     * Trains from several training files; their examples are concatenated in iteration order.
     *
     * @param collection paths of the training files
     * @param str value forwarded as the last argument of {@link CrfTrainer#train}
     *        (presumably the model output path; confirm against the jcrfsuite API)
     * @param charset charset used to decode every training file
     * @throws IOException if any training file cannot be read or parsed
     */
    public void crfTrain(Collection<String> collection, String str, Charset charset) throws IOException {
        List<TaggedObject> trainingSet = new ArrayList<>();
        for (String path : collection) {
            trainingSet.addAll(readTrainingSetFromFile(path, charset));
        }
        Pair<List<ItemSequence>, List<StringList>> instances = prepareTrainingInstances(trainingSet);
        CrfTrainer.train(instances.first, instances.second, str);
    }
}
