package org.talend.dataquality.nlp.toolkit;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.simple.Document;
import edu.stanford.nlp.simple.Sentence;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;

/* loaded from: input_file:org/talend/dataquality/nlp/toolkit/StanfordNLP.class */
/**
 * {@link AbstractToolkit} implementation backed by Stanford CoreNLP.
 *
 * <p>The tokenizer factory, POS tagger, constituency parser and NER classifier are held in
 * static fields and are therefore shared by every instance of this class. They are loaded
 * from the {@code edu/stanford/nlp/models/} classpath resource the first time an instance is
 * constructed.
 *
 * <p>{@link #sentenceSplit(String)}, {@link #dependencyParsing(String)} and
 * {@link #lemmatization(List, List)} go through the Stanford "simple" API
 * ({@link Document} / {@link Sentence}) instead of the static models above.
 */
public class StanfordNLP extends AbstractToolkit {
    private static final long serialVersionUID = 1L;

    /** Token factory handed to {@link PTBTokenizer}; shared by all instances. */
    static CoreLabelTokenFactory tokenizer;

    /** Part-of-speech tagger; shared by all instances. */
    static MaxentTagger tagger;

    /** Named-entity sequence classifier; shared by all instances. */
    static AbstractSequenceClassifier<CoreLabel> classifier;

    /** Constituency parser; shared by all instances. */
    static LexicalizedParser parser;

    /** String form of the URL of the {@code edu/stanford/nlp/models/} classpath resource. */
    String modelpath = resolveModelPath();

    /**
     * Creates the toolkit, loading any shared model that has not been loaded yet.
     *
     * @throws IOException declared for compatibility with existing callers
     * @throws NullPointerException if {@code edu/stanford/nlp/models/} is not on the classpath
     */
    public StanfordNLP() throws IOException {
        loadModels(this.modelpath);
    }

    /**
     * Locates the model root on the classpath of this object's class loader.
     *
     * @return the resource URL rendered as a string
     * @throws NullPointerException (with an explanatory message) if the resource is missing
     */
    private String resolveModelPath() {
        java.net.URL modelRoot = getClass().getClassLoader().getResource("edu/stanford/nlp/models/");
        if (modelRoot == null) {
            // Same exception type the unchecked dereference used to raise, but with a usable message.
            throw new NullPointerException(
                    "Classpath resource 'edu/stanford/nlp/models/' not found; are the Stanford model jars present?");
        }
        return modelRoot.toString();
    }

    /**
     * Populates the shared static models, skipping any that is already set, so that creating
     * additional instances neither reloads the models nor swaps them out under other users.
     *
     * @param modelRoot prefix prepended to each model's relative path
     */
    private static synchronized void loadModels(String modelRoot) {
        if (tokenizer == null) {
            tokenizer = new CoreLabelTokenFactory();
        }
        if (tagger == null) {
            tagger = new MaxentTagger(modelRoot + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
        }
        if (parser == null) {
            parser = LexicalizedParser.loadModel(modelRoot + "lexparser/englishPCFG.ser.gz", new String[0]);
        }
        if (classifier == null) {
            classifier = CRFClassifier.getClassifierNoExceptions(modelRoot + "ner/english.all.3class.distsim.crf.ser.gz");
        }
    }

    /**
     * Splits text into sentences using the simple API's {@link Document}.
     *
     * @param str the text to split
     * @return the text of each sentence, in document order
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public List<String> sentenceSplit(String str) {
        List<String> sentences = new ArrayList<>();
        for (Sentence sentence : new Document(str).sentences()) {
            sentences.add(sentence.text());
        }
        return sentences;
    }

    /**
     * Tokenizes every input string with {@link PTBTokenizer} and flattens the result.
     *
     * @param list the strings to tokenize
     * @return the original text of every token, across all inputs, in order
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public List<String> tokenization(List<String> list) {
        List<String> tokens = new ArrayList<>();
        for (String text : list) {
            PTBTokenizer<CoreLabel> ptb = new PTBTokenizer<>(new StringReader(text), tokenizer, "");
            while (ptb.hasNext()) {
                tokens.add(ptb.next().originalText());
            }
        }
        return tokens;
    }

    /**
     * Wraps each string in a {@link TaggedWord} built from the word alone.
     *
     * @param list the words to wrap
     * @return one {@link TaggedWord} per input word, in order
     */
    List<TaggedWord> toTaggedWordList(List<String> list) {
        List<TaggedWord> words = new ArrayList<>();
        for (String word : list) {
            words.add(new TaggedWord(word));
        }
        return words;
    }

    /**
     * Wraps each string in a {@link CoreLabel} whose word and original text are both that string.
     *
     * @param list the words to wrap
     * @return one {@link CoreLabel} per input word, in order
     */
    List<CoreLabel> toCoreLabelList(List<String> list) {
        List<CoreLabel> labels = new ArrayList<>();
        for (String word : list) {
            CoreLabel label = new CoreLabel();
            label.setWord(word);
            label.setOriginalText(word);
            labels.add(label);
        }
        return labels;
    }

    /**
     * Extracts the tag of every tagged word.
     *
     * @param list the tagged words
     * @return the value of {@link TaggedWord#tag()} for each element, in order
     */
    List<String> toStringList(List<TaggedWord> list) {
        List<String> tags = new ArrayList<>();
        for (TaggedWord word : list) {
            tags.add(word.tag());
        }
        return tags;
    }

    /**
     * Tags one tokenized sentence with the shared {@link MaxentTagger}.
     *
     * @param list the tokens of the sentence
     * @return the tag assigned to each token, in order
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public List<String> partOfSpeechTagging(List<String> list) {
        return toStringList(tagger.tagSentence(toTaggedWordList(list)));
    }

    /**
     * Classifies one tokenized sentence with the shared sequence classifier.
     *
     * @param list the tokens of the sentence
     * @return the {@link CoreAnnotations.AnswerAnnotation} value of each classified token, in order
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public List<String> namedEntityRecognition(List<String> list) {
        List<String> answers = new ArrayList<>();
        for (CoreLabel label : classifier.classifySentence(toCoreLabelList(list))) {
            answers.add(label.get(CoreAnnotations.AnswerAnnotation.class));
        }
        return answers;
    }

    /**
     * Parses one tokenized sentence with the shared {@link LexicalizedParser}.
     *
     * @param list the tokens of the sentence
     * @return the parse tree rendered by {@code pennString()}
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public String syntacticParsing(List<String> list) {
        return parser.parse(toCoreLabelList(list)).pennString();
    }

    /**
     * Concatenates the incoming dependency label of every token of every sentence in the text.
     *
     * <p>Each label is appended via {@link Optional#toString()} with no separator, so the
     * result contains the {@code Optional[...]} / {@code Optional.empty} wrappers. That format
     * is kept as-is because callers may rely on it.
     *
     * @param str the text to analyse
     * @return the concatenated labels; empty if the document has no sentences
     */
    public String dependencyParsing(String str) {
        StringBuilder labels = new StringBuilder();
        for (Sentence sentence : new Document(str).sentences()) {
            for (Optional<String> label : sentence.incomingDependencyLabels()) {
                labels.append(label.toString());
            }
        }
        return labels.toString();
    }

    /**
     * Lemmatizes one tokenized sentence.
     *
     * <p>The tokens are handed to the simple API as a single pre-tokenized {@link Sentence},
     * which runs its own tagging and lemmatization. Calling {@code lemma()} on freshly
     * created {@link CoreLabel}s, as was done before, only ever yields {@code null} because
     * no lemmatizer has annotated them.
     *
     * @param list  the tokens of the sentence
     * @param list2 not used by this implementation
     *              (NOTE(review): presumably the POS tags of {@code list} - confirm against AbstractToolkit)
     * @return the lemma of each token, in order; an empty list for empty input
     */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public List<String> lemmatization(List<String> list, List<String> list2) {
        if (list.isEmpty()) {
            // Nothing to annotate; avoid building a Sentence from empty text.
            return new ArrayList<>();
        }
        // Copy so callers keep receiving a mutable list, as before.
        return new ArrayList<>(new Sentence(list).lemmas());
    }

    /** No-op: this implementation always uses the bundled classpath models. */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public void setSentenceSplitModelPath(String str) {
    }

    /** No-op: this implementation always uses the bundled classpath models. */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public void setTokenizationModelPath(String str) {
    }

    /** No-op: this implementation always uses the bundled classpath models. */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public void setPartOfSpeechTaggingModelPath(String str) {
    }

    /** No-op: this implementation always uses the bundled classpath models. */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public void setNamedEntityyModelPath(String str) {
    }

    /** No-op: this implementation always uses the bundled classpath models. */
    @Override // org.talend.dataquality.nlp.toolkit.AbstractToolkit
    public void setSyntacticParsingModelPath(String str) {
    }
}
