/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.r;

import org.apache.spark.SparkException;
import org.apache.spark.ml.Model;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.Transformer;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.clustering.LDA;
import org.apache.spark.ml.clustering.LDAModel;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.r.LDAWrapper;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReadable;
import org.apache.spark.ml.util.MLReadable$class;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.collection.GenTraversableOnce;
import scala.collection.Seq;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

public final class LDAWrapper$
implements MLReadable<LDAWrapper> {
    public static final LDAWrapper$ MODULE$;
    private final String TOKENIZER_COL;
    private final String STOPWORDS_REMOVER_COL;
    private final String COUNT_VECTOR_COL;

    static {
        new LDAWrapper$();
    }

    public String TOKENIZER_COL() {
        return this.TOKENIZER_COL;
    }

    public String STOPWORDS_REMOVER_COL() {
        return this.STOPWORDS_REMOVER_COL;
    }

    public String COUNT_VECTOR_COL() {
        return this.COUNT_VECTOR_COL;
    }

    private PipelineStage[] getPreStages(String features, String[] customizedStopWords, int maxVocabSize) {
        RegexTokenizer tokenizer = (RegexTokenizer)((UnaryTransformer)new RegexTokenizer().setInputCol(features)).setOutputCol(this.TOKENIZER_COL());
        StopWordsRemover stopWordsRemover = new StopWordsRemover().setInputCol(this.TOKENIZER_COL()).setOutputCol(this.STOPWORDS_REMOVER_COL());
        stopWordsRemover.setStopWords((String[])Predef$.MODULE$.refArrayOps((Object[])stopWordsRemover.getStopWords()).$plus$plus((GenTraversableOnce)Predef$.MODULE$.refArrayOps((Object[])customizedStopWords), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))));
        CountVectorizer countVectorizer = new CountVectorizer().setVocabSize(maxVocabSize).setInputCol(this.STOPWORDS_REMOVER_COL()).setOutputCol(this.COUNT_VECTOR_COL());
        return (PipelineStage[])((Object[])new PipelineStage[]{tokenizer, stopWordsRemover, countVectorizer});
    }

    /*
     * WARNING - void declaration
     */
    public LDAWrapper fit(Dataset<Row> data, String features, int k, int maxIter, String optimizer, double subsamplingRate, double topicConcentration, double[] docConcentration, String[] customizedStopWords, int maxVocabSize) {
        void var14_12;
        block7: {
            String[] stringArray;
            PipelineStage[] pipelineStageArray;
            StructField featureSchema;
            LDA lda;
            block6: {
                DataType dataType;
                block5: {
                    lda = new LDA().setK(k).setMaxIter(maxIter).setSubsamplingRate(subsamplingRate);
                    featureSchema = data.schema().apply(features);
                    dataType = featureSchema.dataType();
                    if (!(dataType instanceof StringType)) break block5;
                    pipelineStageArray = (PipelineStage[])Predef$.MODULE$.refArrayOps((Object[])this.getPreStages(features, customizedStopWords, maxVocabSize)).$plus$plus((GenTraversableOnce)Predef$.MODULE$.refArrayOps((Object[])new LDA[]{lda.setFeaturesCol(this.COUNT_VECTOR_COL())}), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(PipelineStage.class)));
                    break block6;
                }
                if (!(dataType instanceof VectorUDT)) break block7;
                pipelineStageArray = (PipelineStage[])((Object[])new LDA[]{lda.setFeaturesCol(features)});
            }
            PipelineStage[] stages = pipelineStageArray;
            Object object = topicConcentration != (double)-1 ? lda.setTopicConcentration(topicConcentration) : BoxedUnit.UNIT;
            LDA lDA = docConcentration.length == 1 ? (BoxesRunTime.unboxToDouble((Object)Predef$.MODULE$.doubleArrayOps(docConcentration).head()) != (double)-1 ? lda.setDocConcentration(BoxesRunTime.unboxToDouble((Object)Predef$.MODULE$.doubleArrayOps(docConcentration).head())) : BoxedUnit.UNIT) : lda.setDocConcentration(docConcentration);
            Pipeline pipeline = new Pipeline().setStages(stages);
            Model model = pipeline.fit((Dataset)data);
            DataType dataType = featureSchema.dataType();
            if (dataType instanceof StringType) {
                CountVectorizerModel countVectorModel = (CountVectorizerModel)((PipelineModel)model).stages()[2];
                stringArray = countVectorModel.vocabulary();
            } else {
                stringArray = (String[])Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
            }
            String[] vocabulary = stringArray;
            LDAModel ldaModel = (LDAModel)Predef$.MODULE$.refArrayOps((Object[])((PipelineModel)model).stages()).last();
            PipelineModel preprocessor = new PipelineModel(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{Identifiable$.MODULE$.randomUID(pipeline.uid())})), (Transformer[])Predef$.MODULE$.refArrayOps((Object[])((PipelineModel)model).stages()).dropRight(1));
            Dataset<Row> preprocessedData = preprocessor.transform(data);
            return new LDAWrapper((PipelineModel)model, ldaModel.logLikelihood(preprocessedData), ldaModel.logPerplexity(preprocessedData), vocabulary);
        }
        throw new SparkException(new StringBuilder().append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Unsupported input features type of ", ","})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{var14_12.dataType().typeName()}))).append((Object)new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{" only String type and Vector type are supported now."})).s((Seq)Nil$.MODULE$)).toString());
    }

    @Override
    public MLReader<LDAWrapper> read() {
        return new LDAWrapper.LDAWrapperReader();
    }

    @Override
    public LDAWrapper load(String path) {
        return (LDAWrapper)MLReadable$class.load(this, path);
    }

    private LDAWrapper$() {
        MODULE$ = this;
        MLReadable$class.$init$(this);
        this.TOKENIZER_COL = new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{Identifiable$.MODULE$.randomUID("rawTokens")}));
        this.STOPWORDS_REMOVER_COL = new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{Identifiable$.MODULE$.randomUID("tokens")}));
        this.COUNT_VECTOR_COL = new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{Identifiable$.MODULE$.randomUID("features")}));
    }
}

