package org.apache.mahout.utils.clustering;

import com.google.common.collect.Lists;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.cdbw.CDbwEvaluator;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
import org.apache.mahout.clustering.evaluation.RepresentativePointsMapper;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.datascience.mahout.clustering.TalendJob;

/* loaded from: input_file:org/apache/mahout/utils/clustering/ClusterDumperSpe.class */
/**
 * Writes clustered points to a delimited text file on a Hadoop {@link FileSystem} and logs
 * cluster quality metrics.
 *
 * <p>Clusters are read from sequence files matching {@code part-*} under the cluster directory.
 * The points assigned to each cluster are loaded once, at construction time, from the points
 * directory. Instances keep mutable state (the accumulated cluster list) and are not designed
 * for concurrent use.
 */
public class ClusterDumperSpe extends AbstractJob {
    // Option-name constants. None of them is referenced elsewhere in this class.
    public static final String SAMPLE_POINTS = "samplePoints";
    public static final String OUTPUT_OPTION = "output";
    public static final String DICTIONARY_TYPE_OPTION = "dictionaryType";
    public static final String DICTIONARY_OPTION = "dictionary";
    public static final String POINTS_DIR_OPTION = "pointsDir";
    public static final String NUM_WORDS_OPTION = "numWords";
    public static final String SUBSTRING_OPTION = "substring";
    public static final String SEQ_FILE_DIR_OPTION = "seqFileDir";
    public static final String EVALUATE_CLUSTERS = "evaluate";
    public static final String OUTPUT_FORMAT_OPT = "outputFormat";
    private static final Logger log = LoggerFactory.getLogger(ClusterDumperSpe.class);

    /**
     * Count handed to {@link RepresentativePointsDriver}; the same number is the suffix of the
     * {@code representativePoints-N} directory that is read back for the evaluation.
     */
    private static final int EVALUATION_ITERATIONS = 5;

    /** Charset used for the text written to the output file. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    private Path seqFileDir;
    private Path pointsDir;
    private String termDictionary;
    // Defaults to an empty map so that an instance built with the no-arg constructor does not
    // fail with a NullPointerException when points are looked up.
    private Map<Integer, List<WeightedPropertyVectorWritable>> clusterIdToPoints = Collections.emptyMap();
    /** Clusters read so far; every call to {@link #hdfsWriteClusters} appends to this list. */
    List<Cluster> clusters = Lists.newArrayList();
    private long maxPointsPerCluster = Long.MAX_VALUE;
    private int subString = Integer.MAX_VALUE;
    private int numTopFeatures = 10;
    private boolean runEvaluation = true;

    /** Output format identifiers. Not referenced elsewhere in this class. */
    public enum OUTPUT_FORMAT {
        TEXT,
        CSV,
        GRAPH_ML,
        JSON;

        /**
         * Returns all constants of this enum in declaration order.
         *
         * @return a new array on every call
         */
        public static OUTPUT_FORMAT[] valuesCustom() {
            // values() already hands out a fresh copy, so no further copying is required.
            return values();
        }
    }

    /**
     * Creates a dumper and immediately loads the clustered points using a default
     * {@link Configuration}.
     *
     * @param path directory containing the cluster sequence files
     * @param path2 directory containing the clustered points; may be {@code null}, in which case
     *     no points are loaded
     */
    public ClusterDumperSpe(Path path, Path path2) {
        this.seqFileDir = path;
        this.pointsDir = path2;
        init();
    }

    /**
     * Creates a dumper and immediately loads the clustered points using the given configuration.
     *
     * @param configuration Hadoop configuration used to read the points
     * @param path directory containing the cluster sequence files
     * @param path2 directory containing the clustered points; may be {@code null}, in which case
     *     no points are loaded
     * @param path3 file the clusters are written to by {@link #hdfsWriteClusters}
     */
    public ClusterDumperSpe(Configuration configuration, Path path, Path path2, Path path3) {
        this.seqFileDir = path;
        this.pointsDir = path2;
        this.outputPath = path3;
        init(configuration);
    }

    /** Creates a dumper with a fresh {@link Configuration}; no paths are set and no points are loaded. */
    public ClusterDumperSpe() {
        setConf(new Configuration());
    }

    /**
     * Command-line entry point; delegates to {@link #run(String[])}.
     *
     * @param strArr command-line arguments
     * @throws Exception if {@link #run(String[])} fails
     */
    public static void main(String[] strArr) throws Exception {
        new ClusterDumperSpe().run(strArr);
    }

    /**
     * Does nothing: the arguments are ignored.
     *
     * @param strArr ignored
     * @return always {@code 0}
     */
    public int run(String[] strArr) throws Exception {
        return 0;
    }

    /**
     * Writes one text line per clustered point to the output path and then logs evaluation
     * metrics for the clusters.
     *
     * <p>An existing output path is deleted first. Each line is built by walking {@code str2}:
     * every {@code 'U'} emits the next vector component and every {@code 'N'} emits the next
     * field of the vector name, each followed by {@code str}. The line ends with the cluster id
     * and a newline. Clusters without loaded points produce no lines. Lines are encoded as UTF-8.
     *
     * @param configuration Hadoop configuration used for file system access and evaluation; two
     *     evaluation keys are set on it as a side effect
     * @param str field separator; also passed to {@link String#split(String)} to split the vector
     *     name, so it is interpreted as a regular expression there
     * @param str2 line layout made up of the characters {@code 'U'} and {@code 'N'}
     * @throws IllegalStateException if {@code str2} contains any other character
     * @throws ClassCastException if a point's vector is not a {@link NamedVector}
     * @throws Exception if reading, writing or the evaluation fails
     */
    public void hdfsWriteClusters(Configuration configuration, String str, String str2) throws Exception {
        final String separator = str;
        final String format = str2;

        FileSystem fileSystem = FileSystem.get(this.outputPath.toUri(), configuration);
        if (fileSystem.exists(this.outputPath)) {
            fileSystem.delete(getOutputPath(), true);
        }

        FSDataOutputStream out = fileSystem.create(this.outputPath, true);
        try {
            Path clusterParts = new Path(this.seqFileDir, "part-*");
            for (ClusterWritable clusterWritable
                    : new SequenceFileDirValueIterable<ClusterWritable>(clusterParts, PathType.GLOB, configuration)) {
                Cluster cluster = clusterWritable.getValue();
                this.clusters.add(cluster);
                int clusterId = cluster.getId();
                List<WeightedPropertyVectorWritable> points = this.clusterIdToPoints.get(Integer.valueOf(clusterId));
                if (points == null) {
                    continue;
                }
                for (WeightedPropertyVectorWritable point : points) {
                    // writeBytes(String) would keep only the low byte of each char and corrupt
                    // non-Latin-1 names, so the line is encoded explicitly instead.
                    out.write(formatPoint(point, clusterId, separator, format).getBytes(OUTPUT_CHARSET));
                }
            }
        } finally {
            // Release the stream even when a malformed layout or an I/O error aborts the dump.
            out.close();
        }

        if (this.runEvaluation) {
            logEvaluation(configuration);
        }
    }

    /** Builds the output line for a single point according to the {@code 'U'}/{@code 'N'} layout. */
    private static String formatPoint(WeightedPropertyVectorWritable point, int clusterId,
                                      String separator, String format) {
        Vector values = point.getVector();
        // The name carries the 'N' fields; any other vector type fails here with ClassCastException.
        NamedVector named = (NamedVector) values;
        String[] nameFields = named.getName().split(separator);

        StringBuilder line = new StringBuilder();
        int valueIndex = 0;
        int nameIndex = 0;
        for (int pos = 0; pos < format.length(); pos++) {
            char code = format.charAt(pos);
            if (code == 'U') {
                line.append(values.get(valueIndex++));
            } else if (code == 'N') {
                line.append(nameFields[nameIndex++]);
            } else {
                throw new IllegalStateException("Unknown format!!!");
            }
            line.append(separator);
        }
        return line.append(clusterId).append('\n').toString();
    }

    /** Computes representative points for the collected clusters and logs the evaluation report. */
    private void logEvaluation(Configuration configuration) throws Exception {
        DistanceMeasure measure = TalendJob.distanceMeasure;
        Path representativeDir = new Path(this.seqFileDir, "tmp/representative");
        RepresentativePointsDriver.run(configuration, this.seqFileDir, this.pointsDir, representativeDir,
                measure, EVALUATION_ITERATIONS, true);
        configuration.set("org.apache.mahout.clustering.measure", measure.getClass().getName());
        configuration.set("org.apache.mahout.clustering.stateIn",
                representativeDir + "/representativePoints-" + EVALUATION_ITERATIONS);

        // Each evaluator is given its own freshly loaded set of representative points.
        ClusterEvaluator clusterEvaluator = new ClusterEvaluator(
                RepresentativePointsMapper.getRepresentativePoints(configuration), this.clusters, measure);
        CDbwEvaluator cdbwEvaluator = new CDbwEvaluator(
                RepresentativePointsMapper.getRepresentativePoints(configuration), this.clusters, measure);
        log.info(evaluationStrBuilder(clusterEvaluator, cdbwEvaluator).toString());
    }

    /**
     * Renders the metrics of both evaluators as a multi-line report.
     *
     * @param clusterEvaluator source of the plain inter/intra cluster densities
     * @param cDbwEvaluator source of the CDbw densities and separation
     * @return the report text
     */
    private StringBuilder evaluationStrBuilder(ClusterEvaluator clusterEvaluator, CDbwEvaluator cDbwEvaluator) {
        StringBuilder report = new StringBuilder();
        report.append("\n -------------------------- Evaluation -------------------------- \n");
        report.append(" Inter-Cluster Density: \t").append(clusterEvaluator.interClusterDensity()).append('\n');
        report.append(" Intra-Cluster Density: \t").append(clusterEvaluator.intraClusterDensity()).append('\n');
        report.append(" CDbw Inter-Cluster Density: \t").append(cDbwEvaluator.interClusterDensity()).append('\n');
        report.append(" CDbw Intra-Cluster Density: \t").append(cDbwEvaluator.intraClusterDensity()).append('\n');
        report.append(" CDbw Separation: \t").append(cDbwEvaluator.separation()).append('\n');
        report.append(" ------------------------------------------------------------------ \n");
        return report;
    }

    /** Loads the clustered points with a default {@link Configuration}. */
    private void init() {
        init(new Configuration());
    }

    /** Loads the clustered points, or installs an empty map when no points directory is set. */
    private void init(Configuration configuration) {
        if (this.pointsDir != null) {
            this.clusterIdToPoints = readPoints(this.pointsDir, this.maxPointsPerCluster, configuration);
        } else {
            this.clusterIdToPoints = Collections.emptyMap();
        }
    }

    /** @return the stored sub-string value ({@link Integer#MAX_VALUE} unless changed) */
    public int getSubString() {
        return this.subString;
    }

    /** @param i new sub-string value; it is only stored, this class does not read it elsewhere */
    public void setSubString(int i) {
        this.subString = i;
    }

    /** @return the points loaded at construction time, keyed by cluster id; empty if none were loaded */
    public Map<Integer, List<WeightedPropertyVectorWritable>> getClusterIdToPoints() {
        return this.clusterIdToPoints;
    }

    /** @return the term dictionary value last set, or {@code null} if none was set */
    public String getTermDictionary() {
        return this.termDictionary;
    }

    /**
     * Stores the term dictionary value.
     *
     * @param str value to store
     * @param str2 ignored by this implementation
     */
    public void setTermDictionary(String str, String str2) {
        this.termDictionary = str;
    }

    /** @param i new number of top features; it is only stored, this class does not read it elsewhere */
    public void setNumTopFeatures(int i) {
        this.numTopFeatures = i;
    }

    /** @return the stored number of top features ({@code 10} unless changed) */
    public int getNumTopFeatures() {
        return this.numTopFeatures;
    }

    /** @return the stored per-cluster point limit ({@link Long#MAX_VALUE} unless changed) */
    public long getMaxPointsPerCluster() {
        return this.maxPointsPerCluster;
    }

    /**
     * Stores the per-cluster point limit. Points are loaded in the constructors, so changing the
     * limit afterwards does not affect points that are already loaded.
     *
     * @param j new limit
     */
    public void setMaxPointsPerCluster(long j) {
        this.maxPointsPerCluster = j;
    }

    /**
     * Reads clustered points from the sequence files listed under {@code path} and groups them by
     * cluster id.
     *
     * @param path directory of sequence files keyed by cluster id
     * @param j maximum number of points kept per cluster; further points of that cluster are dropped
     * @param configuration Hadoop configuration used to read the files
     * @return a map sorted by cluster id; a cluster id seen in the input is present even when
     *     {@code j} prevents any of its points from being kept
     */
    public static Map<Integer, List<WeightedPropertyVectorWritable>> readPoints(Path path, long j, Configuration configuration) {
        Map<Integer, List<WeightedPropertyVectorWritable>> pointsByCluster =
                new TreeMap<Integer, List<WeightedPropertyVectorWritable>>();
        for (Pair<IntWritable, WeightedPropertyVectorWritable> record
                : new SequenceFileDirIterable<IntWritable, WeightedPropertyVectorWritable>(
                        path, PathType.LIST, PathFilters.logsCRCFilter(), configuration)) {
            Integer clusterId = Integer.valueOf(record.getFirst().get());
            List<WeightedPropertyVectorWritable> points = pointsByCluster.get(clusterId);
            if (points == null) {
                points = Lists.newArrayList();
                pointsByCluster.put(clusterId, points);
            }
            if (points.size() < j) {
                points.add(record.getSecond());
            }
        }
        return pointsByCluster;
    }
}
