package org.apache.mahout.clustering.kmeans;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.clustering.canopy.CanopyDriverSpe;
import org.apache.mahout.clustering.classify.ClusterClassificationDriverSpe;
import org.apache.mahout.clustering.classify.ClusterClassifierSpe;
import org.apache.mahout.clustering.iterator.CIMapperSpe;
import org.apache.mahout.clustering.iterator.CIReducerSpe;
import org.apache.mahout.clustering.iterator.ClusterIterator;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.iterator.ClusteringPolicy;
import org.apache.mahout.clustering.iterator.KMeansClusteringPolicy;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/kmeans/KMeansDriverSpe.class */
/**
 * Static entry points for the "Spe" flavour of the k-means driver: building
 * clusters iteratively (sequentially or as a chain of MapReduce jobs) and then
 * optionally classifying the input points against the final clusters.
 */
public class KMeansDriverSpe extends KMeansDriver {
    private static final Logger log = LoggerFactory.getLogger(KMeansDriverSpe.class);

    /** Configuration key under which each iteration's prior-clusters path is stored before its job is created. */
    public static final String PRIOR_PATH_KEY = "org.apache.mahout.clustering.prior.path";

    /**
     * Seeds {@code clusters-0} under the output directory from the given prior
     * clusters and then iterates, either sequentially or via MapReduce.
     *
     * @param configuration   Hadoop configuration used for all file access and jobs
     * @param path            input data path
     * @param path2           path the initial clusters are loaded from
     * @param path3           output directory; {@code clusters-N} directories are created beneath it
     * @param distanceMeasure not used by this method; kept for signature compatibility
     * @param i               maximum number of iterations
     * @param str             convergence value as a decimal string, parsed with {@link Double#parseDouble(String)}
     * @param z               {@code true} to iterate via {@code ClusterIterator.iterateSeq},
     *                        {@code false} to iterate via {@link #iterateMRSpe}
     * @return {@code path3}, unchanged
     * @throws NumberFormatException    if {@code str} is not a parsable double
     * @throws IllegalStateException    if no clusters could be loaded from {@code path2}
     * @throws IllegalArgumentException if {@code z} is {@code false} and {@code i} is less than 1
     *                                  (raised by {@link #iterateMRSpe} after {@code clusters-0} has been written)
     */
    public static Path buildClustersSpe(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, int i, String str, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        double convergence = Double.parseDouble(str);

        // Raw list: it is only filled by KMeansUtil and handed straight to the classifier.
        ArrayList priorClusters = Lists.newArrayList();
        KMeansUtil.configureWithClusterInfo(configuration, path2, priorClusters);
        if (priorClusters.isEmpty()) {
            throw new IllegalStateException("No input clusters found in " + path2 + ". Check your -c argument.");
        }

        Path initialClusters = new Path(path3, "clusters-0");
        new ClusterClassifierSpe(priorClusters, new KMeansClusteringPolicy(convergence)).writeToSeqFiles(configuration, initialClusters);

        if (z) {
            ClusterIterator.iterateSeq(configuration, path, initialClusters, path3, i);
        } else {
            iterateMRSpe(configuration, path, initialClusters, path3, i);
        }
        return path3;
    }

    /**
     * Builds the clusters and, when requested, classifies the input against them.
     *
     * @param configuration   Hadoop configuration used for all file access and jobs
     * @param path            input data path
     * @param path2           path the initial clusters are loaded from
     * @param path3           output directory
     * @param distanceMeasure forwarded to {@link #buildClustersSpe} and {@link #clusterDataSpe}
     * @param d               convergence value
     * @param i               maximum number of iterations
     * @param z               {@code true} to run {@link #clusterDataSpe} after the clusters are built
     * @param d2              forwarded to {@link #clusterDataSpe} as its {@code d} argument
     *                        (presumably the classification threshold - confirm against
     *                        {@code ClusterClassificationDriverSpe.run})
     * @param z2              {@code true} to run sequentially instead of via MapReduce
     */
    public static void runSpe(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, double d, int i, boolean z, double d2, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        String convergence = Double.toString(d);
        log.info("convergence: {} max Iterations: {}", d, i);

        Path clustersOut = buildClustersSpe(configuration, path, path2, path3, distanceMeasure, i, convergence, z2);
        if (z) {
            log.info("Clustering data");
            clusterDataSpe(configuration, path, clustersOut, path3, distanceMeasure, d2, z2);
        }
    }

    /**
     * Runs up to {@code i} MapReduce iterations. Iteration {@code n} reads the
     * prior clusters from the previous iteration's output (initially {@code path2})
     * and writes {@code clusters-n} under {@code path3}. Iteration stops early once
     * every cluster written by an iteration reports itself converged. The output of
     * the last iteration run is then renamed to {@code clusters-n-final}.
     *
     * @param configuration Hadoop configuration; {@link #PRIOR_PATH_KEY} is overwritten on every iteration
     * @param path          input data path
     * @param path2         path of the initial (prior) clusters; the clustering policy is read from here
     * @param path3         output directory
     * @param i             maximum number of iterations; must be at least 1
     * @throws IllegalArgumentException if {@code i} is less than 1
     * @throws InterruptedException     if a job reports failure (or the wait is interrupted)
     */
    public static void iterateMRSpe(Configuration configuration, Path path, Path path2, Path path3, int i) throws IOException, InterruptedException, ClassNotFoundException {
        // Without at least one iteration there is no output directory to rename to "-final".
        if (i < 1) {
            throw new IllegalArgumentException("Number of iterations must be at least 1 but was " + i);
        }

        ClusteringPolicy policy = ClusterClassifierSpe.readPolicy(configuration, path2);
        // Every clusters-N directory is a child of path3, so one lookup serves all of them.
        FileSystem fs = FileSystem.get(path3.toUri(), configuration);

        Path priorPath = path2;
        Path clustersOut;
        int iteration = 0;
        do {
            iteration++;
            configuration.set(PRIOR_PATH_KEY, priorPath.toString());

            Job job = new Job(configuration, "Cluster Iterator running iteration " + iteration + " over priorPath: " + priorPath);
            job.setMapOutputKeyClass(IntWritable.class);
            job.setMapOutputValueClass(ClusterWritable.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(ClusterWritable.class);
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(CIMapperSpe.class);
            job.setReducerClass(CIReducerSpe.class);
            job.setJarByClass(ClusterIterator.class);

            clustersOut = new Path(path3, "clusters-" + iteration);
            FileInputFormat.addInputPath(job, path);
            FileOutputFormat.setOutputPath(job, clustersOut);

            if (!job.waitForCompletion(true)) {
                throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + clustersOut);
            }
            ClusterClassifierSpe.writePolicy(configuration, policy, clustersOut);

            // This iteration's output is the next iteration's prior.
            priorPath = clustersOut;
            // The convergence check is skipped after the last permitted iteration: the loop ends either way.
        } while (iteration < i && !isConverged(clustersOut, configuration, fs));

        Path finalClusters = new Path(path3, "clusters-" + iteration + "-final");
        if (!fs.rename(clustersOut, finalClusters)) {
            log.warn("Could not rename {} to {}", clustersOut, finalClusters);
        }
    }

    /**
     * Reports whether every cluster stored in the part files directly under
     * {@code clustersPath} is flagged as converged.
     *
     * @return {@code false} as soon as one non-converged cluster is found, {@code true} otherwise
     *         (including when there are no part files or no clusters)
     */
    private static boolean isConverged(Path clustersPath, Configuration configuration, FileSystem fileSystem) throws IOException {
        for (FileStatus part : fileSystem.listStatus(clustersPath, PathFilters.partFilter())) {
            SequenceFileValueIterator<ClusterWritable> clusters =
                new SequenceFileValueIterator<ClusterWritable>(part.getPath(), true, configuration);
            try {
                while (clusters.hasNext()) {
                    if (!clusters.next().getValue().isConverged()) {
                        return false;
                    }
                }
            } finally {
                // Close on every exit path (early return, exhaustion, exception); close failures are swallowed.
                Closeables.close(clusters, true);
            }
        }
        return true;
    }

    /**
     * Writes a default k-means clustering policy to {@code path2} and runs the
     * cluster classification driver over the input, reading clusters from
     * {@code path3} and writing the clustered points into a sub-directory of it.
     *
     * @param configuration   Hadoop configuration
     * @param path            input data path
     * @param path2           directory the clustering policy is written to
     * @param path3           output directory; also passed to the classification driver as its clusters location
     * @param distanceMeasure only logged here
     * @param d               value handed to the classification driver
     *                        (presumably the classification threshold - confirm against
     *                        {@code ClusterClassificationDriverSpe.run})
     * @param z               {@code true} to run sequentially instead of via MapReduce
     */
    public static void clusterDataSpe(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, double d, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        log.info("Running Clustering");
        log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[]{path, path2, path3, distanceMeasure});

        ClusterClassifierSpe.writePolicy(configuration, new KMeansClusteringPolicy(), path2);

        Path clusteredPoints = new Path(path3, CanopyDriverSpe.DEFAULT_CLUSTERED_POINTS_DIRECTORY);
        ClusterClassificationDriverSpe.run(configuration, path, path3, clusteredPoints, Double.valueOf(d), true, z);
    }
}
