package ai.djl.basicdataset.nlp;

import ai.djl.Application;
import ai.djl.basicdataset.nlp.TextDataset;
import ai.djl.modality.nlp.embedding.EmbeddingException;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.repository.Artifact;
import ai.djl.repository.MRL;
import ai.djl.training.dataset.Record;
import ai.djl.util.Progress;
import com.ibm.icu.impl.locale.BaseLocale;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@link TextDataset} backed by the Stanford Movie Review data (the {@code aclImdb} directory
 * tree).
 *
 * <p>Each review is read from one file under a {@code pos} or {@code neg} directory. The review
 * text becomes the source text of the dataset, the directory determines the sentiment label, and
 * the number after the underscore in the file name is recorded as the review score.
 */
public class StanfordMovieReview extends TextDataset {
    private static final String VERSION = "1.0";
    private static final String ARTIFACT_ID = "stanford-movie-review";

    /** Separator between the two parts of a review file name (text before the first dot). */
    private static final String FILE_NAME_SEPARATOR = "_";

    /** Sentiment per review, in load order: {@code true} for files read from {@code pos}. */
    private List<Boolean> reviewSentiments;

    /** Score parsed from each review's file name, in load order. Not read within this class. */
    private List<Integer> reviewImdbScore;

    /** A builder that configures and creates a {@link StanfordMovieReview}. */
    public static class Builder extends TextDataset.Builder<Builder> {
        /** Creates a builder preset with this dataset's artifact id. */
        public Builder() {
            this.artifactId = StanfordMovieReview.ARTIFACT_ID;
        }

        /**
         * Returns this builder.
         *
         * @return this builder
         */
        @Override
        public Builder self() {
            return this;
        }

        /**
         * Creates the dataset from the current builder state.
         *
         * @return a new {@link StanfordMovieReview}
         */
        public StanfordMovieReview build() {
            return new StanfordMovieReview(this);
        }

        /**
         * Builds the repository locator for this dataset from the builder's repository, group id
         * and artifact id, pinned to the dataset version constant.
         *
         * @return the {@link MRL} of the dataset
         */
        MRL getMrl() {
            return this.repository.dataset(
                    Application.NLP.ANY, this.groupId, this.artifactId, StanfordMovieReview.VERSION);
        }
    }

    /**
     * Creates the dataset from a builder, copying its usage and resolving its {@link MRL}.
     *
     * @param builder the builder holding the configuration
     */
    protected StanfordMovieReview(Builder builder) {
        super(builder);
        this.usage = builder.usage;
        this.mrl = builder.getMrl();
    }

    /**
     * Creates a new builder for this dataset.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Prepares the default artifact, reads every review of the configured usage split, and
     * preprocesses the review texts. Does nothing if the dataset is already prepared.
     *
     * @param progress the progress tracker handed to the repository while preparing the artifact
     * @throws IOException if a review file cannot be read or the artifact cannot be prepared
     * @throws EmbeddingException declared for the text preprocessing step
     * @throws UnsupportedOperationException if the usage is neither {@code TRAIN} nor {@code TEST}
     * @throws IllegalArgumentException if the expected review directories or file names are
     *     missing or malformed
     */
    @Override
    public void prepare(Progress progress) throws IOException, EmbeddingException {
        if (this.prepared) {
            return;
        }
        Artifact artifact = this.mrl.getDefaultArtifact();
        this.mrl.prepare(artifact, progress);

        Path cacheDir = this.mrl.getRepository().getCacheDirectory();
        // The data sits two "aclImdb" levels below the artifact's resource directory.
        Path root =
                cacheDir.resolve(artifact.getResourceUri().getPath())
                        .resolve("aclImdb")
                        .resolve("aclImdb");

        String usageDir;
        switch (this.usage) {
            case TRAIN:
                usageDir = "train";
                break;
            case TEST:
                usageDir = "test";
                break;
            case VALIDATION:
            default:
                throw new UnsupportedOperationException("Validation data not available.");
        }
        Path usagePath = root.resolve(usageDir);

        List<String> reviewTexts = new ArrayList<>();
        this.reviewSentiments = new ArrayList<>();
        this.reviewImdbScore = new ArrayList<>();
        // Positive reviews are loaded first, then negative ones; all three lists share this order.
        prepareDataSentiment(usagePath.resolve("pos"), true, reviewTexts);
        prepareDataSentiment(usagePath.resolve("neg"), false, reviewTexts);
        preprocess(reviewTexts, true);
        this.prepared = true;
    }

    /**
     * Reads every regular file in a directory as one UTF-8 review and records its text, sentiment
     * and score.
     *
     * <p>NOTE(review): files are processed in the order returned by {@link
     * File#listFiles(java.io.FileFilter)}, which the JDK does not guarantee to be stable.
     *
     * @param path the directory holding the review files
     * @param positive the sentiment to record for every review in the directory
     * @param reviewTexts the list that receives the review texts
     * @throws IOException if a review file cannot be read
     * @throws IllegalArgumentException if the directory does not exist, cannot be listed, or
     *     contains a file whose name lacks the separator-delimited score part
     */
    private void prepareDataSentiment(Path path, boolean positive, List<String> reviewTexts)
            throws IOException {
        File dir = path.toFile();
        if (!dir.exists()) {
            throw new IllegalArgumentException("Could not find Stanford Movie Review dataset");
        }
        File[] reviewFiles = dir.listFiles(File::isFile);
        if (reviewFiles == null) {
            throw new IllegalArgumentException(
                    "Could not find files in Stanford Movie Review dataset");
        }
        for (File reviewFile : reviewFiles) {
            String text =
                    new String(Files.readAllBytes(reviewFile.toPath()), StandardCharsets.UTF_8);
            // Only the part of the name before the first dot is parsed; the score is its second
            // separator-delimited field.
            String[] nameParts =
                    reviewFile.getName().split("\\.")[0].split(FILE_NAME_SEPARATOR);
            if (nameParts.length < 2) {
                throw new IllegalArgumentException(
                        "Unexpected file name in Stanford Movie Review dataset: "
                                + reviewFile.getName());
            }
            reviewTexts.add(text);
            this.reviewSentiments.add(positive);
            this.reviewImdbScore.add(Integer.parseInt(nameParts[1]));
        }
    }

    /**
     * Returns the record at the given index: the embedding of the review text as data and the
     * review's sentiment, converted to {@code INT32}, as label.
     *
     * @param manager the manager used to create the arrays
     * @param index the index of the review
     * @return the record holding the data and label lists
     * @throws ArithmeticException if {@code index} does not fit in an {@code int}
     */
    @Override
    public Record get(NDManager manager, long index) {
        NDList data = new NDList();
        data.add(this.sourceTextData.getEmbedding(manager, index));
        boolean positive = this.reviewSentiments.get(Math.toIntExact(index));
        NDList label = new NDList(manager.create(positive).toType(DataType.INT32, false));
        return new Record(data, label);
    }

    /**
     * Returns the number of reviews, as reported by the source text data.
     *
     * @return the number of available records
     */
    @Override
    public long availableSize() {
        return this.sourceTextData.getSize();
    }
}
