package org.talend.windowkey;

import java.util.Iterator;
import java.util.TreeSet;
import java.util.regex.Pattern;

/* loaded from: input_file:org/talend/windowkey/NGramFingerprintKeyer.class */
/**
 * Keyer that builds a key from the sorted, de-duplicated set of n-grams of a string.
 *
 * <p>The input is lower-cased, stripped of punctuation, control and whitespace characters,
 * split into n-grams measured in Unicode code points, and the distinct n-grams are
 * concatenated in their natural ({@link TreeSet}) order. The concatenation is finally passed
 * through {@code asciify}, which is defined outside this file.
 */
public class NGramFingerprintKeyer extends FingerprintKeyer {
    /**
     * Characters removed before n-grams are built: {@code \p{Punct}}, {@code \p{Cntrl}} and
     * {@code \p{Space}}. Without {@code UNICODE_CHARACTER_CLASS} these POSIX classes match
     * US-ASCII characters only.
     */
    private static final Pattern STRIP_CHARS = Pattern.compile("\\p{Punct}|\\p{Cntrl}|\\p{Space}");

    /**
     * Computes the key of {@code str} using n-grams of size 2.
     *
     * @param str the string to key; must not be {@code null}
     * @return the n-gram key of {@code str}
     */
    @Override // org.talend.windowkey.FingerprintKeyer
    public String key(String str) {
        return key(str, 2);
    }

    /**
     * Computes the key of {@code str} using n-grams of {@code i} code points.
     *
     * @param str the string to key; must not be {@code null}
     * @param i the n-gram size, in code points
     * @return the distinct n-grams of the normalized string, sorted and concatenated, then
     *     passed through {@code asciify}
     * @throws IndexOutOfBoundsException if {@code i} is negative
     */
    public String key(String str, int i) {
        // NOTE(review): toLowerCase() uses the JVM default locale, so keys may differ between
        // locales (e.g. Turkish dotless i). Confirm whether Locale.ROOT is wanted here.
        String normalized = STRIP_CHARS.matcher(str.toLowerCase()).replaceAll("");
        // TreeSet iteration order is the sorted order, so a plain join yields the sorted key.
        return asciify(String.join("", ngram_split(normalized, i)));
    }

    /**
     * Splits {@code str} into the sorted set of its distinct n-grams.
     *
     * <p>N-grams are measured in Unicode code points, so a surrogate pair is never split.
     * If {@code str} holds fewer than {@code i} code points the result is empty; if
     * {@code i} is zero the result contains only the empty string.
     *
     * @param str the string to split; must not be {@code null}
     * @param i the n-gram size, in code points
     * @return a new, mutable set holding every distinct n-gram of {@code str}
     * @throws IndexOutOfBoundsException if {@code i} is negative
     */
    protected TreeSet<String> ngram_split(String str, int i) {
        TreeSet<String> grams = new TreeSet<>();
        int length = str.length();
        // Too short to hold a single n-gram.
        if (i > str.codePointCount(0, length)) {
            return grams;
        }
        // Slide a window of i code points across the string, advancing both edges by one
        // code point per step instead of re-scanning from index 0 for every n-gram.
        int start = 0;
        int end = str.offsetByCodePoints(0, i); // rejects a negative size
        while (true) {
            grams.add(str.substring(start, end));
            if (end == length) {
                break; // the window has reached the end of the string
            }
            start = str.offsetByCodePoints(start, 1);
            end = str.offsetByCodePoints(end, 1);
        }
        return grams;
    }
}
