/*
 * Decompiled with CFR 0.152.
 */
package org.dizitart.no2.fulltext;

import java.io.IOException;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import org.dizitart.no2.fulltext.TextTokenizer;

/**
 * Base tokenizer that splits text on a fixed set of delimiter characters,
 * lower-cases every token and discards the ones reported as stop words.
 *
 * <p>The stop-word set is read through {@code stopWords()}, which is not
 * declared in this class; presumably it is inherited from
 * {@link TextTokenizer} and supplied by concrete subclasses (confirm against
 * the interface).
 */
public abstract class BaseTextTokenizer
implements TextTokenizer {
    /**
     * Delimiter characters handed to {@link StringTokenizer}. Despite the
     * name, this covers punctuation and symbol characters as well as
     * whitespace; any run of these characters separates two tokens.
     */
    private static final String WHITESPACE_CHARS = " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]<>\\";

    /**
     * Splits {@code text} into its distinct, normalized words.
     *
     * @param text the text to tokenize; {@code null} yields an empty set
     * @return a new mutable set holding every token for which
     *         {@link #convertWord(String)} returned a non-null value; never
     *         {@code null}
     * @throws IOException declared by the signature; nothing in this
     *         implementation throws it
     */
    @Override
    public Set<String> tokenize(String text) throws IOException {
        Set<String> words = new HashSet<String>();
        if (text == null) {
            return words;
        }
        StringTokenizer tokenizer = new StringTokenizer(text, WHITESPACE_CHARS);
        while (tokenizer.hasMoreTokens()) {
            String converted = this.convertWord(tokenizer.nextToken());
            // A null result means the token was rejected and must not be indexed.
            if (converted != null) {
                words.add(converted);
            }
        }
        return words;
    }

    /**
     * Normalizes a single token and filters out stop words.
     *
     * @param word the raw token; must not be {@code null}
     * @return the lower-cased token, or {@code null} when the lower-cased
     *         token is contained in {@code stopWords()}
     */
    protected String convertWord(String word) {
        // Locale.ROOT keeps case folding independent of the JVM default locale;
        // e.g. under a Turkish locale "I" would otherwise become dotless "ı",
        // producing different tokens and stop-word matches on different machines.
        String lowered = word.toLowerCase(Locale.ROOT);
        if (this.stopWords().contains(lowered)) {
            return null;
        }
        return lowered;
    }
}

