package uk.ac.shef.wit.simmetrics.tokenisers;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration;
import uk.ac.shef.wit.simmetrics.wordhandlers.DummyStopTermHandler;
import uk.ac.shef.wit.simmetrics.wordhandlers.InterfaceTermHandler;

/* loaded from: input_file:uk/ac/shef/wit/simmetrics/tokenisers/TokeniserCSVBasic.class */
/**
 * Tokeniser that splits a string into the fields found between delimiter characters.
 *
 * <p>Every character of {@link #getDelimiters()} is treated as a single-character delimiter.
 * Tokens are filtered through the configured {@link InterfaceTermHandler}: a token for which
 * {@code isWord} returns {@code true} is dropped, as is a token consisting of exactly one space.
 *
 * <p>NOTE: the class is {@link Serializable} without an explicit {@code serialVersionUID}, so the
 * signatures and modifiers of its non-private members are deliberately left untouched.
 */
public final class TokeniserCSVBasic implements InterfaceTokeniser, Serializable {
    /** Handler deciding which tokens are discarded; replaceable via {@link #setStopWordHandler}. */
    private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();

    /**
     * Delimiter characters; each character of this string ends a token.
     *
     * <p>NOTE(review): the reference to an ehcache configuration constant looks like a decompiler
     * constant-substitution artifact (presumably the literal {@code ","}) - confirm and inline.
     */
    private final String delimiters = TimeoutBehaviorConfiguration.DEFAULT_PROPERTY_SEPARATOR;

    /**
     * Returns a short, fixed name identifying this tokeniser.
     *
     * @return the string {@code "TokeniserCSVBasic"}
     */
    @Override
    public final String getShortDescriptionString() {
        return "TokeniserCSVBasic";
    }

    /**
     * Returns the delimiter characters used to split the input.
     *
     * @return a string whose individual characters are the delimiters
     */
    @Override
    public final String getDelimiters() {
        return this.delimiters;
    }

    /**
     * Returns the handler currently used to filter tokens.
     *
     * @return the current stop-word handler
     */
    @Override
    public InterfaceTermHandler getStopWordHandler() {
        return this.stopWordHandler;
    }

    /**
     * Replaces the handler used to filter tokens.
     *
     * @param interfaceTermHandler the handler to use from now on
     */
    @Override
    public void setStopWordHandler(InterfaceTermHandler interfaceTermHandler) {
        this.stopWordHandler = interfaceTermHandler;
    }

    /**
     * Splits {@code str} at every delimiter character and returns the tokens in input order.
     *
     * <p>At most one whitespace character directly in front of each token is skipped; any other
     * whitespace stays part of the token. An empty field (a delimiter at the start of the input or
     * two adjacent delimiters) yields an empty-string token unless the handler drops it. The scan
     * stops once the cursor passes the end of the input, so a delimiter in the final position
     * adds no trailing empty token.
     *
     * @param str the text to tokenise
     * @return the tokens that passed the filter, in order of appearance; never {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public final ArrayList<String> tokenizeToArrayList(String str) {
        final ArrayList<String> tokens = new ArrayList<>();
        final int inputLength = str.length();
        int start = 0;
        while (start < inputLength) {
            // Skip a single leading whitespace character, e.g. the blank in "a, b".
            if (Character.isWhitespace(str.charAt(start))) {
                start++;
            }
            final int end = nextDelimiterIndex(str, start);
            final String token = str.substring(start, end);
            if (!this.stopWordHandler.isWord(token) && !token.equals(" ")) {
                tokens.add(token);
            }
            // Step over the delimiter that ended this token. Resuming AT the delimiter would find
            // it again at the same index and loop forever, appending empty tokens.
            start = end + 1;
        }
        return tokens;
    }

    /**
     * Finds the index of the nearest delimiter character at or after {@code from}.
     *
     * @param str  the text being scanned
     * @param from the index to start searching from
     * @return the index of the closest delimiter, or {@code str.length()} if there is none
     */
    private int nextDelimiterIndex(String str, int from) {
        int nearest = str.length();
        for (int d = 0; d < this.delimiters.length(); d++) {
            final int hit = str.indexOf(this.delimiters.charAt(d), from);
            if (hit != -1 && hit < nearest) {
                nearest = hit;
            }
        }
        return nearest;
    }

    /**
     * Tokenises {@code str} like {@link #tokenizeToArrayList(String)} and removes duplicates.
     *
     * @param str the text to tokenise
     * @return the distinct tokens, in no particular order
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public Set<String> tokenizeToSet(String str) {
        return new HashSet<>(tokenizeToArrayList(str));
    }
}
