package org.apache.pig.piggybank.evaluation.util.apachelogparser;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

/* loaded from: input_file:org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.class */
/**
 * Pig eval function that pulls the search term out of a search-engine URL.
 *
 * <p>The first field of the input tuple is parsed as a URL. When its host is a
 * recognised search engine, the value of the first matching search query
 * parameter is URL-decoded and returned. For hosts ending in
 * {@code feedster.com} or {@code technorati.com} the URL path (minus a leading
 * {@code /search/}) is used instead. Anything else yields {@code null}.
 *
 * <p>All static state is immutable and a new {@link Matcher} is created for
 * every lookup, so no mutable state is shared between calls or instances.
 */
public class SearchTermExtractor extends EvalFunc<String> {
    /** Common search-term query parameters; group 1 is the still-encoded term. */
    private static final Pattern TERM_PATTERN =
            Pattern.compile("\\b(?:q|buscar|key|qry|qs|query|s|searchfor|su|w)=([^&]+)");

    /** Fallback {@code p=} query parameter, tried only when {@link #TERM_PATTERN} finds nothing. */
    private static final Pattern P_TERM_PATTERN = Pattern.compile("\\bp=([^&]+)");

    /** Leading {@code www.} label stripped from the (already lower-cased) host before lookup. */
    private static final Pattern WWW_PREFIX = Pattern.compile("^www\\.");

    /** Leading {@code /search/} segment stripped from feedster/technorati paths. */
    private static final Pattern SEARCH_PATH_PREFIX = Pattern.compile("^/search/");

    /** Lower-case host names (without a leading {@code www.}) treated as search engines. */
    private static final Set<String> HOSTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            "alltheweb.com",
            "altavista.com",
            "aolsearch.aol.com",
            "arianna.libero.it",
            "as.starware.com",
            "ask.com",
            "blogs.icerocket.com",
            "blueyonder.co.uk",
            "busca.orange.es",
            "buscador.lycos.es",
            "buscador.terra.es",
            "buscar.ozu.es",
            "categorico.it",
            "cerca.lycos.it",
            "cuil.com",
            "excite.it",
            "godado.com",
            "godado.it",
            "gps.virgin.net",
            "hotbot.com",
            "ilmotore.com",
            "it.altavista.com",
            "ithaki.net",
            "libero.it",
            "lycos.es",
            "lycos.it",
            "mamma.com",
            "megasearching.net",
            "mirago.co.uk",
            "netscape.com",
            "ozu.es",
            "ricerca.alice.it",
            "search.aol.co.uk",
            "search.bbc.co.uk",
            "search.conduit.com",
            "search.icq.com",
            "search.live.com",
            "search.lycos.co.uk",
            "search.lycos.com",
            "search.msn.co.uk",
            "search.msn.com",
            "search.myway.com",
            "search.mywebsearch.com",
            "search.ntlworld.com",
            "search.orange.co.uk",
            "search.sweetim.com",
            "search.virginmedia.com",
            "simpatico.ws",
            "soso.com",
            "suche.fireball.de",
            "suche.web.de",
            "terra.es",
            "tesco.net",
            "thespider.it",
            "tiscali.co.uk",
            "uk.altavista.com",
            "uk.ask.com")));

    /**
     * URL-decodes {@code str} as UTF-8.
     *
     * @param str the percent-encoded text
     * @return the decoded text
     * @throws IllegalArgumentException if {@code str} contains a malformed escape
     *         (propagated from {@link URLDecoder#decode(String, String)})
     */
    private static String myDecode(String str) {
        try {
            return URLDecoder.decode(str, "UTF-8");
        } catch (UnsupportedEncodingException ignored) {
            // Unreachable in practice: every Java platform is required to support UTF-8.
            // Fall back to the raw text rather than failing the row.
            return str;
        }
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in {@code input},
     * or {@code null} when there is no match.
     */
    private static String firstGroup(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Tells whether the normalised host is one whose query string carries the search term. */
    private static boolean isSearchEngineHost(String host) {
        return HOSTS.contains(host) || host.contains("google.co") || host.contains("search.yahoo");
    }

    /**
     * Extracts the decoded search term from the URL held in the first tuple field.
     *
     * <p>The method name is the decompiler's rename of the original {@code exec}
     * (it was merged with the bridge method).
     *
     * @param tuple input row; its first field is expected to be a URL string
     * @return the decoded search term, or {@code null} when the tuple is null or empty,
     *         the field is null, the URL is malformed, a search-engine URL has no
     *         query string, or no search term can be found
     * @throws IOException wrapping any other failure while processing the row
     *         (for example a non-string field or a malformed percent-escape)
     */
    public String m104exec(Tuple tuple) throws IOException {
        if (tuple == null || tuple.size() == 0) {
            return null;
        }
        try {
            String rawUrl = (String) tuple.get(0);
            if (rawUrl == null) {
                return null;
            }
            URL url = new URL(rawUrl);
            String rawHost = url.getHost();
            if (rawHost == null) {
                return null;
            }
            // Host names are case-insensitive: lower-case (locale-independently) BEFORE
            // stripping "www." so that e.g. "WWW.ASK.COM" is normalised to "ask.com".
            String host = WWW_PREFIX.matcher(rawHost.toLowerCase(Locale.ROOT)).replaceFirst("");

            if (isSearchEngineHost(host)) {
                String query = url.getQuery();
                if (query == null) {
                    return null;
                }
                String term = firstGroup(TERM_PATTERN, query);
                if (term == null) {
                    term = firstGroup(P_TERM_PATTERN, query);
                }
                if (term != null) {
                    return myDecode(term);
                }
                // No recognised parameter: fall through to the path-based check below.
            }

            if (host.endsWith("feedster.com") || host.endsWith("technorati.com")) {
                String path = url.getPath();
                if (path != null) {
                    return myDecode(SEARCH_PATH_PREFIX.matcher(path).replaceFirst(""));
                }
            }
            return null;
        } catch (MalformedURLException e) {
            // An unparsable URL is not an error for the job; it simply has no search term.
            return null;
        } catch (Exception e2) {
            throw new IOException("Caught exception processing input row ", e2);
        }
    }

    /**
     * Declares the single supported argument signature: one field whose type code is 55
     * (Pig's {@code DataType.CHARARRAY}), mapped back to this class.
     *
     * @return a one-element list with the function spec for that signature
     * @throws FrontendException declared by the signature; propagated from schema construction
     */
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcSpecs = new ArrayList<>();
        // The (String) cast on null selects the FieldSchema(String alias, byte type) constructor.
        funcSpecs.add(new FuncSpec(getClass().getName(), new Schema(new Schema.FieldSchema((String) null, (byte) 55))));
        return funcSpecs;
    }
}
