/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.postag;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import opennlp.common.util.Pair;
import opennlp.grok.preprocess.postag.POSContextGenerator;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.Counter;
import opennlp.maxent.Event;
import opennlp.maxent.EventCollector;

/**
 * Collects maxent {@link Event}s from part-of-speech annotated text.
 *
 * <p>Input is read line by line from the {@link Reader} given to the constructor. Each line is
 * split on whitespace into tokens of the form {@code WORD/TAG}; the text after the last
 * {@code '/'} is treated as the tag. One event is produced per token, with the tag as the
 * outcome and the context supplied by the configured {@link ContextGenerator}.
 */
public class POSEventCollector
implements EventCollector {
    /** Cutoff passed to {@link Counter#passesCutoff} when selecting frequent words. */
    private static final int FREQUENCY_CUTOFF = 5;

    /** Source of annotated lines; replaced by an in-memory reader in non-eval mode. */
    private BufferedReader br;
    /** Produces the context features for each token position. */
    private final ContextGenerator cg;

    /**
     * Creates a collector over the given annotated data.
     *
     * @param data reader supplying lines of whitespace-separated {@code WORD/TAG} tokens
     * @param gen  context generator invoked once per token
     */
    public POSEventCollector(Reader data, ContextGenerator gen) {
        this.br = new BufferedReader(data);
        this.cg = gen;
    }

    /**
     * Splits a single {@code WORD/TAG} token at its last {@code '/'}.
     *
     * <p>If the token contains no {@code '/'}, a warning is printed to standard output and the
     * whole token is returned as the word with the tag {@code "UNKNOWN"}.
     *
     * @param s the annotated token
     * @return a pair whose {@code a} is the word and whose {@code b} is the tag
     */
    private static Pair split(String s) {
        int slash = s.lastIndexOf('/');
        if (slash == -1) {
            System.out.println("There is a problem in your training data: " + s + " does not conform to the format WORD/TAG.");
            return new Pair((Object)s, (Object)"UNKNOWN");
        }
        // Splitting at the LAST slash keeps words that themselves contain '/' intact.
        return new Pair((Object)s.substring(0, slash), (Object)s.substring(slash + 1));
    }

    /**
     * Converts one annotated line into parallel lists of words and tags.
     *
     * @param s a line of whitespace-separated {@code WORD/TAG} tokens
     * @return a pair whose {@code a} is an {@code ArrayList} of words and whose {@code b} is an
     *         {@code ArrayList} of the corresponding tags, in input order
     */
    public static Pair convertAnnotatedString(String s) {
        ArrayList<Object> tokens = new ArrayList<Object>();
        ArrayList<Object> outcomes = new ArrayList<Object>();
        StringTokenizer st = new StringTokenizer(s);
        while (st.hasMoreTokens()) {
            Pair wordAndTag = POSEventCollector.split(st.nextToken());
            tokens.add(wordAndTag.a);
            outcomes.add(wordAndTag.b);
        }
        return new Pair(tokens, outcomes);
    }

    /**
     * Collects events in non-eval mode; equivalent to {@code getEvents(false)}.
     *
     * @return one event per token in the input
     */
    public Event[] getEvents() {
        return this.getEvents(false);
    }

    /**
     * Counts the words in the given data and returns those whose counter passes
     * {@link #FREQUENCY_CUTOFF}.
     *
     * <p>An {@link IOException} while reading is reported via {@code printStackTrace()} and the
     * words counted so far are still evaluated.
     *
     * @param reader source of annotated lines; read until exhausted
     * @return the set of words whose counter passes the cutoff
     */
    private Set<String> getFrequent(BufferedReader reader) {
        HashMap<String, Counter> counts = new HashMap<String, Counter>();
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                StringTokenizer st = new StringTokenizer(line);
                while (st.hasMoreTokens()) {
                    String word = (String)POSEventCollector.split(st.nextToken()).a;
                    Counter seen = counts.get(word);
                    if (seen == null) {
                        // NOTE(review): presumably a fresh Counter already represents one
                        // occurrence, since it is not incremented here -- confirm against Counter.
                        counts.put(word, new Counter());
                    } else {
                        seen.increment();
                    }
                }
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        HashSet<String> frequent = new HashSet<String>();
        for (Map.Entry<String, Counter> entry : counts.entrySet()) {
            if (entry.getValue().passesCutoff(FREQUENCY_CUTOFF)) {
                frequent.add(entry.getKey());
            }
        }
        return frequent;
    }

    /**
     * Reads the annotated data and builds one {@link Event} per token.
     *
     * <p>When {@code evalMode} is {@code false}, the whole input is first read into memory, a
     * word-frequency pass is run over it, and the events are then built from the buffered copy.
     * When {@code evalMode} is {@code true}, events are built directly from the underlying reader.
     *
     * <p>For each token, the context generator receives an {@code Object[]} holding the line's
     * token list, the list of tags already assigned to earlier tokens on that line, and the
     * token index. Any exception raised while building events is printed and the events
     * collected up to that point are returned.
     *
     * @param evalMode {@code true} to skip the buffering and frequency pass
     * @return the collected events, in input order
     */
    public Event[] getEvents(boolean evalMode) {
        ArrayList<Event> elist = new ArrayList<Event>();
        if (!evalMode) {
            System.out.println("Reading in all the data");
            try {
                StringBuilder buffered = new StringBuilder();
                for (String line = this.br.readLine(); line != null; line = this.br.readLine()) {
                    buffered.append(line).append('\n');
                }
                String text = buffered.toString();
                System.out.println("Getting most frequent words");
                // NOTE(review): the returned set is not consumed anywhere in this class. The call
                // is retained so console output (including malformed-token warnings) is unchanged;
                // confirm whether the frequent-word set should feed the context generator.
                this.getFrequent(new BufferedReader(new StringReader(text)));
                // Rewind by re-reading from the in-memory copy.
                this.br = new BufferedReader(new StringReader(text));
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            for (String line = this.br.readLine(); line != null; line = this.br.readLine()) {
                Pair annotated = POSEventCollector.convertAnnotatedString(line);
                ArrayList<?> tokens = (ArrayList<?>)annotated.a;
                ArrayList<?> outcomes = (ArrayList<?>)annotated.b;
                // Tags of the tokens preceding the current one on this line.
                ArrayList<Object> tags = new ArrayList<Object>();
                for (int i = 0; i < tokens.size(); ++i) {
                    Object[] params = new Object[]{tokens, tags, Integer.valueOf(i)};
                    String[] context = this.cg.getContext((Object)params);
                    elist.add(new Event((String)outcomes.get(i), context));
                    tags.add(outcomes.get(i));
                }
            }
        }
        catch (Exception e) {
            // Best effort: report the failure and return whatever was collected so far.
            e.printStackTrace();
        }
        return elist.toArray(new Event[elist.size()]);
    }

    /**
     * Small demonstration: collects events from a fixed annotated sentence and prints each
     * event's outcome.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String data = "the/DT stories/NNS about/IN well-heeled/JJ communities/NNS and/CC developers/NNS";
        POSEventCollector ec = new POSEventCollector(new StringReader(data), new POSContextGenerator());
        Event[] events = ec.getEvents();
        for (int i = 0; i < events.length; ++i) {
            System.out.println(events[i].getOutcome());
        }
    }
}

