package com.ibm.nlu.tools.ac;

import com.ibm.nlu.adt.Int;
import com.ibm.nlu.engines.AC;
import com.ibm.nlu.engines.Extractor;
import com.ibm.nlu.engines.Feature;
import com.ibm.nlu.tools.SentenceList;
import com.ibm.nlu.tools.SentenceTree;
import com.ibm.nlu.util.IO;
import com.ibm.nlu.util.StringList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:plugins/com.ibm.nlutools.utilities_6.0.0/nlu.jar:com/ibm/nlu/tools/ac/TFIDFTrainer.class */
/**
 * Trainer that builds a TF-IDF weighted model from (sentence, target) pairs.
 *
 * <p>Sentences are fed in through the {@code addSentence} overloads, which
 * accumulate per-n-gram, per-target counts. {@link #train(String)} converts the
 * counts into normalised TF-IDF weights and, when smoothing is enabled, runs
 * {@link #smooth} over the resulting model.
 *
 * <p>NOTE(review): this class was recovered by a decompiler. {@link #setcutoffs}
 * could not be decompiled and currently throws, so {@link #train(String)} fails
 * whenever smoothing is enabled (which is the default).
 */
public class TFIDFTrainer extends AC implements Trainer {
    /** Per-target list of cutoff weights, indexed by target id, then by (n-gram length - 1). */
    ArrayList[] ac_ngl_cutoff;
    /** Every distinct n-gram seen so far, in first-seen order. */
    StringList nglist = new StringList();
    int unigramMaxDocs = 5;
    int bigramMaxDocs = 5;
    int trigramMaxDocs = 5;
    int quadgramMaxDocs = 5;
    int unigramCutoff = 95;
    int bigramCutoff = 90;
    int trigramCutoff = 90;
    int quadgramCutoff = 90;
    protected boolean doSmooth = true;
    protected int ngramOrder = 4;
    /** n-gram name -> (target name -> accumulated {@link Int} count). */
    HashMap counts = new HashMap(1000);

    public TFIDFTrainer() {
        this.extractor = new Extractor();
    }

    /**
     * Adds one training sentence.
     *
     * @param str  sentence text handed to the feature extractor
     * @param str2 target (class label) of the sentence
     * @param d    sentence weight; it is truncated to an {@code int} before being
     *             added to the counts (NOTE(review): weights below 1 therefore
     *             contribute a count of 0 - confirm this is intended)
     */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void addSentence(String str, String str2, double d) {
        final int increment = (int) d;
        Feature[] features = this.extractor.extract(str);
        for (int i = 0; i < features.length; i++) {
            String ngram = features[i].name;
            HashMap perTarget = (HashMap) this.counts.get(ngram);
            if (perTarget == null) {
                // First time this n-gram is seen: register it in both the count
                // table and the ordered n-gram list.
                perTarget = new HashMap();
                this.counts.put(ngram, perTarget);
                this.nglist.add(ngram);
            }
            Int count = (Int) perTarget.get(str2);
            if (count == null) {
                perTarget.put(str2, new Int(increment));
            } else {
                count.v += increment;
            }
        }
        // Assign the next free id to a target that has not been seen before.
        if (!this.targetMap.containsKey(str2)) {
            this.targetMap.put(str2, new Int(this.targetList.size()));
            this.targetList.add(str2);
        }
    }

    /** Adds one training sentence described by a {@link SentenceData} record. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void addSentence(SentenceData sentenceData) {
        addSentence(sentenceData.text, sentenceData.target, sentenceData.weight);
    }

    /** Adds every sentence of the array, in order. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void addSentence(SentenceData[] sentenceDataArr) {
        for (SentenceData sentenceData : sentenceDataArr) {
            addSentence(sentenceData);
        }
    }

    /**
     * Converts the accumulated counts into the model: for every n-gram a list of
     * {@link AC.TargetWt} whose weight is
     * {@code (ln(count) + 1) * ln((targets + 1) / targetsContainingNgram)},
     * divided by the square root of the sum of squares of all such weights.
     */
    private void computeTFIDF() {
        HashMap weightsByNgram = new HashMap();
        HashMap idfByNgram = new HashMap();
        double sumOfSquares = 0.0d;
        int targetCount = this.targetMap.size();
        Iterator ngrams = this.nglist.iterator();
        while (ngrams.hasNext()) {
            String ngram = (String) ngrams.next();
            HashMap perTarget = (HashMap) this.counts.get(ngram);
            // Floating-point division: integer division truncated the ratio, which
            // collapsed the IDF of any n-gram occurring under more than half of
            // the targets to ln(1) = 0.
            double idf = Math.log((double) (targetCount + 1) / perTarget.size());
            idfByNgram.put(ngram, Double.valueOf(idf));
            ArrayList weights = new ArrayList();
            weightsByNgram.put(ngram, weights);
            for (Object o : perTarget.entrySet()) {
                Map.Entry entry = (Map.Entry) o;
                String target = (String) entry.getKey();
                double tf = Math.log(((Int) entry.getValue()).v) + 1.0d;
                // Only the TF part is stored for now; IDF and the normalisation
                // factor are applied in the second pass below.
                weights.add(new AC.TargetWt(((Int) this.targetMap.get(target)).v, tf));
                sumOfSquares += tf * idf * tf * idf;
            }
        }
        double norm = Math.sqrt(sumOfSquares);
        System.out.println("RMS=" + Double.toString(norm));
        double scale = 1.0d / norm;
        for (Object o : weightsByNgram.entrySet()) {
            Map.Entry entry = (Map.Entry) o;
            double idf = ((Double) idfByNgram.get(entry.getKey())).doubleValue();
            Iterator it = ((ArrayList) entry.getValue()).iterator();
            while (it.hasNext()) {
                AC.TargetWt tw = (AC.TargetWt) it.next();
                tw.wt = (float) (tw.wt * idf * scale);
            }
        }
        this.model = weightsByNgram;
    }

    /**
     * Builds the model from the sentences added so far.
     *
     * <p>When smoothing is enabled the model is smoothed and then saved to
     * {@code "tmpac.unsmooth"}. NOTE(review): the file name suggests the save was
     * meant to happen before smoothing - confirm the intended order.
     *
     * @param str passed through to {@link #smooth} (unused there)
     * @return this trainer, which is itself the trained {@link AC}
     * @throws Exception declared by the {@link Trainer} contract
     */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public AC train(String str) throws Exception {
        computeTFIDF();
        if (this.doSmooth) {
            smooth("tmpac.unsmooth", str, this.unigramMaxDocs, this.bigramMaxDocs, this.trigramMaxDocs, this.quadgramMaxDocs, this.unigramCutoff, this.bigramCutoff, this.trigramCutoff, this.quadgramCutoff);
            save("tmpac.unsmooth", true);
        }
        return this;
    }

    /** Incremental training is not implemented; the given model is returned unchanged. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public AC trainIncremental(AC ac) {
        return ac;
    }

    /** No-op: this trainer is not iterative. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void setIterations(int i) {
    }

    /** @return always 0 */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public int getIterations() {
        return 0;
    }

    /** @return always 0 */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public int numSteps() {
        return 0;
    }

    /** No-op: listeners are ignored. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void addIterListener(TrainerListener trainerListener) {
    }

    /** @return a new, empty list */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public List trainingReport() {
        return new ArrayList();
    }

    /** No-op. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void setSpeedVsAccuracy(double d) {
    }

    /** @return always 0 */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public int getSpeedVsAccuracy() {
        return 0;
    }

    /** No-op: parameters are ignored. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void setParam(String str, String str2) {
    }

    /** No-op: parameters are ignored. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void setParam(String str, double d) {
    }

    /** No-op: parameters are ignored. */
    @Override // com.ibm.nlu.tools.ac.Trainer
    public void setParam(String str, int i) {
    }

    /** Enables or disables the smoothing step of {@link #train(String)}. */
    public void setSmooth(boolean z) {
        this.doSmooth = z;
    }

    // Native entry points; none of them is called from the Java code in this class.
    private native long init();

    private native void add(long j, String str, String str2, double d);

    private native void build(String str);

    private native void write(String str, boolean z);

    /**
     * Returns the number of tokens of an n-gram, computed as the number of
     * space characters plus one (so the empty string yields 1).
     */
    int ngramlen(String str) {
        int tokens = 1;
        for (int pos = str.indexOf(' '); pos >= 0; pos = str.indexOf(' ', pos + 1)) {
            tokens++;
        }
        return tokens;
    }

    /*
     * NOT DECOMPILED. The decompiler (JADX) failed on this method and emitted a
     * stub; the real implementation (467 bytecode instructions) is missing.
     * Fragments the decompiler managed to recover:
     *
     *     if (r15 >= r0[r12][r13].size()) goto L43;
     *     r6.ac_ngl_cutoff[r12].add(r0[r12][r13].get(r15));
     *     r13 = r13 + 1;
     *
     * Other JADX warnings reported for this method: incorrect switch cases order,
     * multi-variable type inference failed, removed duplicated region for block
     * B:26:0x0125.
     *
     * smooth() reads ac_ngl_cutoff, which is assigned nowhere else in this class,
     * so this method is presumably what populates it - TODO restore from bytecode.
     */
    void setcutoffs(int r7, int r8, int r9, int r10) {
        throw new UnsupportedOperationException("Method not decompiled: com.ibm.nlu.tools.ac.TFIDFTrainer.setcutoffs(int, int, int, int):void");
    }

    /**
     * Smooths the model in place. For every n-gram that is associated with more
     * targets than the limit for its length, each weight below the cutoff of its
     * target (for that n-gram length) is replaced by the largest weight of the
     * n-gram. The "MUMBLE" target is not counted against the limit.
     *
     * <p>NOTE(review): the decompiler reported a problem with the switch in this
     * method ("Failed to find 'out' block for switch"); verify against bytecode.
     *
     * @param str  unused
     * @param str2 unused
     * @param i    target limit for unigrams
     * @param i2   target limit for bigrams
     * @param i3   target limit for trigrams
     * @param i4   target limit for 4-grams
     * @param i5   unigram cutoff, forwarded to {@link #setcutoffs}
     * @param i6   bigram cutoff, forwarded to {@link #setcutoffs}
     * @param i7   trigram cutoff, forwarded to {@link #setcutoffs}
     * @param i8   4-gram cutoff, forwarded to {@link #setcutoffs}
     */
    void smooth(String str, String str2, int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8) {
        setcutoffs(i5, i6, i7, i8);
        for (Object o : this.model.entrySet()) {
            Map.Entry entry = (Map.Entry) o;
            ArrayList weights = (ArrayList) entry.getValue();
            int length = ngramlen((String) entry.getKey());
            int maxDocs;
            switch (length) {
                case 1:
                    maxDocs = i;
                    break;
                case 2:
                    maxDocs = i2;
                    break;
                case 3:
                    maxDocs = i3;
                    break;
                case 4:
                    maxDocs = i4;
                    break;
                default:
                    // Lengths above 4 keep the original limit of 0.
                    maxDocs = 0;
                    break;
            }
            int targets = weights.size();
            // The list holds AC.TargetWt objects, so the former
            // weights.contains("MUMBLE") could never match; compare target ids.
            if (containsTarget(weights, "MUMBLE")) {
                targets--;
            }
            if (targets > maxDocs) {
                double maxWt = ((AC.TargetWt) Collections.max(weights)).wt;
                for (int k = 0; k < weights.size(); k++) {
                    AC.TargetWt tw = (AC.TargetWt) weights.get(k);
                    double cutoff = ((Double) this.ac_ngl_cutoff[tw.id].get(length - 1)).doubleValue();
                    if (tw.wt < cutoff) {
                        tw.wt = (float) maxWt;
                    }
                }
            }
        }
    }

    /** Tells whether the weight list has an entry for the target with the given name. */
    private boolean containsTarget(ArrayList weights, String target) {
        Int id = (Int) this.targetMap.get(target);
        if (id == null) {
            return false;
        }
        for (int k = 0; k < weights.size(); k++) {
            if (((AC.TargetWt) weights.get(k)).id == id.v) {
                return true;
            }
        }
        return false;
    }

    /**
     * Command-line entry point: {@code train [-s] doclist outprefix}.
     *
     * <p>Reads training data from {@code doclist} (a {@link SentenceList} when the
     * extension is "xml", a {@link DocList} otherwise), trains, and saves the
     * model to {@code outprefix + ".acmod"}.
     *
     * <p>NOTE(review): smoothing is enabled with and without {@code -s}, so the
     * flag currently has no effect beyond shifting the arguments - confirm intent.
     */
    public static void main(String[] strArr) throws Exception {
        long start = System.currentTimeMillis();
        // The -s form needs three arguments; previously "-s x" passed this check
        // and then failed with ArrayIndexOutOfBoundsException.
        if (strArr.length < 2 || (strArr[0].equals("-s") && strArr.length < 3)) {
            System.err.println("train [-s] doclist outprefix");
            System.exit(-1);
        }
        int first = strArr[0].equals("-s") ? 1 : 0;
        boolean smooth = true;
        String input = strArr[first];
        String outPrefix = strArr[first + 1];

        TFIDFTrainer trainer = new TFIDFTrainer();
        trainer.setSmooth(smooth);
        if (IO.getExtension(input).equals("xml")) {
            SentenceList sentences = new SentenceList(input);
            for (SentenceTree tree = sentences.next(); tree != null; tree = sentences.next()) {
                trainer.addSentence(new SentenceData(tree.getClassedText(), tree.getActions(0), tree.getWeight()));
            }
        } else {
            DocList docs = new DocList(input);
            for (SentenceData[] batch = docs.next(); batch != null; batch = docs.next()) {
                trainer.addSentence(batch);
            }
        }
        trainer.train("").save(outPrefix + ".acmod", true);
        System.out.println("Elapsed time = " + ((System.currentTimeMillis() - start) / 1000));
    }
}
