package com.ibm.dltj.crf.feature;

import com.ibm.dltj.DLTException;
import com.ibm.dltj.crf.CRFDictionary;
import com.ibm.dltj.gloss.ZhLemmaGloss;
import com.ibm.dltj.util.FileUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:dlt.jar:com/ibm/dltj/crf/feature/CSVFeatureTableParser.class */
/**
 * Parses comma- or tab-separated text into {@link FeatureTable}s.
 *
 * <p>A file is read line by line. Consecutive non-separator lines form one table; an empty
 * line or a line consisting solely of commas ends the current table. Each field of each row
 * is stored into a {@link CSVFeatureTable} via {@code setFeature(row, column, value)}.
 *
 * <p>Fields may be enclosed in double quotes, in which case a doubled quote ({@code ""})
 * inside the field stands for a single literal quote.
 */
public class CSVFeatureTableParser {
    /** One comma-separated field: group 1 is the quoted content, group 2 the unquoted content. */
    private static final String CSV = "(?:^|,)(?:\"((?:[^\"]|\"\")*)\"|([^,\"]*))";
    /** One tab-separated field: group 1 is the quoted content, group 2 the unquoted content. */
    private static final String TSV = "(?:^|\t)(?:\"((?:[^\"]|\"\")*)\"|([^\t\"]*))";
    /** A line made up of commas only; treated as a table separator. */
    private static final String EMPTY = "[,]+";

    // Pattern instances are immutable and thread-safe, so compile them once.
    private static final Pattern CSV_PATTERN = Pattern.compile(CSV);
    private static final Pattern TSV_PATTERN = Pattern.compile(TSV);
    private static final Pattern EMPTY_PATTERN = Pattern.compile(EMPTY);

    private final CRFDictionary _dic;

    static String getCopyright() {
        return "\n\n(C) Copyright IBM Corp. 2003, 2010.\n\n";
    }

    /**
     * Creates a parser whose tables are bound to the given dictionary.
     *
     * @param cRFDictionary dictionary handed to every created table; must not be {@code null}
     *     (checked only when assertions are enabled)
     */
    public CSVFeatureTableParser(CRFDictionary cRFDictionary) {
        assert cRFDictionary != null;
        this._dic = cRFDictionary;
    }

    /**
     * Reads {@code file} and returns one table per block of consecutive data lines.
     *
     * <p>The comma-separated field pattern is used when the file name ends with
     * {@code FileUtils.SUFFIX_CSV}; otherwise the tab-separated pattern is used.
     *
     * @param file file to read
     * @param charset character set used to decode the file
     * @return the parsed tables in file order; empty if the file has no data lines
     * @throws IllegalArgumentException if {@code file} or {@code charset} is {@code null}
     * @throws IOException if the file cannot be opened or read
     * @throws DLTException propagated from {@link #parse(List, Pattern)}
     */
    public List<FeatureTable> parse(File file, Charset charset) throws IOException, DLTException {
        if (file == null || charset == null) {
            throw new IllegalArgumentException();
        }
        Pattern fieldPattern = file.getName().endsWith(FileUtils.SUFFIX_CSV) ? CSV_PATTERN : TSV_PATTERN;
        List<FeatureTable> tables = new ArrayList<>();
        List<String> pendingRows = new ArrayList<>();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                boolean separator = line.isEmpty() || EMPTY_PATTERN.matcher(line).matches();
                if (!separator) {
                    pendingRows.add(line);
                } else if (!pendingRows.isEmpty()) {
                    // A separator line closes the table collected so far.
                    tables.add(parse(pendingRows, fieldPattern));
                    pendingRows.clear();
                }
            }
        }
        // The last table is not necessarily followed by a separator line.
        if (!pendingRows.isEmpty()) {
            tables.add(parse(pendingRows, fieldPattern));
        }
        return tables;
    }

    /**
     * Builds a single table from already-split lines.
     *
     * <p>Row {@code i} of the table is filled from {@code list.get(i)}; columns are numbered
     * from 0 in the order the fields are found. When assertions are enabled, a row whose
     * field count differs from the widest row's raises an {@link AssertionError}.
     *
     * @param list the lines of one table
     * @param pattern field pattern; each match must expose the field content in capturing
     *     group 1 (quoted form) or capturing group 2 (unquoted form)
     * @return the populated table
     * @throws IllegalArgumentException if {@code list} or {@code pattern} is {@code null}
     * @throws DLTException declared for callers; NOTE(review): presumably raised by the
     *     table or dictionary calls, which are not visible here
     */
    public FeatureTable parse(List<String> list, Pattern pattern) throws DLTException {
        if (list == null || pattern == null) {
            throw new IllegalArgumentException();
        }
        int colSize = getColSize(list, pattern);
        CSVFeatureTable table = new CSVFeatureTable(this._dic);
        table.setHandler(this._dic.getFeatureHandlerList());
        Matcher matcher = null;
        for (int row = 0; row < list.size(); row++) {
            String line = list.get(row);
            // Create the matcher on the first row, then reuse it for the remaining rows.
            matcher = (matcher == null) ? pattern.matcher(line) : matcher.reset(line);
            int col = 0;
            while (matcher.find()) {
                String value = fieldValue(matcher);
                // A lone apostrophe in the last column is stored as two apostrophes.
                // NOTE(review): the reason for this substitution is not visible in this file.
                if (col + 1 == colSize && value.equals("'")) {
                    value = "''";
                }
                table.setFeature(row, col, value);
                col++;
            }
            assert col == colSize;
        }
        return table;
    }

    /**
     * Returns the content of the field the matcher is currently positioned on.
     *
     * <p>Group 1 is only set for quoted fields; for unquoted fields it is {@code null} and
     * the content is in group 2. Doubled quotes are collapsed to a single quote.
     */
    private static String fieldValue(Matcher matcher) {
        String value = matcher.group(1);
        if (value == null) {
            value = matcher.group(2);
        }
        return value.replace("\"\"", "\"");
    }

    /** Returns the largest number of fields found in any of the given lines (0 if none). */
    private int getColSize(List<String> list, Pattern pattern) {
        int widest = 0;
        Matcher matcher = null;
        for (String line : list) {
            matcher = (matcher == null) ? pattern.matcher(line) : matcher.reset(line);
            int count = 0;
            while (matcher.find()) {
                count++;
            }
            widest = Math.max(widest, count);
        }
        return widest;
    }
}
