package trp.db;

import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import processing.core.PApplet;
import rita.RiTaException;
import trp.layout.ReadersPApplet;
import trp.reader.MachineReader;
import trp.util.ReaderConstants;

/* loaded from: input_file:trp/db/NGramSequenceInserter.class */
public abstract class NGramSequenceInserter extends ReadersPApplet {
    /**
     * Regex describing the characters expected in a generated sequence (word characters,
     * space, several quote/apostrophe forms, hyphen and a set of accented letters).
     * loadDigrams logs a "[WARN]" line for any sequence that does not fully match it;
     * the sequence is still kept.
     */
    static final String ALLOWABLE_SEQ = "[\\w '‘`’\\-àâçééèêëïîôùûüÿœ]+";
    /** N-gram type name that makes loadAndInsert call loadDigrams. */
    static final String DIGRAMS = "digrams";
    /** N-gram type name that makes loadAndInsert call loadPerigrams and linkPerigrams. */
    static final String PERIGRAMS = "perigrams";
    /**
     * Regex passed to cleanWord(List, int, String): a raw word that fully matches it
     * (one or more characters followed by one of ? ! : ; . ) ] —) is rejected.
     */
    static final String GRAM_BREAKERS = ".+[\\?!:;\\.)\\]—]";
    /** When true, loadAndInsert returns after loading/printing, before creating MySQLDirectAccess. */
    static boolean TEST_WITHOUT_DB = false;
    /** When true, loadAndInsert prints every loaded sequence with its index. */
    static boolean PRINT_SEQUENCES = false;
    /** When true, the load* methods print a running total when the set size is a multiple of 10000. */
    static boolean PRINT_INCREMENTAL_TOTALS = true;
    // INPUT_FILE_NAME and INPUT_FILE are not referenced anywhere in this class.
    // NOTE(review): presumably read by subclasses or sibling classes in trp.db — confirm.
    static String INPUT_FILE_NAME = "Beginnings";
    static String INPUT_FILE = "coover/beginnings.txt";
    /** When true, the load* methods print a "Rejected" line for each word refused via GRAM_BREAKERS. */
    static boolean PRINT_GRAM_BREAKING_REJECTS = true;

    /**
     * No-op override of the PApplet setup hook; this class does no initialisation here.
     */
    @Override // processing.core.PApplet
    public void setup() {
    }

    /**
     * Builds the n-gram sequences of the requested type from a word list and, unless
     * {@link #TEST_WITHOUT_DB} is set, writes them to the database through
     * {@code MySQLDirectAccess}.
     *
     * <p>Database handling exists only for bigrams (insert, then link) and perigrams
     * (link only). Digrams and trigrams can be loaded and printed, but reaching the
     * database step with either of them raises a {@link RuntimeException}.
     *
     * @param str  the n-gram type: {@code ReaderConstants.BIGRAMS}, {@link #DIGRAMS},
     *             {@code ReaderConstants.TRIGRAMS} or {@link #PERIGRAMS}
     * @param str2 value forwarded as the first argument of {@code linkBigrams} /
     *             {@code linkPerigrams} (NOTE(review): presumably the text name — confirm)
     * @param list the source words
     * @throws RiTaException    if {@code str} is not one of the four known types
     * @throws RuntimeException if {@code str} is a type with no database handling and
     *                          {@link #TEST_WITHOUT_DB} is false
     */
    public void loadAndInsert(String str, String str2, List list) {
        String[] sequences;
        if (str.equals(ReaderConstants.BIGRAMS)) {
            sequences = loadBigrams(list);
        } else if (str.equals(DIGRAMS)) {
            sequences = loadDigrams(list, Integer.MAX_VALUE);
        } else if (str.equals(ReaderConstants.TRIGRAMS)) {
            sequences = loadTrigrams(list, Integer.MAX_VALUE);
        } else if (str.equals(PERIGRAMS)) {
            sequences = loadPerigrams(list, Integer.MAX_VALUE);
        } else {
            throw new RiTaException("Unimplemented n-gram type: " + str);
        }

        if (PRINT_SEQUENCES) {
            for (int k = 0; k < sequences.length; k++) {
                System.out.println(k + ") " + sequences[k]);
            }
        }
        System.out.println("[INFO] Found " + sequences.length + " " + str.toLowerCase());
        if (TEST_WITHOUT_DB) {
            return;
        }

        MySQLDirectAccess db = new MySQLDirectAccess();
        System.out.println("\nStarting insert ignore on " + str);

        boolean bigrams = str.equals(ReaderConstants.BIGRAMS);
        if (!bigrams && !str.equals(PERIGRAMS)) {
            // Thrown outside the try block below so it is not swallowed by the catch.
            throw new RuntimeException("No database insert implemented for n-gram type: " + str);
        }

        try {
            if (bigrams) {
                db.insertBigrams(sequences, true);
                System.out.println("Linking " + str.toLowerCase());
                System.out.println("Linked " + db.linkBigrams(str2, sequences) + " records in " + MySQLDirectAccess.BIGRAMS_LINK);
            } else {
                // Perigrams are only linked here; no insert call is made for them.
                System.out.println("Linking " + str.toLowerCase());
                System.out.println("Linked " + db.linkPerigrams(str2, sequences) + " records in " + MySQLDirectAccess.PERIGRAMS_LINK);
            }
        } catch (Exception e) {
            // Best effort: database failures are reported but do not propagate.
            e.printStackTrace();
        }
    }

    /**
     * Produces the perigram sequences for a word list; the implementation is supplied
     * by subclasses. loadAndInsert calls this with {@code Integer.MAX_VALUE} as the
     * second argument and uses the returned array's length and contents.
     *
     * @param list the source words
     * @param i    NOTE(review): presumably the maximum number of sequences to produce,
     *             as in loadDigrams/loadTrigrams — confirm against implementations
     * @return the perigram sequences
     */
    public abstract String[] loadPerigrams(List list, int i);

    /**
     * Builds the set of distinct three-word sequences ("w1 w2 w3") that can be combined
     * from the word list and returns them in random order.
     *
     * <p>The first word is taken from every index except the last two, the second from
     * every index except the last, and the third from every index. A first or second
     * word whose raw form matches {@link #GRAM_BREAKERS} is rejected; a second word
     * equal to the first, or a third word equal to the second, is skipped.
     *
     * @param list the source words
     * @param i    collection stops as soon as the number of distinct sequences reaches
     *             this value
     * @return the collected sequences, shuffled
     */
    public String[] loadTrigrams(List list, int i) {
        Set<String> trigrams = new HashSet<String>();
        int wordCount = list.size();
        collecting:
        for (int first = 0; first < wordCount - 2; first++) {
            String firstWord = cleanWord(list, first, GRAM_BREAKERS);
            if (firstWord == null) {
                if (PRINT_GRAM_BREAKING_REJECTS) {
                    System.out.println("Rejected(1): " + list.get(first) + " + " + list.get(first + 1) + " ...");
                }
                continue;
            }
            for (int second = 0; second < wordCount - 1; second++) {
                String secondWord = cleanWord(list, second, GRAM_BREAKERS);
                if (secondWord == null) {
                    if (PRINT_GRAM_BREAKING_REJECTS) {
                        System.out.println("Rejected(2): " + firstWord + " + " + list.get(second) + " ...");
                    }
                    continue;
                }
                if (secondWord.equals(firstWord)) {
                    continue;
                }
                for (int third = 0; third < wordCount; third++) {
                    String thirdWord = cleanWord(list, third);
                    if (thirdWord == null || thirdWord.equals(secondWord)) {
                        continue;
                    }
                    // Only a newly added sequence changes the total, so progress reporting
                    // and the limit check are skipped for duplicates; otherwise the same
                    // total would be printed again for every duplicate that follows it.
                    if (!trigrams.add(firstWord + ' ' + secondWord + ' ' + thirdWord)) {
                        continue;
                    }
                    int total = trigrams.size();
                    if (PRINT_INCREMENTAL_TOTALS && total % 10000 == 0) {
                        System.out.println(total + " trigrams created");
                    }
                    if (total >= i) {
                        break collecting;
                    }
                }
            }
        }
        return shuffledArray(trigrams);
    }

    /**
     * Copies the elements of a set into a new String array in random order.
     * The set itself is not modified.
     *
     * @param set the strings to copy
     * @return a new array with the same elements, shuffled
     */
    public static String[] shuffledArray(Set set) {
        List shuffled = new ArrayList(set);
        Collections.shuffle(shuffled);
        String[] result = new String[set.size()];
        return (String[]) shuffled.toArray(result);
    }

    /**
     * Builds the set of distinct sequences "w1 w2 w3" in which w1 and w2 are adjacent
     * words of the list and w3 is taken from any other index than that of w1, and
     * returns them in random order.
     *
     * <p>A pair is rejected when the raw form of w1 matches {@link #GRAM_BREAKERS};
     * a w3 equal to w2 is skipped. Sequences that do not match {@link #ALLOWABLE_SEQ}
     * are reported with a warning but still collected.
     *
     * @param list the source words
     * @param i    collection stops as soon as the number of distinct sequences reaches
     *             this value
     * @return the collected sequences, shuffled
     */
    public static String[] loadDigrams(List list, int i) {
        HashSet collected = new HashSet();
        scan:
        for (int head = 0; head < list.size() - 1; head++) {
            String first = cleanWord(list, head, GRAM_BREAKERS);
            String second = cleanWord(list, head + 1);
            if (first == null) {
                if (PRINT_GRAM_BREAKING_REJECTS) {
                    System.out.println("Rejected: " + list.get(head) + " + " + list.get(head + 1) + " ...");
                }
                continue;
            }
            if (second == null) {
                continue;
            }
            for (int tail = 0; tail < list.size(); tail++) {
                if (tail == head) {
                    continue;
                }
                String third = cleanWord(list, tail);
                if (third == null || second.equals(third)) {
                    continue;
                }
                String sequence = first + ' ' + second + ' ' + third;
                if (!sequence.matches(ALLOWABLE_SEQ)) {
                    System.out.println("[WARN] unexpected char(s): '" + sequence + "'");
                }
                // add() reports whether the sequence was new; duplicates need no further work.
                if (!collected.add(sequence)) {
                    continue;
                }
                int total = collected.size();
                if (PRINT_INCREMENTAL_TOTALS && total % 10000 == 0) {
                    System.out.println(total + " digrams created");
                }
                if (total >= i) {
                    break scan;
                }
            }
        }
        return shuffledArray(collected);
    }

    /**
     * Builds the set of distinct two-word sequences ("w1 w2") formed from every ordered
     * pair of different indices in the word list. Unlike the other loaders the result
     * is not shuffled.
     *
     * <p>A first word whose raw form matches {@link #GRAM_BREAKERS} is rejected.
     *
     * @param list the source words
     * @return the collected sequences, in the set's iteration order
     */
    public static String[] loadBigrams(List list) {
        Set<String> bigrams = new HashSet<String>();
        int wordCount = list.size();
        for (int first = 0; first < wordCount; first++) {
            String firstWord = cleanWord(list, first, GRAM_BREAKERS);
            if (firstWord == null) {
                if (PRINT_GRAM_BREAKING_REJECTS) {
                    System.out.println("Rejected: " + list.get(first) + " ...");
                }
                continue;
            }
            for (int second = 0; second < wordCount; second++) {
                if (second == first) {
                    continue;
                }
                String secondWord = cleanWord(list, second);
                if (secondWord == null) {
                    continue;
                }
                // Report progress only when the pair was actually new; otherwise the same
                // total would be printed again for every duplicate that follows it.
                if (bigrams.add(firstWord + ' ' + secondWord)) {
                    int total = bigrams.size();
                    if (PRINT_INCREMENTAL_TOTALS && total % 10000 == 0) {
                        System.out.println(total + " bigrams created");
                    }
                }
            }
        }
        return bigrams.toArray(new String[bigrams.size()]);
    }

    /**
     * Returns the word at the given index after passing it through
     * {@code MachineReader.stripPunctuation} and converting it to lower case
     * (default-locale {@code toLowerCase()}).
     *
     * @param list the source words; the element at {@code i} is cast to String
     * @param i    index of the word to clean
     * @return the stripped, lower-cased word
     */
    public static String cleanWord(List list, int i) {
        String raw = (String) list.get(i);
        String stripped = MachineReader.stripPunctuation(raw);
        return stripped.toLowerCase();
    }

    /**
     * Writes a string to a file, replacing any existing content. Failures are printed
     * to standard error and otherwise ignored (best effort).
     *
     * @param str  path of the file to write
     * @param str2 the text to write
     */
    static void writeFile(String str, String str2) {
        // try-with-resources closes (and thereby flushes) the writer even when write() throws.
        try (FileWriter writer = new FileWriter(str)) {
            writer.write(str2);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Cleans the word at the given index unless its raw form is rejected by a regex.
     *
     * @param list the source words; the element at {@code i} is cast to String
     * @param i    index of the word to clean
     * @param str  rejection regex; an empty string disables the check
     * @return {@code null} when {@code str} is non-empty and the raw word fully matches
     *         it, otherwise the result of {@code cleanWord(list, i)}
     */
    public static String cleanWord(List list, int i, String str) {
        if (str.isEmpty()) {
            return cleanWord(list, i);
        }
        String raw = (String) list.get(i);
        return raw.matches(str) ? null : cleanWord(list, i);
    }

    /**
     * Entry point: hands this class's name to {@code PApplet.main}.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        String sketchClassName = NGramSequenceInserter.class.getName();
        PApplet.main(new String[]{sketchClassName});
    }
}
