// Edge-count limit (30,000,000). It is passed as an argument, alongside
// unicharset.size(), at two call sites in main(); the callee is outside this
// excerpt (presumably the Trie constructor -- TODO confirm).
35 static const int kMaxNumEdges = 30000000;
37 int main(
int argc,
char** argv) {
40 if (!(argc == 4 || (argc == 5 && strcmp(argv[1],
"-t") == 0) ||
41 (argc == 6 && strcmp(argv[1],
"-r") == 0) ||
42 (argc == 7 && strcmp(argv[1],
"-l") == 0 &&
43 sscanf(argv[2],
"%d", &min_word_length) == 1 &&
44 sscanf(argv[3],
"%d", &max_word_length) == 1))) {
45 printf(
"Usage: %s [-t | -r [reverse policy] |"
46 " -l min_len max_len] word_list_file"
47 " dawg_file unicharset_file\n", argv[0]);
52 if (argc == 5) ++argv_index;
58 sscanf(argv[++argv_index],
"%d", &tmp_int);
60 tprintf(
"Set reverse_policy to %s\n",
63 if (argc == 7) argv_index += 3;
64 const char* wordlist_filename = argv[++argv_index];
65 const char* dawg_filename = argv[++argv_index];
66 const char* unicharset_file = argv[++argv_index];
67 tprintf(
"Loading unicharset from '%s'\n", unicharset_file);
69 tprintf(
"Failed to load unicharset from '%s'\n", unicharset_file);
74 if (argc == 4 || argc == 6) {
78 kMaxNumEdges, unicharset.
size(),
80 tprintf(
"Reading word list from '%s'\n", wordlist_filename);
81 if (!trie.read_word_list(wordlist_filename, unicharset, reverse_policy)) {
82 tprintf(
"Failed to read word list from '%s'\n", wordlist_filename);
85 tprintf(
"Reducing Trie to SquishedDawg\n");
88 tprintf(
"Writing squished DAWG to '%s'\n", dawg_filename);
91 tprintf(
"Dawg is empty, skip producing the output file\n");
94 }
else if (argc == 5) {
95 tprintf(
"Loading dawg DAWG from '%s'\n", dawg_filename);
101 tprintf(
"Checking word list from '%s'\n", wordlist_filename);
102 words.check_for_words(wordlist_filename, unicharset,
true);
103 }
else if (argc == 7) {
106 FILE *word_file = fopen(wordlist_filename,
"rb");
107 if (word_file ==
NULL) {
108 tprintf(
"Failed to open wordlist file %s\n", wordlist_filename);
111 FILE *dawg_file = fopen(dawg_filename,
"wb");
112 if (dawg_file ==
NULL) {
113 tprintf(
"Failed to open dawg output file %s\n", dawg_filename);
116 tprintf(
"Reading word list from '%s'\n", wordlist_filename);
119 for (i = min_word_length; i <= max_word_length; ++i) {
123 kMaxNumEdges, unicharset.
size(),
130 tprintf(
"String '%s' not compatible with unicharset. "
131 "Bad chars here: '%s'\n", str, str + badpos);
140 if (word.
length() >= min_word_length &&
141 word.
length() <= max_word_length &&
146 tprintf(
"Failed to add the following word to dawg:\n");
154 tprintf(
"Error: word '%s' not in DAWG after adding it\n", str);
161 tprintf(
"Writing fixed length dawgs to '%s'\n", dawg_filename);
// Convert each per-length trie to a dawg. trie_vec is indexed by
// (length - min_word_length), so the loop must start at min_word_length:
// starting at 0 would index trie_vec with a negative offset and would not
// match the (max_word_length - min_word_length + 1) count used below.
163 for (i = min_word_length; i <= max_word_length; ++i) {
165 trie_vec[i-min_word_length]->trie_to_dawg());
168 dawg_vec, max_word_length - min_word_length + 1,
174 tprintf(
"Invalid command-line options\n");