-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcs202hw1.cpp
295 lines (225 loc) · 6.8 KB
/
cs202hw1.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
//Captain-Price-TF-141
/*
* Description: Unigram Bigram Extractor.
* Input: gutenberg-the-time-machine.txt.
* Output: Expected to output unigram (count and print to txt, single unique words)
* and bigram (count and print to txt, sets of two unique words).
*/
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Number of elements in the word storage that main allocates and prints as "Array Size".
const int ArrayMax = 50000;
// Record for a single word; intended to pair the word with how often it occurs.
struct unigram
{
    std::string word;  // text of the word
    int frequency;     // occurrence counter (not initialized by the struct itself)
};
// Record for a pair of words; intended to pair the two words with how often the pair occurs.
struct bigram
{
    std::string word1;  // first word of the pair
    std::string word2;  // second word of the pair
    int frequency;      // occurrence counter (not initialized by the struct itself)
};
/*
* function_identifier: readdata - loads whitespace-separated words from a text file.
* parameters: word[]    - caller-owned array that receives the words in file order
*             wordCount - output; set to the number of words stored (0 if the file
*                         cannot be opened)
*             infile    - path of the file to read; when empty, the built-in
*                         default path below is used
*             maxWords  - capacity of word[]; reading stops once it is reached.
*                         The default mirrors ArrayMax and must be kept in sync with it.
* return value: Number of words stored in word[] (same value as wordCount).
*/
int readdata(std::string word[], int& wordCount, std::string infile, int maxWords = 50000) //read in data from file
{
    wordCount = 0;

    //NOTE TO GRADER, please pass the correct file path as infile, or RENAME the default path below.
    const std::string path = infile.empty()
        ? std::string("/home/pricec3/cs202/gutenberg-the-time-machine.txt")
        : infile;

    std::ifstream fin(path.c_str());
    if (!fin)
    {
        // Retrying the same path can never succeed, so report the failure and
        // return instead of looping forever.
        std::cerr << "Unable to open input file: " << path << std::endl;
        return wordCount;
    }

    // Test the extraction itself rather than eof(): eof() only becomes true after
    // a read has already failed, which would count one phantom empty word.
    // The capacity check comes first so word[maxWords] is never written.
    while (wordCount < maxWords && fin >> word[wordCount])
    {
        wordCount++;
    }
    return wordCount;
}
/*
* function_identifier: Identifies longest word and displays longest word.
* parameters: string w[]
* return value: Expected to return longest word, algorithm is incomplete.
*/
/*
int getlongestword(string w[])
{
//for (int i = 0; i < ; i++)
{
}
//for (int j = 0; j < ; j++)
{
}
}
*/
/*
* function_identifier: cleanup - asks the user whether to run the cleanup pass and, if so,
*                      prints every character of every word in lower case, one per line.
* parameters: word[]    - words to display; the array is not modified
*             wordCount - number of valid entries in word[]
* return value: None. Punctuation is NOT removed and nothing is written to a log file yet;
*               the characters are only echoed to standard output.
* NOTE TO GRADER: This is not looping, please allow it to finish outputting
* all letters, output takes a few seconds to finish, menu will be presented on complete.
*/
void cleanup (std::string word[], int wordCount) //another way to do array
{
    char answer = 'n'; // stays 'n' (skip) if the read below fails
    std::cout << "Log cleanup? Y/n:" << std::endl; //prompts user to run the cleanup pass
    std::cin >> answer;
    if (answer != 'y' && answer != 'Y') //anything other than yes skips the pass
    {
        return;
    }

    for (int i = 0; i < wordCount; i++) //walks the words
    {
        const std::string& current = word[i];
        for (std::string::size_type j = 0; j < current.length(); j++) //walks the characters
        {
            // tolower requires a value representable as unsigned char; a plain
            // (possibly negative) char is undefined behaviour for non-ASCII bytes.
            const char lowered =
                static_cast<char>(std::tolower(static_cast<unsigned char>(current[j])));
            // '\n' instead of endl: same bytes, without flushing on every character.
            std::cout << lowered << '\n';
        }
    }
    std::cout.flush();
}
/*
* function_identifier: Checks if word already exists in our array, if it does it returns the index.
* parameters: string word, struct Unigram list[], int count
* return value: Returns unigram list and word count, algorithm is incomplete.
*/
/*
int inList(string word, struct Unigram list[], int count) //Good practice for function overloading
{
}
*/
/*
* function_identifier: Checks if word already exists in our array, if it does it returns the index.
* parameters: string word, struct bigram list[], int count
* return value: Returns bigram list and word count, algorithm is incomplete.
*/
/*
int inList (string word, struct bigram list[], int count) //Good practice for function overloading
{
}
*/
/*
* function_identifier: Will sort the unigrams and bigrams alphabetically and by count.
* parameters:
* return value: Returns sorted unigrams or bigrams, algorithm is incomplete.
*/
/*
void sortgms () //function to sort unigrams and function to sort bigrams
{
}
*/
/*
* function_identifier: Will generate array of unigrams.
* parameters: struct Unigram unigram[], int& unigramCount, string word[], int wordCount
* return value: Array of sorted unigrams for log, algorithm is incomplete.
*/
/*
void generate_unigrams (struct Unigram unigram[], int& unigramCount, string word[], int wordCount) //Unigram Extractor
{
}
*/
/*
* function_identifier: Will generate array of bigrams.
* parameters: struct bigram bigram[], int& bigramCount, string word[], int wordCount
* return value: Array of sorted bigrams for log, algorithm is incomplete.
*/
/*
void generate_bigrams (struct bigram bigram[], int& bigramCount, string word[], int wordCount) //Bigram Extractor
{
}
*/
/*
* function_identifier: Creates log file for unigrams.
* parameters:
* return value: Creates log file for unigrams.
*/
/*
void write1gmslog ()
{
outFile.open("1gmslog"); //Unable to log to file
outFile << ;
}
*/
/*
* function_identifier: Creates log file for bigrams.
* parameters:
* return value: Creates log file for bigrams.
*/
/*
void write2gmslog ()
{
/outFile.open("2gmslog.txt"); //Unable to log to file
outFile << ;
}
*/
/*
* function_identifier: main - reads the input text, runs the cleanup pass, then serves
*                      the command menu until the user quits or input ends.
* parameters: argc/argv - an optional first argument is forwarded to readdata as the
*                         input file path; with no argument an empty path is passed.
* return value: 0 on normal termination.
*/
int main(int argc, char* argv[])
{
    // Heap-backed storage: ArrayMax std::string objects are too large to place
    // safely on the stack on platforms with small default stacks.
    std::vector<std::string> word(ArrayMax);
    int wordCount = 0;
    std::string infile;
    if (argc > 1)
    {
        infile = argv[1];
    }

    std::cout << "Array Size: " << ArrayMax << std::endl; // displays array size
    const int count = readdata(&word[0], wordCount, infile); // calls readdata function and displays word count
    std::cout << "Finished Reading " << count << " words." << std::endl;
    cleanup(&word[0], wordCount); // calls clean up function

    bool done = false;
    while (!done) //Menu loop, repeats until the user quits or input ends.
    {
        std::cout << "Commands " << std::endl;
        std::cout << "(1): Unigram: search for unigram " << std::endl;
        std::cout << "(2): Bigram : search for bigram " << std::endl;
        std::cout << "(3): Save : save 1gms.txt and 2gms.txt " << std::endl;
        std::cout << "(4): Index : print index of unigram or bigram " << std::endl;
        std::cout << "(5): Quit / q : terminate program " << std::endl;

        // Read the command as text: extracting into an int would put cin into a
        // failed state on the advertised "q" command and on any other non-number.
        std::string choice;
        if (!(std::cin >> choice))
        {
            break; // end of input or stream failure: nothing more to read
        }

        if (choice == "1")
        {
            std::cout << "unigram" << std::endl;
            //generate_unigrams(struct Unigram unigram[], int& unigramCount, string word[], int wordCount) //Unigram Extractor
        }
        else if (choice == "2")
        {
            std::cout << "bigram" << std::endl;
            //generate_bigrams(struct bigram bigram[], int& bigramCount, string word[], int wordCount) //Bigram Extractor
        }
        else if (choice == "3")
        {
            std::cout << "save" << std::endl;
            //write1gmslog()
            //write2gmslog()
        }
        else if (choice == "4")
        {
            std::cout << "index" << std::endl;
            //sortgms() //function to sort unigrams and function to sort bigrams
        }
        else if (choice == "5" || choice == "q" || choice == "Q") //terminates program
        {
            std::cout << "quit" << std::endl;
            done = true; // a normal quit falls through to "return 0", not a failure exit code
        }
        else
        {
            std::cout << "Unknown command: " << choice << std::endl;
        }
    }
    return 0;
}