-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwords.go
502 lines (443 loc) · 12.7 KB
/
words.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"runtime/pprof"
"sort"
"strconv"
"strings"
"time"
"github.com/gilwo/wordscvc/cvc"
"github.com/gilwo/workqueue/pool"
"github.com/jessevdk/go-flags"
)
// flagOpts lists every setting that can be supplied on the command line.
// main hands a value embedding this struct to the go-flags parser, which reads
// the struct tags below. Each description starts with a running number
// (presumably to keep the help output in a fixed order - TODO confirm).
type flagOpts struct {
	// Size of the requested result and the frequency rules for a set.
	MaxGroups                   int `short:"G" description:"1 number of result groups to generate" default:"20"`
	MaxSets                     int `short:"S" description:"2 number of sets per group" default:"15"`
	MaxWords                    int `short:"W" description:"3 number of words per set" default:"10"`
	FreqCutoff                  int `short:"f" description:"4 frequency cutoff threshold for words, lower is more common" default:"25"`
	FreqWordsPerLineAboveCutoff int `short:"a" description:"5 how many words to be above cutoff threshold per line" default:"3"`
	// Hidden option; String does not report it.
	// NOTE(review): the original trailing comment was just "2" - possibly an
	// intended default value; confirm.
	VowelLimit int `long:"vowel" description:"6 how many time each vowel repeat per set" hidden:"1"`

	// Input and output files.
	InConsonantFile string `short:"C" description:"7 input file name for consonants to use" optional:"1" default:"consonants.txt"`
	InVowelFile     string `short:"V" description:"8 input file name for vowels to use" optional:"1" default:"vowels.txt"`
	InWordsFile     string `short:"i" description:"9 input file name for words list to use for creating the lines groups results" optional:"1" default:"words_list.txt"`
	FilterFile      string `short:"F" description:"10 input file name for filtered words"`
	OutResultFile   string `short:"o" description:"11 output file for generated results" default:"words_result.txt" default-mask:"-"`

	// Run time limit, profiling and diagnostics.
	TimeToRun    int    `short:"t" description:"12 how much time to run (in seconds)" default:"30"`
	CpuProfile   string `short:"c" description:"13 enable cpu profiling and save to file"`
	MemProfile   string `short:"m" description:"14 enable memory profiling and save to file"`
	DebugEnabled bool   `short:"d" description:"15 enable debugging information"`
	// Verbose gets one entry per -v given; the logging helpers check its length.
	Verbose []bool `short:"v" description:"16 show verbose information"`

	// Hidden worker pool tuning.
	UsePool       bool `short:"p" description:"17 enable using the worker pool logic" hidden:"1"`
	UseJobDispose bool `short:"D" description:"18 enable using the worker job dispose logic" hidden:"1"`
	Workers       uint `short:"w" description:"19 how many workers to use" default:"30" hidden:"1"`
}

// String renders the options as an indented, human readable listing.
// Related settings are grouped and the groups are separated by an empty line.
// VowelLimit and UseJobDispose are not part of the listing.
func (fo flagOpts) String() string {
	type row struct {
		label string
		value interface{}
	}

	sections := [][]row{
		{
			{"max groups", fo.MaxGroups},
			{"max sets", fo.MaxSets},
			{"max words", fo.MaxWords},
			{"frequency cutoff", fo.FreqCutoff},
			{"above frequency words per set", fo.FreqWordsPerLineAboveCutoff},
		},
		{
			// The blank before the colon is part of the established output.
			{"consonant file ", fo.InConsonantFile},
			{"vowels file", fo.InVowelFile},
			{"words file", fo.InWordsFile},
		},
		{
			{"filter file", fo.FilterFile},
			{"result output file", fo.OutResultFile},
		},
		{
			{"time to run", fo.TimeToRun},
		},
		{
			{"workers", fo.Workers},
			{"use pool", fo.UsePool},
		},
		{
			{"cpu profile file", fo.CpuProfile},
			{"memory profile file", fo.MemProfile},
			{"debug enabled", fo.DebugEnabled},
			{"verbose", fo.Verbose},
		},
	}

	text := "options setting:\n"
	for idx, section := range sections {
		if idx > 0 {
			text += "\n"
		}
		for _, r := range section {
			text += fmt.Sprintf("\t%s: '%v'\n", r.label, r.value)
		}
	}
	return text
}
// varOpts is the program-wide state: the command-line options (embedded
// flagOpts) plus bookkeeping fields that are only changed while running.
// The bookkeeping fields are plain values read and written from several
// goroutines in this file without any locking.
type varOpts struct {
	flagOpts
	// internal vars
	// countGroups is incremented by the message collector in main for every
	// completed group it receives.
	countGroups int
	// finishSignal is set to true by main once collecting ended or the run
	// time elapsed; workers and helper goroutines poll it to stop.
	finishSignal bool
	// maxWorkers is the highest number of simultaneously active workers
	// observed by the worker-counting goroutine in main.
	maxWorkers int
	// currentWorkers is the latest active-worker count from that goroutine.
	currentWorkers int
}
// Package-level state shared by main, findGroups and the helper goroutines
// that main starts.
var (
	// GenVarOpts holds the parsed command-line options and run-time counters.
	GenVarOpts varOpts

	// pool is the worker pool; main creates it only when UsePool is set.
	pool *workerpool.WPool

	// consonants and vowels are filled by main from the configured input files.
	consonants, vowels map[string]int

	// waitForWorkers receives one value from the worker-counting goroutine
	// after it saw no active worker for three consecutive one-second timeouts.
	waitForWorkers = make(chan bool)

	// collectingDone is closed by the message collector once the requested
	// number of groups was collected.
	collectingDone = make(chan struct{})

	// msgs carries depth, status and completed-group messages from the
	// workers to the message collector.
	msgs = make(chan string, 100)

	// startedWorkers and stoppedWorkers get one token per worker start and
	// per worker exit; the worker-counting goroutine consumes them.
	startedWorkers = make(chan struct{}, 100)
	stoppedWorkers = make(chan struct{}, 100)

	// maxSize is the largest group depth reported so far; the message
	// collector updates it and workers compare against it.
	maxSize int

	// disposeChan queues pool jobs for the job disposer; disposeDone is
	// closed by the disposer when disposeChan has been closed and drained.
	disposeChan = make(chan *workerpool.WorkerJob, 1000)
	disposeDone = make(chan bool)
)
// findArg is the argument bundle handed to findGroups, either directly or
// through the worker pool as an interface{} value.
// The field order matters: callers build it with positional literals.
type findArg struct {
	// group is the group set the worker tries to extend.
	group *cvc.GroupSet
	// wordmap holds the words the worker may still add to group.
	wordmap *cvc.WordMap
}
// findGroups is the worker body. It tries to extend arg.group with words from
// arg.wordmap and, for every word that could be added, starts a follow-up
// worker (goroutine or pool job) on copies of the group and the word map.
//
// iarg must hold a findArg. job is the pool job running this call (nil when
// started as a plain goroutine) and is handed to the job disposer on exit when
// pool and job-dispose mode are both enabled. stop is not used.
//
// Progress is reported through msgs: "depth: N" and "status: ..." lines plus
// the full text of every completed group. The function always returns nil.
func findGroups(iarg interface{}, job *workerpool.WorkerJob, stop workerpool.CheckStop) (none interface{}) {
	arg, _ := iarg.(findArg)

	defer func() {
		// A panic anywhere below (e.g. a send on msgs after the collector in
		// main closed it) must not take the whole program down.
		if r := recover(); r != nil {
			verbose("recovered from %s\n", r)
		}
		stoppedWorkers <- struct{}{}

		// Drop the references held by this worker.
		arg.group, arg.wordmap = nil, nil

		if GenVarOpts.UsePool && GenVarOpts.UseJobDispose && !GenVarOpts.finishSignal {
			disposeChan <- job
		}
		// runtime.GC()
	}()

	startedWorkers <- struct{}{}

	candidates := *arg.wordmap.GetCm()
	if !arg.group.Checkifavailable(arg.wordmap) {
		return nil
	}

	// Report when the group is more than 90% full.
	if float64(arg.group.CurrentSize())/float64(arg.group.MaxSize()) > 0.9 {
		msgs <- fmt.Sprintf("status: reached depth %d of %d\n",
			int(arg.group.CurrentSize()), int(arg.group.MaxSize()))
	}
	// Report a new depth record; the collector keeps maxSize up to date.
	if arg.group.CurrentSize() > maxSize {
		msgs <- "depth: " + strconv.Itoa(arg.group.CurrentSize())
	}

	for word := range candidates {
		if GenVarOpts.finishSignal {
			info("finishSignal issued, exiting\n")
			break
		}
		if GenVarOpts.countGroups >= GenVarOpts.MaxGroups {
			info("groups count %d reached max groups %d", GenVarOpts.countGroups, GenVarOpts.MaxGroups)
			break
		}

		added, full := arg.group.AddWord(word)
		if full {
			report := fmt.Sprintf("group completed\n%s\n", arg.group.StringWithFreq())
			if GenVarOpts.DebugEnabled {
				report += arg.group.DumpGroup() + "\n"
			}
			msgs <- report
			break
		}
		if !added {
			continue
		}

		// The word is part of the group now: remove it from the pool of
		// candidates and let a new worker continue from this state.
		arg.wordmap.DelWord(word)
		next := findArg{
			arg.group.CopyGroupSet(),
			arg.wordmap.CopyWordMap(),
		}
		if !GenVarOpts.UsePool {
			go findGroups(next, nil, nil)
			continue
		}
		if _, err := pool.NewJobQueue(findGroups, next); err != nil {
			info("error queuing job %v\n", err)
		}
		trace("%v\n", pool.PoolStats())
	}
	return nil
}
// main drives a complete run:
//
//  1. parse the command line into GenVarOpts,
//  2. optionally create the worker pool and enable memory / CPU profiling,
//  3. load the consonant, vowel and word files and build the base group,
//  4. start the helper goroutines (job disposer, worker counter, message
//     collector) and the first findGroups worker,
//  5. wait until MaxGroups groups were collected or TimeToRun seconds passed,
//  6. raise finishSignal, shut the pool down, wait until the worker counter
//     reports that no worker is active any more and print the collected groups.
//
// NOTE(review): finishSignal, countGroups, maxSize and out are shared between
// goroutines without synchronisation, and the job disposer may send on
// disposeChan around the time main closes it. These are left as they were.
func main() {
	var out string

	a, err := flags.NewParser(&GenVarOpts, flags.Default).Parse()
	if err != nil {
		if e, ok := err.(*flags.Error); ok {
			switch e.Type {
			case flags.ErrHelp:
				os.Exit(0)
			default:
				fmt.Printf("error parsing opts: %v\n", e.Type)
				os.Exit(1)
			}
		}
		// Only reached for an error that is not a *flags.Error; do not start
		// a run with options that failed to parse.
		fmt.Printf("error parsing opts: %v\n", err)
		os.Exit(1)
	}
	info("opts:\n%v\na:\n%v\n", GenVarOpts, a)

	if GenVarOpts.UsePool {
		if GenVarOpts.DebugEnabled {
			workerpool.WorkerPoolSetLogLevel(workerpool.DebugLevel)
		}
		if pool, err = workerpool.NewWPool(GenVarOpts.Workers); err != nil {
			fmt.Printf("failed to create pool %v\n", err)
			os.Exit(1)
		}
		if _, err = pool.StartDispatcher(); err != nil {
			fmt.Printf("failed to start pool dispatcher %v\n", err)
			os.Exit(1)
		}
	}

	if GenVarOpts.MemProfile != "" {
		f, err := os.Create(GenVarOpts.MemProfile)
		if err != nil {
			log.Fatal(err)
		}
		info("enable memprofiling, write to '%v'\n", GenVarOpts.MemProfile)
		// The heap profile is written when main returns.
		defer func() {
			if err := pprof.WriteHeapProfile(f); err != nil {
				log.Printf("writing heap profile: %v", err)
			}
			f.Close()
		}()
	}

	if GenVarOpts.CpuProfile != "" {
		f, err := os.Create(GenVarOpts.CpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		info("enable cpuprofiling, write to '%v'\n", GenVarOpts.CpuProfile)
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		// Stop profiling first so the profile is complete, then close the file.
		defer func() {
			pprof.StopCPUProfile()
			f.Close()
		}()
	}

	consonants = getMap(GenVarOpts.InConsonantFile)
	vowels = getMap(GenVarOpts.InVowelFile)
	verbose("consonants: %d\n%s\n", len(consonants), getOrderedMapString(consonants))
	verbose("vowels: %d\n%s\n", len(vowels), getOrderedMapString(vowels))

	wmap := getWordsMap(GenVarOpts.InWordsFile)
	verbose("map size: %d\ncontent:\n%s\n", wmap.Size(), wmap)

	// set the base group according to the required settings
	baseGroup := cvc.NewGroupSetLimitFreq(
		GenVarOpts.MaxSets,
		GenVarOpts.MaxWords,
		GenVarOpts.FreqCutoff,
		GenVarOpts.FreqWordsPerLineAboveCutoff)

	// start time measuring
	t0 := time.Now()

	if GenVarOpts.UsePool && GenVarOpts.UseJobDispose {
		// job disposer: disposes finished jobs, re-queues jobs that are not
		// finished yet (until finishSignal is raised) and closes disposeDone
		// once disposeChan has been closed and drained.
		go func() {
			for j := range disposeChan {
				if j.JobStatus() != workerpool.Jfinished {
					if !GenVarOpts.finishSignal {
						disposeChan <- j
					}
				} else {
					j.JobDispose()
				}
			}
			close(disposeDone)
		}()
	}

	// wait for all goroutines to finish: counts worker starts and stops and
	// signals waitForWorkers after three consecutive idle seconds.
	go func() {
		count := 0
		i := 0
		for {
			verbose("going to wait\n")
			select {
			case <-startedWorkers:
				trace("startedWorkers")
				count++
				if count > GenVarOpts.maxWorkers {
					GenVarOpts.maxWorkers = count
				}
				GenVarOpts.currentWorkers = count
				i = 0
			case <-stoppedWorkers:
				// println("stoppedWorkers")
				count--
				GenVarOpts.currentWorkers = count
				i = 0
			case <-time.After(1 * time.Second):
				if GenVarOpts.UsePool {
					trace("%v\n", pool.PoolStats())
				}
				if count > 0 {
					info("there are still %d active workers\n", count)
					trace("count = %d and i = %d", count, i)
				} else {
					verbose("count = 0 and i = %d\n", i)
					i++
					if i == 3 {
						waitForWorkers <- true
					}
				}
			}
		}
	}()

	// msg collector: tracks the depth record, prints status lines and
	// accumulates completed groups in out until MaxGroups is reached or
	// finishSignal is seen.
	go func() {
		for {
			select {
			case s := <-msgs:
				if strings.HasPrefix(s, "depth: ") {
					size, _ := strconv.Atoi(s[len("depth: "):])
					if size > maxSize {
						maxSize = size
						verbose("max depth : %d\n", maxSize)
					}
				} else if strings.HasPrefix(s, "status:") {
					info("%s", s)
				} else {
					GenVarOpts.countGroups++
					out += s
					info("%d\n%s", GenVarOpts.countGroups, s)
					if GenVarOpts.countGroups == GenVarOpts.MaxGroups {
						close(msgs)
						close(collectingDone)
						return
					}
				}
			default:
				time.Sleep(1 * time.Second)
				verbose("%s passed\n", time.Since(t0))
				if GenVarOpts.UsePool {
					info("%v\n", pool.PoolStats())
				}
				if GenVarOpts.finishSignal {
					info("finishSignal issued, exiting")
					close(msgs)
					return
				}
				debug("current workers %d, max workers %d\n",
					GenVarOpts.currentWorkers, GenVarOpts.maxWorkers)
			}
		}
	}()

	// Start the first worker; every further worker is started by findGroups.
	if GenVarOpts.UsePool {
		if _, err := pool.NewJobQueue(findGroups, findArg{baseGroup, wmap}); err != nil {
			// Without the first job nothing would ever run.
			fmt.Printf("failed to queue initial job %v\n", err)
			os.Exit(1)
		}
	} else {
		go findGroups(findArg{baseGroup, wmap}, nil, nil)
	}

	dur := time.Duration(GenVarOpts.TimeToRun)
	select {
	case <-collectingDone:
		info("required results collected after %s\n", time.Since(t0))
	case <-time.After(dur * time.Second):
		info("stopped after %s\n", time.Since(t0))
	}

	GenVarOpts.finishSignal = true

	// pool cleanup
	if GenVarOpts.UsePool {
		ch := make(chan struct{})
		if GenVarOpts.UseJobDispose {
			close(disposeChan)
		}
		pool.StopDispatcher(func() {
			// disposeDone is only ever closed by the job disposer, which runs
			// only in job-dispose mode; waiting for it unconditionally would
			// block forever when the pool is used without job disposal.
			if GenVarOpts.UseJobDispose {
				<-disposeDone
			}
			close(ch)
		})
		pool.Dispose()
		<-ch
	}

	info("waiting for waitForWorkers, %d workers\n", GenVarOpts.currentWorkers)
	<-waitForWorkers

	fmt.Printf("exiting... after %s\n", time.Since(t0))
	fmt.Println(out)
}
// getOrderedMapString returns the keys of m in ascending byte-wise order,
// joined with ", ". The values of m are not used.
// An empty or nil map yields the empty string (the previous implementation
// panicked in that case while cutting off the trailing separator).
func getOrderedMapString(m map[string]int) string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return strings.Join(keys, ", ")
}
// getMap loads mapfile through getWordsFromFile and returns its entries as a
// word -> number map. When a word occurs more than once the last entry wins.
func getMap(mapfile string) map[string]int {
	entries := getWordsFromFile(mapfile)
	table := make(map[string]int)
	for i := range entries {
		table[entries[i].word] = entries[i].number
	}
	return table
}
// getWordsMap loads the word list in fname and returns it as a cvc.WordMap.
//
// Every word is split into three parts for cvc.NewWord: when its second byte
// is a known vowel (a key of the package-level vowels map) the split is
// first byte / second byte / rest, otherwise first two bytes / third byte /
// rest. The number read next to the word is passed on unchanged.
//
// The function panics when a word is too short for the split it needs, or
// when the word built by cvc does not print back as the loaded word.
// The split works on bytes, so it assumes single-byte letters.
func getWordsMap(fname string) *cvc.WordMap {
	wmap := cvc.NewWordMap()
	for _, wf := range getWordsFromFile(fname) {
		// Fail with a readable message instead of an index-out-of-range panic.
		if len(wf.word) < 2 {
			panic("loaded word: " + wf.word + " is too short to be split into three parts")
		}

		var cvcw *cvc.Word
		if _, ok := vowels[string(wf.word[1])]; ok {
			cvcw = cvc.NewWord(
				string(wf.word[0]),
				string(wf.word[1]),
				wf.word[2:],
				wf.number)
		} else {
			if len(wf.word) < 3 {
				panic("loaded word: " + wf.word + " is too short to be split into three parts")
			}
			cvcw = cvc.NewWord(
				wf.word[0:2],
				string(wf.word[2]),
				wf.word[3:],
				wf.number)
		}

		if wf.word != cvcw.String() {
			panic("loaded word: " + wf.word + " and built word: " +
				cvcw.String() + " are NOT the same")
		}
		wmap.AddWord(cvcw)
	}
	return wmap
}
// checkErr panics with e when e is not nil.
func checkErr(e error) {
	if e != nil {
		panic(e)
	}
}

// WF - word number bundle: one parsed input line.
type WF struct {
	word   string
	number int
}

// getLinesFromFile reads fname and returns its lines without line endings.
// It panics (through checkErr) when the file cannot be read.
// An empty file yields no lines at all.
func getLinesFromFile(fname string) []string {
	data, err := ioutil.ReadFile(fname)
	checkErr(err)
	return splitLines(data)
}

// splitLines cuts data into lines. Trailing line breaks are dropped first, so
// neither an empty input nor a final newline produces an empty last line, and
// a carriage return left over from a CRLF line ending is removed per line.
func splitLines(data []byte) []string {
	data = bytes.TrimRight(data, "\r\n")
	if len(data) == 0 {
		return nil
	}
	lines := strings.Split(string(data), "\n")
	for i, line := range lines {
		lines[i] = strings.TrimSuffix(line, "\r")
	}
	return lines
}

// getWordsFromFile loads fname and parses every line into a WF entry.
// The expected line format is a word, optionally followed by ':' characters,
// then white space and a number, e.g. "bat: 12".
// It panics when the file cannot be read; the result is never nil.
func getWordsFromFile(fname string) []WF {
	return parseWordLines(getLinesFromFile(fname))
}

// parseWordLines turns lines into WF entries, keeping their order.
// Blank lines are skipped (they used to cause an index-out-of-range panic),
// and any run of spaces or tabs separates the word from its number.
func parseWordLines(lines []string) []WF {
	resList := make([]WF, 0, len(lines))
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) == 0 {
			continue
		}
		entry := WF{word: strings.TrimRight(fields[0], ":")}
		if len(fields) > 1 {
			// A number that does not parse is deliberately kept as 0, which
			// is what Atoi returns on failure and what earlier versions used.
			entry.number, _ = strconv.Atoi(fields[1])
		}
		resList = append(resList, entry)
	}
	return resList
}
// debug prints a "debug: " prefixed, Printf-style message to standard output
// when the DebugEnabled option is set; otherwise it does nothing.
func debug(f string, v ...interface{}) {
	if !GenVarOpts.DebugEnabled {
		return
	}
	fmt.Printf("debug: "+f, v...)
}
// info prints an "info: " prefixed, Printf-style message to standard output
// when the verbose option was given at least once; otherwise it does nothing.
func info(f string, v ...interface{}) {
	if len(GenVarOpts.Verbose) < 1 || !GenVarOpts.Verbose[0] {
		return
	}
	fmt.Printf("info: "+f, v...)
}
// verbose prints a "verbose: " prefixed, Printf-style message to standard
// output when the verbose option was given at least twice; otherwise it does
// nothing. The prefix now ends in ": " like the debug and info prefixes; it
// used to be glued directly to the message text.
func verbose(f string, v ...interface{}) {
	if len(GenVarOpts.Verbose) < 2 || !GenVarOpts.Verbose[1] {
		return
	}
	fmt.Printf("verbose: "+f, v...)
}
// trace prints a "trace: " prefixed, Printf-style message to standard output
// when the verbose option was given at least three times; otherwise it does
// nothing. The prefix now ends in ": " like the debug and info prefixes; it
// used to be glued directly to the message text.
func trace(f string, v ...interface{}) {
	if len(GenVarOpts.Verbose) < 3 || !GenVarOpts.Verbose[2] {
		return
	}
	fmt.Printf("trace: "+f, v...)
}