# v2.toml
##################################################################################################
# Defining the model
##################################################################################################
# Design question: list hidden layers in file order (the commented draft below) or number
# them explicitly (the live [layer.N] tables further down)? See also the array-of-tables
# sketch after the draft.
# [layer]
# type = "conv"
# image = { x = 24, y = 24 } # don't think we need this since prior layer conv settings define it
# filter = "something"
# stride = "something"
# pad = "something"
# activation = "relu"
# [layer]
# activation = "tanh" # without specifying type, a fully connected linear layer is assumed by default
# units = 256
# dropout = false
# droplim = 0.8 # or for multiple layers [1.0, 0.8, 1.0], if for all except output
# [layer]
# activation = "relu"
# units = 128
# [layer]
# activation = "tanh"
# units = 64
# [layer] # do we need to make this explicit or leave it implicit?
# type = "linear" # or "affine"; use this to force a fully connected (dense) layer after conv/rnn
[layer.1]
type = "conv"
image = { x = 24, y = 24, z = 3 } # probably needed only on the first layer; later conv layers can infer dims from the prior layer's settings
filter = "something"
stride = "something"
pad = "something"
activation = "relu"
trainable = true
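# conv sizing sketch (hypothetical numbers, since filter/stride/pad above are
# placeholders): out = floor((in + 2*pad - filter) / stride) + 1, so a 5x5
# filter at stride 1 with pad 0 on this 24x24 input gives a 20x20 feature map:
# floor((24 + 0 - 5) / 1) + 1 = 20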
[layer.2]
activation = "tanh" # without specifying type, a fully connected linear layer is assumed by default
units = 256
dropout = false
droplim = 0.8 # or a per-layer list, e.g. [1.0, 0.8, 1.0], when given for all layers except output
trainable = true # the default; OK to leave this out. Use trainable = false
# primarily for transfer learning
[layer.3]
activation = "relu"
units = 128
[layer.4]
activation = "tanh"
units = 64
[layer.5] # do we need to make this explicit or leave it implicit???
type = "linear"
[output] # should this be an element in the layer array?--we can always stick it there
# required
classify = "softmax"
target = "something" # we need a variable
##################################################################################################
# Defining the data
##################################################################################################
[data.train] # not an element of the layer array: use "layer" to mean hidden layer only
# required
norm_mode = "none" # maybe put this in training?
input = "something" # we need a variable
image = { x = 28, y = 28, z = 3 } # need to know if it is stacked or flat
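# for reference: flat would make each example a single 2352-element vector
# (28 * 28 * 3), while stacked keeps the three 28x28 channel planes separate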
target = "something"
# should we put the input variables in here?
[data.test]
input = "something"
target = "something"
##################################################################################################
# Controlling the training process
##################################################################################################
[training] # required
epochs = 24 # required
alpha = 0.74 # learning rate, written alpha or eta in various notations; required
learn_decay = [0.5, 4.0] # or {factor = 0.5, steps = 4.0}
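# assumed semantics: multiply alpha by factor (0.5) at steps (4) evenly spaced
# points in the run: 0.74 -> 0.37 -> 0.185 -> 0.0925 -> 0.04625 over 24 epochs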
quiet = true
initializer = "xavier"
scale_init = 2.0
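# if scale_init multiplies the variance numerator (an assumption), xavier with
# 2.0 gives He-style 2/fan_in initialization, the usual pairing with relu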
bias_initializer = 0.3
input_norm = "none" # put in training or input layer of model
[minibatch] # global--applies to all layers; if omitted, no minibatches are used
do_batch = true
do_batch_norm = true # this can be by layer, but minibatch applies to the entire training run
mb_size_in = 50
norm_after = "linear" # or "activation"--not implemented
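# "linear" normalizes the pre-activation z, the placement in the original batch
# norm paper (Ioffe & Szegedy 2015); "activation" would normalize after the nonlinearity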
[optimization] # optional: if absent, no optimization
opt = "adam"
opt_params = [0.9, 0.999]
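# [0.9, 0.999] match the customary Adam defaults for beta1 and beta2, the
# exponential decay rates of the first- and second-moment estimates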
[regularization] # global--applies to all hidden layers (per-layer regularization not implemented)
# optional--if absent, no regularization
reg = "Maxnorm" # or "L2", "L1"
maxnorm_lim = 4.0
lambda = 0.000191 # only applies to L2 or L1--ignored otherwise
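# presumed effects: L2 adds (lambda / (2m)) * sum(w^2) over all weights to the
# cost (L1 the absolute-value analog); maxnorm instead rescales any unit's
# weight vector whose norm exceeds maxnorm_lim back down to maxnorm_lim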
[plotting] # optional: if absent, no plotting or plot data
stats = ["Train", "Learning", "Test", "Cost", "epoch"] # or "batch"
plot_now = true
[results]
# all = true # this is the default--and what you get if you don't include the results table
train_inputs = true
train_targets = true
train_preds = true
test_inputs = true
test_targets = true
test_preds = true
hyper_params = true
wgts = true
batchnorm_params = true