Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data and retrain model with new keywords #54

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ For local development, you can run the server by doing `python server.py`, which

### Training the NLP Model

You can train a new version of the neural network on the `data/survey.csv` data by doing `python train.py`. This will begin training of a basic 64 cell LSTM model (which is defined in `net.py`). You can configure the training parameters which are constants at the top of `train.py`.
You can train a new version of the neural network on the `data/cumulative.csv` data by running `python train.py`. This trains a basic 64-cell LSTM model (defined in `net.py`). The training parameters are defined as constants at the top of `train.py`, where you can adjust them.

```python
TOKENIZER_VOCAB_SIZE = 500 # Vocabulary size of the tokenizer
Expand Down
26 changes: 25 additions & 1 deletion nlp/data/cumulative.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1259,4 +1259,28 @@ https://twitter.com/Adobe,taking a break,yes,2020-03-31 17:05:00 +0000 UTC
,procrastinate on my chinese school hw,no
,check the side effects of a medication,yes
,get to know the people im going to school with enxt year,yes
,reach my friends new article,yes
,reach my friends new article,yes
,watch tutorials on knitting,yes
,focusing on my social and emotional wellbeing,yes
,check on my friend’s progress,yes
,trying to fix my broken computer,yes
,getting some inspo for decorating my house,yes
,doing some gift shopping for the holidays,yes
,check when the restaurant opens,yes
,discovering ways to take care of my dog,yes
,learning to tie a tie,yes
,read self-help guides,yes
,managing stress,yes
,taking care of my finances,yes
,find some fitness videos,yes
,add something to my resume,yes
,looking for some new webtoons,no
,getting away from homework to procastinate later,no
,idk im just bored,no
,getting ready for my next class,yes
,look at some fashion trends,no
,browse cool website designs,no
,looking at tech twitter stuff,no
,get some nfts,no
,buy some crypto,no
,keep track of my stocks,no
2 changes: 1 addition & 1 deletion nlp/data_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"havent": "have not",
"doesnt": "does not",
"he'll": "he will",
"she'll": "she will"
"she'll": "she will"
}

# remove punctuation from string
Expand Down
7 changes: 7 additions & 0 deletions nlp/models/acc82.49/details.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
BATCH_SIZE: 128
NUM_EPOCHS: 20
SEQUENCE_MAX_LENGTH: 35
TOKENIZER_VOCAB_SIZE: 1000
TRAINED_AT: 2021-10-31 18:21:05.743503
TRAIN_TEST_SPLIT: 0.2
VALIDATION_SPLIT: 0.2
1 change: 1 addition & 0 deletions nlp/models/acc82.49/model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 35], "dtype": "float32", "sparse": false, "ragged": false, "name": "inputs"}, "name": "inputs", "inbound_nodes": []}, {"class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "batch_input_shape": [null, 35], "dtype": "float32", "input_dim": 1000, "output_dim": 128, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 35}, "name": "embedding", "inbound_nodes": [[["inputs", 0, 0, {}]]]}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 3}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "name": "lstm", "inbound_nodes": [[["embedding", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}, "name": "dropout", "inbound_nodes": [[["lstm", 0, 0, {}]]]}, {"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "dtype": "float32", 
"return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 9}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 10}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 11}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "name": "lstm_1", "inbound_nodes": [[["dropout", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "FC1", "trainable": true, "dtype": "float32", "units": 256, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "FC1", "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}, "name": "dropout_1", "inbound_nodes": [[["FC1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "out_layer", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": 
"out_layer", "inbound_nodes": [[["dropout_1", 0, 0, {}]]]}, {"class_name": "Activation", "config": {"name": "activation", "trainable": true, "dtype": "float32", "activation": "sigmoid"}, "name": "activation", "inbound_nodes": [[["out_layer", 0, 0, {}]]]}], "input_layers": [["inputs", 0, 0]], "output_layers": [["activation", 0, 0]]}, "keras_version": "2.5.0", "backend": "tensorflow"}
1 change: 1 addition & 0 deletions nlp/models/acc82.49/tokenizer.json

Large diffs are not rendered by default.

Binary file added nlp/models/acc82.49/weights.h5
Binary file not shown.
14 changes: 7 additions & 7 deletions nlp/net.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from keras.layers import Activation
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Input
from keras.layers import LSTM
from keras.models import Model
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Model

# Define RNN Architecture
def RNN(max_seq_len, vocab_size):
Expand Down
10 changes: 5 additions & 5 deletions nlp/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
import datetime

# -- Deep Learning Libraries --
from keras.callbacks import EarlyStopping
from keras.optimizers import RMSprop
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Expand Down