-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredictor.py
91 lines (76 loc) · 2.92 KB
/
predictor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import nltk
import random
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize
class EnsembleClassifier(ClassifierI):
    """Majority-vote wrapper around several classifiers.

    Each wrapped object only needs a ``classify(features)`` method; the
    ensemble asks every one of them for a label and reduces the answers
    with ``statistics.mode``.
    """

    def __init__(self, *classifiers):
        """Store the classifiers, in the order given, as the voting members."""
        self._classifiers = classifiers

    def classify(self, features):
        """Return the label chosen by ``statistics.mode`` over all votes.

        Args:
            features: Feature set handed unchanged to every member's
                ``classify`` method.
        """
        return mode([member.classify(features) for member in self._classifiers])

    def confidence(self, features):
        """Return the share of members that voted for the winning label.

        The value is ``votes_for_winner / total_votes``, so it lies in
        ``(0, 1]`` whenever at least one classifier is wrapped.
        """
        ballots = [member.classify(features) for member in self._classifiers]
        winner = mode(ballots)
        return ballots.count(winner) / len(ballots)
class EnsembleBuilder:
    """Load the pickled sentiment models and classify review text with them.

    Every ``pickled_algos/...`` path is relative, so it is resolved against
    the process's current working directory.

    The voting ensemble and the word-feature list are read from disk the
    first time they are needed and then reused by this instance; create a
    new ``EnsembleBuilder`` to pick up pickle files that changed on disk.

    SECURITY: all loaders use ``pickle.load``, which can execute arbitrary
    code while deserializing. Only point them at trusted files.
    """

    # Pickled classifiers that vote in the ensemble, in voting order.
    _MODEL_PATHS = (
        'pickled_algos/ONB_clf.pickle',
        'pickled_algos/MNB_clf.pickle',
        'pickled_algos/BNB_clf.pickle',
        'pickled_algos/LogReg_clf.pickle',
        'pickled_algos/SGD_clf.pickle',
    )

    # Display names for the labels make_prediction knows how to report.
    _LABEL_NAMES = {"pos": "POSITIVE", "neg": "NEGATIVE"}

    # Lazily filled per-instance caches. They are declared on the class so
    # they exist even if a subclass overrides __init__ without calling it.
    _ensemble = None
    _word_features = None

    def __init__(self):
        """Create a builder; nothing is read from disk until first use."""
        pass

    @staticmethod
    def _unpickle(path):
        """Return the object pickled at *path*, always closing the file."""
        # The context manager closes the handle even when pickle.load raises,
        # which the previous open()/close() pairs did not guarantee.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def load_documents(self):
        """Return the object unpickled from ``pickled_algos/documents.pickle``.

        Raises:
            OSError: If the file cannot be opened.
            pickle.UnpicklingError: If the file is not a valid pickle.
        """
        return self._unpickle("pickled_algos/documents.pickle")

    def load_frequent_words(self):
        """Return the object unpickled from ``pickled_algos/frequent_words5k.pickle``.

        ``parse_text`` iterates over the result and uses each item as a
        feature name.

        Raises:
            OSError: If the file cannot be opened.
            pickle.UnpicklingError: If the file is not a valid pickle.
        """
        return self._unpickle("pickled_algos/frequent_words5k.pickle")

    def load_model(self, file_path):
        """Return the object unpickled from *file_path*.

        Args:
            file_path: Path to a pickle file (trusted input only).

        Raises:
            OSError: If the file cannot be opened.
            pickle.UnpicklingError: If the file is not a valid pickle.
        """
        return self._unpickle(file_path)

    def predict_ensemble(self, features):
        """Classify *features* with the five-model voting ensemble.

        Args:
            features: Feature dict as produced by ``parse_text``.

        Returns:
            A two-item list ``[label, confidence]`` where ``confidence`` is
            the fraction of models that voted for ``label``.
        """
        if self._ensemble is None:
            # Unpickling five models per prediction is wasteful; build the
            # ensemble once. If any load fails the cache stays empty, so the
            # next call retries from scratch.
            self._ensemble = EnsembleClassifier(
                *[self.load_model(path) for path in self._MODEL_PATHS]
            )
        ensemble_clf = self._ensemble
        return [ensemble_clf.classify(features), ensemble_clf.confidence(features)]

    def parse_text(self, document):
        """Turn raw text into a ``{word: bool}`` presence feature dict.

        Args:
            document: Text to tokenize with ``nltk.tokenize.word_tokenize``.

        Returns:
            A dict with one key per entry of the frequent-word list; the
            value is ``True`` when that entry occurs among the tokens.
        """
        # NOTE(review): tokens are compared exactly as word_tokenize returns
        # them (no case folding) -- confirm this matches how the pickled word
        # list was built.
        # A set gives O(1) membership tests instead of scanning a token list
        # once per feature word.
        tokens = set(word_tokenize(document))
        if self._word_features is None:
            self._word_features = self.load_frequent_words()
        return {word: (word in tokens) for word in self._word_features}

    def make_prediction(self, text):
        """Classify *text* and return a human-readable verdict sentence.

        Args:
            text: Review text to classify.

        Returns:
            ``"Your Review is <POSITIVE|NEGATIVE> with a Score of <pct>%."``;
            the category is left empty for any label other than
            ``"pos"``/``"neg"``.
        """
        result = self.predict_ensemble(self.parse_text(text))
        revcat = self._LABEL_NAMES.get(result[0], "")
        # Existing stdout trace, kept so current output is unchanged.
        print("Result = {}".format(result))
        return "Your Review is {} with a Score of {}%.".format(revcat, result[1] * 100)