forked from hichenway/stock_predict_with_LSTM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstock_predict_2.py
executable file
·134 lines (118 loc) · 5.92 KB
/
stock_predict_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#coding=utf-8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# ——————————————————Hyper-parameters——————————————————————
rnn_unit = 10      # number of hidden units in the LSTM cell
input_size = 7     # number of feature columns fed to the network per time step
output_size = 1    # number of target values per time step
lr = 0.0006        # learning rate passed to the Adam optimizer
# ——————————————————Load the data——————————————————————
# Use a context manager so the CSV file handle is closed as soon as pandas
# has finished reading it (the handle was previously left open for the
# lifetime of the process).  `f` stays bound afterwards, as a closed file.
with open('dataset_2.csv') as f:
    df = pd.read_csv(f)  # read the stock data
# Keep columns 3-10 (1-based): the first 7 of these are used as features and
# the 8th as the prediction target by the data helpers below.
data = df.iloc[:, 2:10].values
# Build the training set
def get_train_data(batch_size=60, time_step=20, train_begin=0, train_end=5800, dataset=None):
    """Slice, standardize and window the training rows.

    Args:
        batch_size: number of consecutive windows per mini-batch.
        time_step: number of rows in every window.
        train_begin: first row (inclusive) of the training slice.
        train_end: last row (exclusive) of the training slice.
        dataset: optional 2-D array with at least 8 columns (7 features
            followed by the target).  Defaults to the module-level ``data``
            array, which keeps existing callers working unchanged.

    Returns:
        A tuple ``(batch_index, train_x, train_y)`` where

        * ``batch_index`` holds the start offset of every mini-batch in
          ``train_x`` / ``train_y`` followed by one final end offset, so
          consecutive pairs can be used directly as slice bounds;
        * ``train_x`` is a list of windows, each ``time_step`` rows of the
          7 feature columns (nested Python lists);
        * ``train_y`` is the matching list of windows of the target column,
          each row wrapped in a one-element list.
    """
    if dataset is None:
        dataset = data  # fall back to the array loaded at module import
    data_train = np.asarray(dataset)[train_begin:train_end]
    # Column-wise standardization using the statistics of the training slice.
    normalized_train_data = (data_train - np.mean(data_train, axis=0)) / np.std(data_train, axis=0)

    # Windows start at rows 0 .. len - time_step - 1 (same range as before).
    n_windows = len(normalized_train_data) - time_step
    starts = range(n_windows)

    train_x = [normalized_train_data[i:i + time_step, :7].tolist() for i in starts]
    train_y = [normalized_train_data[i:i + time_step, 7, np.newaxis].tolist() for i in starts]

    # Every batch_size-th window opens a new mini-batch; the trailing entry
    # closes the last (possibly shorter) one.
    batch_index = [i for i in starts if i % batch_size == 0]
    batch_index.append(n_windows)
    return batch_index, train_x, train_y
# Build the test set
def get_test_data(time_step=20, test_begin=5800, dataset=None):
    """Slice, standardize and chunk the test rows.

    The rows from ``test_begin`` onwards are standardized column-wise with
    their own mean and standard deviation and then cut into consecutive,
    non-overlapping chunks of ``time_step`` rows.  The final chunk holds the
    remaining rows and may therefore be shorter than ``time_step``.

    Args:
        time_step: number of rows per chunk.
        test_begin: first row (inclusive) of the test slice.
        dataset: optional 2-D array with at least 8 columns (7 features
            followed by the target).  Defaults to the module-level ``data``
            array, which keeps existing callers working unchanged.

    Returns:
        A tuple ``(mean, std, test_x, test_y)`` where ``mean`` and ``std``
        are the per-column statistics used for standardization, ``test_x``
        is a list of chunks of the 7 feature columns (nested Python lists)
        and ``test_y`` is the flat list of standardized target values.
    """
    if dataset is None:
        dataset = data  # fall back to the array loaded at module import
    data_test = np.asarray(dataset)[test_begin:]
    mean = np.mean(data_test, axis=0)
    std = np.std(data_test, axis=0)
    normalized_test_data = (data_test - mean) / std  # standardize

    test_x, test_y = [], []
    # Iterate over chunk start offsets instead of reusing a leaked loop
    # variable for the tail: the previous form raised NameError whenever the
    # test slice contained at most one chunk.
    for start in range(0, len(normalized_test_data), time_step):
        chunk = normalized_test_data[start:start + time_step]
        test_x.append(chunk[:, :7].tolist())
        test_y.extend(chunk[:, 7].tolist())
    return mean, std, test_x, test_y
#——————————————————Define the network variables——————————————————
# Weights and biases of the input and output layers.
# Both dictionaries are read by lstm() through the keys 'in' and 'out'.
# NOTE(review): the variables are created here in a fixed order; TensorFlow
# presumably derives their default names (and hence checkpoint keys) from
# that order, so avoid reordering without checking saved checkpoints.
weights={
# input projection: maps the input_size features to rnn_unit values
'in':tf.Variable(tf.random_normal([input_size,rnn_unit])),
# output projection: maps rnn_unit values to a single prediction
# (the literal 1 matches output_size above, but is not tied to it)
'out':tf.Variable(tf.random_normal([rnn_unit,1]))
}
biases={
# both biases start at the constant 0.1
'in':tf.Variable(tf.constant(0.1,shape=[rnn_unit,])),
'out':tf.Variable(tf.constant(0.1,shape=[1,]))
}
#——————————————————Define the LSTM network——————————————————
def lstm(X):
    """Build the LSTM graph for the input tensor ``X``.

    ``X`` is flattened to rows of ``input_size`` features, projected with the
    'in' weights/bias, reshaped back into sequences and run through a single
    ``BasicLSTMCell`` of ``rnn_unit`` units.  Every LSTM output row is then
    projected with the 'out' weights/bias.

    Returns:
        ``(pred, final_states)``: the per-row predictions (one column) and
        the final state returned by ``tf.nn.dynamic_rnn``.
    """
    n_batch = tf.shape(X)[0]
    n_steps = tf.shape(X)[1]

    # Input layer: the matmul needs a 2-D tensor, so collapse batch and time.
    flat_features = tf.reshape(X, [-1, input_size])
    projected = tf.matmul(flat_features, weights['in']) + biases['in']
    # Restore the 3-D layout expected by the recurrent layer.
    rnn_inputs = tf.reshape(projected, [-1, n_steps, rnn_unit])

    # Recurrent layer, starting from an all-zero state.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
    zero_state = lstm_cell.zero_state(n_batch, dtype=tf.float32)
    rnn_outputs, final_states = tf.nn.dynamic_rnn(
        lstm_cell,
        rnn_inputs,
        initial_state=zero_state,
        dtype=tf.float32,
    )

    # Output layer: one prediction per (sample, time step) row.
    flat_outputs = tf.reshape(rnn_outputs, [-1, rnn_unit])
    pred = tf.matmul(flat_outputs, weights['out']) + biases['out']
    return pred, final_states
#————————————————Train the model————————————————————
def train_lstm(batch_size=60, time_step=20, train_begin=2000, train_end=5800):
    """Build the training graph, fit the LSTM and save a checkpoint.

    The network is created inside the ``"sec_lstm"`` variable scope, trained
    with Adam on the mean squared error for 10 passes over the training
    windows, and finally saved to ``model_save2/modle.ckpt``.

    Args:
        batch_size: number of windows per mini-batch.
        time_step: number of rows per window.
        train_begin: first row (inclusive) of the training slice.
        train_end: last row (exclusive) of the training slice.

    Raises:
        ValueError: if the requested slice yields no training window.
    """
    import os  # local import: only needed to build the checkpoint path

    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
    batch_index, train_x, train_y = get_train_data(batch_size, time_step, train_begin, train_end)
    if len(batch_index) < 2:
        # Without this guard the loop below never runs and the loss print
        # fails with an unbound-variable error that hides the real cause.
        raise ValueError("not enough training rows for a single window of time_step rows")

    with tf.variable_scope("sec_lstm"):
        pred, _ = lstm(X)
    loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)

    # Build the checkpoint path with os.path.join so it is valid on every
    # platform: the former hard-coded 'model_save2\\modle.ckpt' only named a
    # file inside model_save2 on Windows.  The directory is created up front
    # so that saving does not depend on it already existing.
    checkpoint_dir = 'model_save2'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = os.path.join(checkpoint_dir, 'modle.ckpt')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Number of passes over the training set; more passes may improve the
        # fit at the cost of a longer run.
        for i in range(10):
            for step in range(len(batch_index) - 1):
                begin, end = batch_index[step], batch_index[step + 1]
                _, loss_ = sess.run(
                    [train_op, loss],
                    feed_dict={X: train_x[begin:end], Y: train_y[begin:end]},
                )
            # Report the loss of the last mini-batch of this pass.
            print("Number of iterations:", i, " loss:", loss_)
        print("model_save: ", saver.save(sess, checkpoint_path))
        print("The train has finished")
train_lstm()
#————————————————Prediction————————————————————
def prediction(time_step=20):
    """Restore the latest checkpoint, predict the test set and plot it.

    The network is rebuilt inside the ``"sec_lstm"`` scope with
    ``reuse=True`` and its parameters are restored from the latest
    checkpoint in ``model_save2``.  Predictions and targets are mapped back
    to the original scale with the test-set mean/std of the target column
    before the deviation is printed and both curves are plotted
    (prediction in blue, actual values in red).

    Args:
        time_step: number of rows per test window; must match the value the
            model is fed with, since the placeholder fixes this dimension.
    """
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    mean, std, test_x, test_y = get_test_data(time_step)
    with tf.variable_scope("sec_lstm", reuse=True):
        pred, _ = lstm(X)
    saver = tf.train.Saver(tf.global_variables())

    # The placeholder only accepts windows of exactly time_step rows.  Only
    # the trailing window can be shorter, so filter by length instead of
    # unconditionally dropping the last window (which also discarded it when
    # it was complete).
    full_windows = [window for window in test_x if len(window) == time_step]

    with tf.Session() as sess:
        # restore the trained parameters
        module_file = tf.train.latest_checkpoint('model_save2')
        saver.restore(sess, module_file)
        test_predict = []
        for window in full_windows:
            prob = sess.run(pred, feed_dict={X: [window]})
            test_predict.extend(prob.reshape((-1)))

    # Undo the standardization of the target column (index 7).
    test_y = np.array(test_y) * std[7] + mean[7]
    test_predict = np.array(test_predict) * std[7] + mean[7]

    # Despite the label printed below, this is the mean relative deviation
    # between prediction and target (lower is better), not an accuracy.
    actual = test_y[:len(test_predict)]
    acc = np.average(np.abs(test_predict - actual) / actual)
    print("The accuracy of this predict:", acc)

    # show the result as a line chart
    plt.figure()
    plt.plot(list(range(len(test_predict))), test_predict, color='b',)
    plt.plot(list(range(len(test_y))), test_y, color='r')
    plt.show()
prediction()