-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathLoadData.py
33 lines (29 loc) · 929 Bytes
/
LoadData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from numpy import *
import random
def load_rating_data(file_path='ml-100k/u.data'):
    """
    Load MovieLens 100k ratings from the original tab-separated rating file.

    The rating file has to be downloaded first (source:
    http://www.grouplens.org/) and be reachable at ``file_path``; the
    default is ``ml-100k/u.data`` relative to the working directory.

    Each non-blank line must hold exactly four tab-separated items:
    user id, movie id, rating and timestamp. The timestamp is discarded.

    Args:
        file_path: path of the rating file to read.

    Returns:
        A NumPy array with one ``[user id, movie id, rating]`` row per
        record, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a non-blank line does not have four tab-separated
            items, or an item cannot be converted to a number.
    """
    prefer = []
    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part-way through the file.
    with open(file_path, 'r') as rating_file:
        for line in rating_file:
            # Tolerate blank lines (e.g. a trailing empty line at EOF).
            if not line.strip():
                continue
            # Every record in the data set has 4 items.
            userid, movieid, rating, _ts = line.split('\t')
            prefer.append([int(userid), int(movieid), float(rating)])
    return array(prefer)
def spilt_rating_dat(data, size=0.2, seed=None):
    """
    Randomly split rating rows into a training set and a test set.

    One random number in [0, 1) is drawn per row; the row goes to the test
    set when that number is below ``size`` and to the training set
    otherwise. ``size`` is therefore the expected test fraction, not an
    exact one. The relative order of rows is kept in both outputs.

    Args:
        data: iterable of rating rows (e.g. the array returned by
            ``load_rating_data``).
        size: probability that a row is assigned to the test set.
        seed: optional seed for a private random generator, making the
            split reproducible. When ``None`` (the default) the
            module-level ``random`` state is used, as before.

    Returns:
        A ``(train_data, test_data)`` tuple of NumPy arrays.
    """
    # A dedicated generator keeps a seeded split from disturbing, or being
    # disturbed by, other users of the global random state.
    draw = random.random if seed is None else random.Random(seed).random

    train_data = []
    test_data = []
    for line in data:
        if draw() < size:
            test_data.append(line)
        else:
            train_data.append(line)
    return array(train_data), array(test_data)