-
Notifications
You must be signed in to change notification settings - Fork 0
/
dimension_manager.py
123 lines (101 loc) · 5.96 KB
/
dimension_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from apk import APK
from db_context_managers import OpenReaderCursor
from utils import SQLITE_DB_PATH
import sqlite3
_SQL_create_dimensions_table = """CREATE TABLE dimensions
AS
SELECT features.type AS type,
features.name AS name,
sum(apks.malignity) / (SELECT 1.0* sum(apks.malignity) FROM apks
WHERE apks.dataset IN ({0})) AS malware_usage,
(sum(apks.malignity) * 1.0) / count(features.apk_id) AS malware_ratio
FROM apks
JOIN
features
ON apks.id = features.apk_id
WHERE apks.dataset IN ({1})
GROUP BY features.type, features.name"""
_SQL_drop_dimensions_table = "DROP TABLE IF EXISTS dimensions"
_SQL_get_dimensions = """SELECT printf("%s::%s", type, name) AS dimension
FROM dimensions"""
_SQL_get_top_usage_ratio_dimensions = """SELECT printf("%s::%s", type, name) AS dimension
FROM dimensions
WHERE malware_usage >= :malware_usage
AND
malware_ratio >= :malware_ratio
ORDER BY malware_usage DESC, malware_ratio DESC"""
_SQL_get_top_ratio_usage_dimensions = """SELECT printf("%s::%s", type, name) AS dimension
FROM dimensions
WHERE malware_usage >= :malware_usage
AND
malware_ratio >= :malware_ratio
ORDER BY malware_ratio DESC, malware_usage DESC"""
_SQL_get_top_ratio_usage_dimension_in_type = """SELECT printf("%s::%s", type, name) AS dimension
FROM dimensions
WHERE malware_usage >= :malware_usage
AND
malware_ratio >= :malware_ratio
AND
type = :type
ORDER BY malware_ratio DESC, malware_usage DESC"""
_SQL_get_top_usage_ratio_dimension_in_type = """SELECT printf("%s::%s", type, name) AS dimension
FROM dimensions
WHERE malware_usage >= :malware_usage
AND
malware_ratio >= :malware_ratio
AND
type = :type
ORDER BY malware_usage DESC, malware_ratio DESC"""
def create_dimensions_table_from_datasets(datasets: list):
connection = sqlite3.connect(SQLITE_DB_PATH)
cursor = connection.cursor()
cursor.execute(_SQL_drop_dimensions_table)
connection.commit()
dataset_in_clause_string = ",".join(["'" + dataset + "'" for dataset in datasets])
sqlite_creation_table_statement = _SQL_create_dimensions_table.format(dataset_in_clause_string,
dataset_in_clause_string)
cursor.execute(sqlite_creation_table_statement)
connection.commit()
cursor.close()
connection.close()
def get_all_dimensions() -> list:
"""returns all the dimensions of the apks in the DB"""
with OpenReaderCursor(_SQL_get_dimensions) as cursor:
return [dimension[0] for dimension in cursor.fetchall()]
def get_top_usage_ratio_dimensions(malware_usage=0, malware_ratio=0, limit=None) -> list:
return get_limited_dimensions(_SQL_get_top_usage_ratio_dimensions,
{'malware_usage': malware_usage,
'malware_ratio': malware_ratio},
limit=limit)
def get_top_ratio_usage_dimensions(malware_usage=0, malware_ratio=0, limit=None) -> list:
return get_limited_dimensions(_SQL_get_top_ratio_usage_dimensions,
{'malware_usage': malware_usage,
'malware_ratio': malware_ratio},
limit=limit)
def get_top_ratio_usage_dimensions_by_types(malware_usage=0, malware_ratio=0, limit=None) -> list:
dimensions = []
for type in APK.get_feature_types():
type_dimensions = get_limited_dimensions(_SQL_get_top_ratio_usage_dimension_in_type,
{'malware_usage': malware_usage,
'malware_ratio': malware_ratio,
'type': type},
limit=limit)
dimensions.extend(type_dimensions)
return dimensions
def get_top_usage_ratio_dimensions_by_types(malware_usage=0, malware_ratio=0, limit=None) -> list:
dimensions = []
for type in APK.get_feature_types():
type_dimensions = get_limited_dimensions(_SQL_get_top_usage_ratio_dimension_in_type,
{'malware_usage': malware_usage,
'malware_ratio': malware_ratio,
'type': type},
limit=limit)
dimensions.extend(type_dimensions)
return dimensions
def get_limited_dimensions(sql_get_top_dimensions, sql_arg_dict, limit=None) -> list:
with OpenReaderCursor(sql_get_top_dimensions, sql_arg_dict) as curosr:
dimensions = [dimension[0] for dimension in curosr.fetchall()]
if limit and limit <= len(dimensions):
return dimensions[:limit]
else:
return dimensions