-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_extractor.py
60 lines (52 loc) · 2.71 KB
/
feature_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import traceback
from utils import list_files, MALWARE_DATASET_DIR_PATH, BENIGN_DATASET_DIR_PATH, clean_baksmali_output_dir
from apk import APK
from apis_manager import init_in_memory_dicts
from feature_extractor_dex import extract_dex_features
from feature_extractor_manifest import extract_manifest_features
from db_manager import is_apk_in_db
def analyse_apks(path: str, dataset_name: str, malignity: int, use_aapt=False, in_memory=False,
                 verify_existance=False) -> int:
    """Extract features from every APK found under ``path`` and save each one to the db.

    A run log is written to ``analyse_log.txt`` (relative to the current working
    directory, truncated on every call). A failure on one APK is printed, logged
    and skipped so the remaining APKs are still processed.

    :param path: location handed to ``list_files`` to enumerate the APK files.
    :param dataset_name: dataset label passed to each ``APK`` object.
    :param malignity: label passed to each ``APK`` object (this file's dataset
        helpers pass 1 for malware and 0 for benign).
    :param use_aapt: forwarded to ``extract_manifest_features``.
    :param in_memory: when true, ``init_in_memory_dicts`` is called once before the
        loop and the flag is forwarded to ``extract_dex_features``.
    :param verify_existance: when true, APKs for which ``is_apk_in_db`` returns a
        truthy value are skipped and not counted.
    :return: number of APKs that were analysed and saved without raising.
    """
    apks = {APK(apk_path, dataset_name, malignity) for apk_path in list_files(path)}
    count = 0
    apks_number = len(apks)
    # The context manager guarantees the log is flushed and closed even when
    # something outside the per-APK try block raises (db lookup, dict init, ...).
    with open('analyse_log.txt', 'w') as analyse_log:
        analyse_log.write('####-> path :' + path + ' ####-> malignity' + str(malignity) + '\n')
        if in_memory:
            init_in_memory_dicts()
        for apk in apks:
            if verify_existance and is_apk_in_db(apk.get_name(), apk.get_dataset()):
                print(apk.get_name() + " ... already in db")
                continue
            try:
                extract_dex_features(apk, in_memory=in_memory)
                extract_manifest_features(apk, use_aapt=use_aapt)
                apk.save_apk_to_db()
                # Only count APKs that made it all the way through the save.
                count += 1
                print(apk.get_name() + ' analyzed .... ' + str(count) + '/' + str(apks_number))
            except Exception as exception:
                # Best effort: record the failure and carry on with the next APK.
                print('Error for APK ' + apk.get_name())
                analyse_log.write('Error for APK ' + apk.get_name() + '\n')
                analyse_log.write('Exception \n' + str(exception) + '\n')
                traceback.print_exc()
        analyse_log.write('####-> analyzed :' + str(count) + ' ####-> total: ' + str(apks_number) + '\n')
    return count
def analyse_malware_dataset(dir_name: str, dataset_name: str, use_aapt=False, in_memory=False,
                            verify_existance=False) -> int:
    """Run ``analyse_apks`` on a directory located under ``MALWARE_DATASET_DIR_PATH``.

    :param dir_name: directory name joined onto ``MALWARE_DATASET_DIR_PATH``.
    :param dataset_name: dataset label forwarded to ``analyse_apks``.
    :param use_aapt: forwarded unchanged to ``analyse_apks``.
    :param in_memory: forwarded unchanged to ``analyse_apks``.
    :param verify_existance: forwarded unchanged to ``analyse_apks``.
    :return: whatever ``analyse_apks`` returns for that directory.
    """
    options = dict(use_aapt=use_aapt, in_memory=in_memory, verify_existance=verify_existance)
    # Every APK of this dataset is labelled with malignity 1.
    return analyse_apks(os.path.join(MALWARE_DATASET_DIR_PATH, dir_name), dataset_name, 1, **options)
def analyse_benign_dataset(dir_name: str, dataset_name, use_aapt=False, in_memory=False, verify_existance=False) -> int:
    """Run ``analyse_apks`` on a directory located under ``BENIGN_DATASET_DIR_PATH``.

    :param dir_name: directory name joined onto ``BENIGN_DATASET_DIR_PATH``.
    :param dataset_name: dataset label forwarded to ``analyse_apks``.
    :param use_aapt: forwarded unchanged to ``analyse_apks``.
    :param in_memory: forwarded unchanged to ``analyse_apks``.
    :param verify_existance: forwarded unchanged to ``analyse_apks``.
    :return: whatever ``analyse_apks`` returns for that directory.
    """
    benign_label = 0  # every APK of this dataset is labelled with malignity 0
    target_dir = os.path.join(BENIGN_DATASET_DIR_PATH, dir_name)
    analysed = analyse_apks(
        target_dir,
        dataset_name,
        benign_label,
        use_aapt=use_aapt,
        in_memory=in_memory,
        verify_existance=verify_existance,
    )
    return analysed
def analyse_apk(apk_path: str, dataset_name, malignity: int, in_memory=False, use_aapt=False):
    """Extract the features of a single APK and print them, one per line.

    Nothing is saved to the db by this function; the features returned by
    ``apk.get_features()`` are only printed.

    :param apk_path: path of the APK file, passed to ``APK``.
    :param dataset_name: dataset label passed to ``APK``.
    :param malignity: label passed to ``APK``.
    :param in_memory: when true, ``init_in_memory_dicts`` is called first and the
        flag is forwarded to ``extract_dex_features``.
    :param use_aapt: forwarded to ``extract_manifest_features``.
    """
    apk = APK(apk_path, dataset_name, malignity)
    if in_memory:
        init_in_memory_dicts()
    extract_manifest_features(apk, use_aapt=use_aapt)
    # Forward in_memory so the dicts initialised above are actually used,
    # matching how analyse_apks calls extract_dex_features.
    extract_dex_features(apk, in_memory=in_memory)
    for feature in apk.get_features():
        print(feature)