forked from TsinghuaDatabaseGroup/DB-GPT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapi.py
88 lines (72 loc) · 4.33 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import ast
# import sys
# sys.path.insert(0, '/home/wuy/DB/new_old_GPT/DB-GPT')
from multiagents.tools.index_advisor.index_selection.selection_utils.postgres_dbms import PostgresDatabaseConnector
from multiagents.tools.index_advisor.index_selection.selection_utils import selec_com
from multiagents.tools.index_advisor.configs import get_index_result
from multiagents.tools.metrics import advisor
from configs import POSTGRESQL_CONFIG
from multiagents.tools.metrics import get_workload_statistics
import pdb
from multiagents.initialization import LANGUAGE
FUNCTION_DEFINITION = {
"optimize_index_selection": {
"name": "optimize_index_selection",
"description":
"使用索引选择算法返回推荐的索引。" if LANGUAGE == "zh"
else "returns the recommended index by running the index selection algorithm.",
"parameters": {'type': 'object', 'properties': {}}
}
}
def optimize_index_selection(start_time: int, end_time: int):
"""optimize_index_selection(start_time : int, end_time : int) returns the recommended index by running the algorithm 'Extend'.
This method uses a recursive algorithm that considers only a limited subset of index candidates.
The method exploits structures and properties that are typical for real-world workloads and the performance of indexes.
It identifies beneficial indexes and does not construct similar indexes.
The recursion only realizes index selections/extensions with significant additional performance per size ratio.
The following is an example:
Thoughts: I will use the \\\'optimize_index_selection\\\' command to recommend the index for the given workload.
Reasoning: I need to recommend the effective index for the given workload. I will use the \\\'optimize_index_selection\\\' command to get the index from 'Extend' and return the result.
Plan: - Use the \\\'optimize_index_selection\\\' command to get the index.
Command: {"name": "optimize_index_selection",
"args": {"workload": "SELECT A.col1 from A join B where A.col2 = B.col2 and B.col3 > 2 group by A.col1"}}
Result: Command optimize_index_selection returned: "A#col2; B#col2,col3"
"""
# 1. Split the workloads by database names
databases = {}
workload_statistics = get_workload_statistics()
if isinstance(workload_statistics, str):
workload_statistics = ast.literal_eval(workload_statistics)
for query_template in workload_statistics:
database_name = query_template["dbname"]
if database_name not in databases:
databases[database_name] = []
databases[database_name].append({"sql": query_template["sql"], "frequency": query_template["calls"]})
index_advice = f"Recommended indexes: \n"
for dbname in databases:
# 2. load db settings
db_config = {"postgresql": POSTGRESQL_CONFIG}
db_config["postgresql"]["dbname"] = dbname
connector = PostgresDatabaseConnector(db_config, autocommit=True)
tables, columns, column_sampled_values = selec_com.get_columns_from_db(connector) # todo sample data for each column
# tables, columns = selec_com.get_columns_from_db(connector) # todo sample data for each column
# 3. read the workload queries
workload = databases[dbname] # list of dict
# script_path = os.path.abspath(__file__)
# script_dir = os.path.dirname(script_path)
# # read from the logged queries recorded by the match_diagnose_knowledge function
# workload_file = script_dir + \
# f"/index_selection/selection_data/data_info/job_templates.sql"
# workload = list()
# with open(workload_file, "r") as rf:
# for line in rf.readlines():
# workload.append(line.strip())
indexes, total_no_cost, total_ind_cost = get_index_result(advisor, workload, connector, columns, column_sampled_values)
if len(indexes) != 0:
index_advice += f"\t For {dbname}, the recommended indexes are: {indexes}, which reduces cost from {total_no_cost} to {total_ind_cost}.\n"
return index_advice
if __name__ == '__main__':
kwargs={'start_time': '2023-10-15 23:09:49', 'end_time': '2023-10-15 23:12:49'}
index_advice=optimize_index_selection(**kwargs)
print(index_advice)