import random

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
torch.autograd.set_detect_anomaly(True)
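
# set_detect_anomaly is a debugging aid for autograd. The rest of the script
# builds a synthetic query log and trains a REINFORCE-style policy network that
# decides, per query, whether using an index (action 1) is worth its extra
# resource cost.
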
def generate_query_log(num_queries=1000):
fields = ["id", "name", "age", "email"]
conditions = ["= ?", "> ?", "< ?", "LIKE ?"]
query_log = pd.DataFrame({
"query_id": range(1, num_queries + 1),
"num_fields": [random.randint(1, len(fields)) for _ in range(num_queries)],
"num_conditions": [random.randint(1, 4) for _ in range(num_queries)],
"use_index": [random.choice([True, False]) for _ in range(num_queries)],
"data_volume": [random.randint(100, 10000) for _ in range(num_queries)]
base_resource_usage_per_field = 0.05
base_resource_usage_per_condition = 0.15
base_resource_usage_per_volume_unit = 0.001
query_log["resource_usage"] = query_log["num_fields"] * base_resource_usage_per_field + query_log["num_conditions"] * base_resource_usage_per_condition + query_log["data_volume"] * base_resource_usage_per_volume_unit
base_execution_time = 2.0
execution_time_factor = 0.5
query_log["execution_time"] = base_execution_time + (query_log["resource_usage"] * execution_time_factor)
query_log["query"] = query_log.apply(lambda row: f"SELECT {', '.join(random.sample(fields, int(row['num_fields'])))} FROM users WHERE {' AND '.join([random.choice(fields) + ' ' + random.choice(conditions) for _ in range(int(row['num_conditions']))])}", axis=1)
def extract_features(row):
    # Simple numeric feature vector used as the policy network input.
    query_length = len(row['query'])
    num_fields = row['num_fields']
    num_conditions = row['num_conditions']
    use_index = 1 if row['use_index'] else 0
    data_volume = row['data_volume']
    return [query_length, num_fields, num_conditions, use_index, data_volume]

class PolicyNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(num_inputs, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_actions)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Softmax over the action dimension gives one probability per action.
        return torch.softmax(self.fc3(x), dim=1)

def reward_function(execution_time, resource_usage, max_time, max_resource_usage,
                    time_weight=1.0, resource_weight=1.0):
    # time_weight / resource_weight trade speed against resource cost
    # (equal weights assumed here, since the original left them undefined).
    normalized_time = execution_time / max_time
    normalized_resource = resource_usage / max_resource_usage
    return (1 / normalized_time) * time_weight - normalized_resource * resource_weight

def simulate_index_usage(execution_time, resource_usage):
    # Using an index is assumed to halve execution time but to cost 20% more
    # resources (e.g. index maintenance and extra I/O).
    time_reduction_factor = 0.5
    resource_usage_increase_factor = 1.2
    adjusted_execution_time = execution_time * (1 - time_reduction_factor)
    adjusted_resource_usage = resource_usage * resource_usage_increase_factor
    return adjusted_execution_time, adjusted_resource_usage

def simulate_without_index(execution_time, resource_usage):
    return execution_time, resource_usage
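
# Worked example (illustrative numbers, not from the original log): a query with
# execution_time = 6.0 and resource_usage = 4.0 becomes (3.0, 4.8) with an index
# and stays at (6.0, 4.0) without one, so the policy must weigh the 50% speed-up
# against the 20% extra resource cost.
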
def train(policy_net, optimizer, query_log, num_episodes=1000, epsilon=0.1):
    # epsilon (exploration rate) was referenced but never defined in the original;
    # 0.1 is an assumed default.
    max_execution_time = query_log['execution_time'].max()
    max_resource_usage = query_log['resource_usage'].max()
    for episode in range(num_episodes):
        total_reward = 0.0
        for _, row in query_log.iterrows():
            state = torch.tensor([extract_features(row)], dtype=torch.float32)
            action_probs = policy_net(state)
            m = Categorical(action_probs)
            if random.random() < epsilon:
                # Epsilon-greedy exploration: occasionally force a random action.
                action = torch.tensor([random.choice([0, 1])])
            else:
                action = m.sample()
            # Action 1 = use the index, action 0 = run the query as-is.
            if action.item() == 1:
                execution_time, resource_usage = simulate_index_usage(row['execution_time'], row['resource_usage'])
            else:
                execution_time, resource_usage = simulate_without_index(row['execution_time'], row['resource_usage'])
            reward = reward_function(execution_time, resource_usage, max_execution_time, max_resource_usage)
            # REINFORCE update: raise the log-probability of actions that earned high reward.
            log_prob = m.log_prob(action)
            loss = -log_prob * reward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_reward += reward
        print(f'Episode {episode} Total Reward: {total_reward}')

query_log = generate_query_log()
num_features = 5  # length of the vector returned by extract_features
num_actions = 2   # 0 = no index, 1 = use index
policy_net = PolicyNetwork(num_features, num_actions)
optimizer = optim.Adam(policy_net.parameters(), lr=0.01)
train(policy_net, optimizer, query_log)
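
# A minimal usage sketch (not part of the original script): after training, the
# policy can be queried greedily for a recommended action on a single query.
with torch.no_grad():
    sample_row = query_log.iloc[0]
    sample_state = torch.tensor([extract_features(sample_row)], dtype=torch.float32)
    probs = policy_net(sample_state)
    recommended_action = torch.argmax(probs, dim=1).item()
    print(f"Query {sample_row['query_id']}: "
          f"{'use index' if recommended_action == 1 else 'no index'} "
          f"(p={probs[0, recommended_action].item():.2f})")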
