# market_assistant/calculate_action.py
#
# 57 lines
# 1.8 KiB
# Python

import json
import pandas as pd
import re
import json # Add this line at the top of your script
from AgentProxy import AgentProxy
# Load the action -> abstract-action mapping from disk.
#
# The encoding is stated explicitly: JSON text is UTF-8 by specification, while
# open() without an encoding falls back to the platform default, which is not
# UTF-8 everywhere.  NOTE(review): the mapping is compared against text taken
# from a Chinese-language column, so it presumably contains non-ASCII action
# names -- confirm the file on disk really is UTF-8.
with open('./data_intermediate/mapping.json', 'r', encoding='utf-8') as file:
    json_data = file.read()

# Parsed mapping; the rest of the script iterates it and reads the 'action' and
# 'abstract_action' keys of each entry.
mapping = json.loads(json_data)

# Quick sanity check: number of entries that were loaded.
print(len(mapping))
# Read the Excel file holding one analysed record per row.
df = pd.read_excel('./data_intermediate/analysis_result_top200.xlsx')

# Actions appear in the analysis text as markdown bold spans: **action**.
# Compiled once here instead of being re-parsed for every row.
ACTION_PATTERN = re.compile(r'\*\*(.*?)\*\*')

# Index the mapping by action name once, so each extracted action costs a dict
# lookup rather than a scan over the whole mapping.  setdefault keeps the FIRST
# entry for a duplicated action name, matching what a first-match linear search
# over `mapping` would return.
abstract_by_action = {}
for item in mapping:
    abstract_by_action.setdefault(item['action'], item['abstract_action'])

# statistic[stage] -> {'total': rows seen for that stage,
#                      <abstract action>: number of matches mapped to it, ...}
# NOTE(review): 'total' shares a namespace with the abstract-action names; an
# abstract action literally called 'total' would corrupt the row count.
statistic = {}

for index, row in df.iterrows():
    text = row['销售动作分析']
    print(index)

    stage = row['Sales stage']
    stage_counts = statistic.setdefault(stage, {'total': 0})
    stage_counts['total'] += 1

    # A blank Excel cell is read as NaN (a float), which the regex engine
    # rejects with TypeError.  Treat any non-string cell as "no actions found"
    # so one empty row does not abort the whole run; the row still counts
    # towards its stage total.
    matches = ACTION_PATTERN.findall(text) if isinstance(text, str) else []

    for match in matches:
        print(f"{match}")
        abstract_action = abstract_by_action.get(match)
        # Unmapped actions (and falsy mapping values) are skipped, not counted.
        if abstract_action:
            print(f"Matched Abstract Action: {abstract_action}")
            stage_counts[abstract_action] = stage_counts.get(abstract_action, 0) + 1

    # Record the raw extracted actions on the row; joining an empty list
    # yields '', so rows without matches get an empty string.
    df.at[index, '销售动作'] = ','.join(matches)

# NOTE: the '销售动作' column is only updated in memory; re-enable the export
# below to persist it.
# df.to_excel('analysis_result_top200.xlsx', index=False)
print(statistic)
# Report, for every sales stage, each abstract action's count as a percentage
# of the number of rows recorded for that stage.  Because actions are counted
# per match while 'total' is counted per row, a value can exceed 100%.
#
# Side effects kept deliberately:
#   * the 'total' entry is popped out of each stage's dict in `statistic`, so
#     afterwards those dicts contain action counts only;
#   * `action_dict` ends up mapping every reported action name to itself.
action_dict = {}
for stage in statistic:
    actions = statistic[stage]
    # Remove the row count first so that only action counters remain to iterate.
    total = actions.pop('total')
    print(f"Stage: {stage}")
    for action in actions:
        count = actions[action]
        percentage = (count / total) * 100
        print(f" {action}: {percentage:.2f}%")
        action_dict[action] = action