57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
import json
|
|
import pandas as pd
|
|
import re
|
|
import json # Add this line at the top of your script
|
|
from AgentProxy import AgentProxy
|
|
|
|
# Load the action -> abstract-action mapping table from disk.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding, which is not UTF-8 on every
# system and would mis-decode (or reject) any non-ASCII entries.
with open('./data_intermediate/mapping.json', 'r', encoding='utf-8') as file:
    mapping = json.load(file)

# Sanity check: number of entries in the loaded mapping.
print(len(mapping))
|
|
|
|
|
|
# Load the analysis results spreadsheet.
df = pd.read_excel('./data_intermediate/analysis_result_top200.xlsx')

# statistic[stage] is a dict holding the number of rows seen for that stage
# under 'total', plus one counter per abstract action matched in that stage.
statistic = {}

# Compiled once: captures the text enclosed by each pair of '**' markers.
bold_pattern = re.compile(r'\*\*(.*?)\*\*')

# Index the mapping by action so each lookup is a dict access rather than a
# scan of the whole list. setdefault keeps the FIRST entry for a duplicated
# action, which is what a front-to-back scan of the list returns.
# Assumes every mapping entry is a dict with a hashable 'action' and an
# 'abstract_action' key -- TODO confirm against mapping.json.
abstract_by_action = {}
for item in mapping:
    abstract_by_action.setdefault(item['action'], item['abstract_action'])

# Walk every row: count it towards its sales stage, pull the '**'-delimited
# actions out of the analysis text, and tally their abstract actions.
for index, row in df.iterrows():
    print(index)

    text = row['销售动作分析']
    stage = row['Sales stage']

    stage_stats = statistic.setdefault(stage, {'total': 0})
    stage_stats['total'] += 1

    # An empty spreadsheet cell is read back as NaN (a float), which the regex
    # cannot search; treat any non-string cell as containing no actions.
    matches = bold_pattern.findall(text) if isinstance(text, str) else []

    for match in matches:
        print(match)
        abstract_action = abstract_by_action.get(match)
        # Actions without a mapping (or mapped to an empty value) are not counted.
        if abstract_action:
            print(f"Matched Abstract Action: {abstract_action}")
            stage_stats[abstract_action] = stage_stats.get(abstract_action, 0) + 1

    # Joining an empty list already yields '', so no special case is needed.
    df.at[index, '销售动作'] = ','.join(matches)

# NOTE: the extracted actions are only kept in memory; the export below is
# disabled.
# df.to_excel('analysis_result_top200.xlsx', index=False)

print(statistic)
|
|
|
|
# Identity map of every abstract action reported for any stage
# (each action is stored as both key and value).
action_dict = {}

# Report, per stage, each action's count relative to the number of rows in
# that stage. Counts are per occurrence while 'total' is per row, so a value
# can exceed 100%.
for stage, actions in statistic.items():
    # Read the row count without popping it: `statistic` stays intact, so this
    # section can be re-run (e.g. in a notebook) without raising KeyError.
    total = actions['total']
    print(f"Stage: {stage}")
    for action, count in actions.items():
        if action == 'total':
            continue  # bookkeeping entry, not an action
        percentage = (count / total) * 100
        print(f" {action}: {percentage:.2f}%")
        action_dict[action] = action