# market_assistant/calculate_action.py

import json
import pandas as pd
import re
import json  # Add this line at the top of your script
from AgentProxy import AgentProxy

# Load the action mapping.  The code further down reads an 'action' and an
# 'abstract_action' key from every entry, so the file is expected to hold a
# list of such objects.
# The encoding is given explicitly: JSON files are UTF-8, while open()
# otherwise falls back to the platform's locale encoding, which can fail to
# decode (or silently garble) non-ASCII action names.
with open('./data_intermediate/mapping.json', 'r', encoding='utf-8') as file:
    json_data = file.read()

mapping = json.loads(json_data)

# Quick sanity check: number of mapping entries that were loaded.
print(len(mapping))


# Read the Excel file
df = pd.read_excel('./data_intermediate/analysis_result_top200.xlsx')

# statistic[stage] is a dict holding the number of rows seen for that stage
# under the key 'total', plus one occurrence counter per abstract action.
statistic = {}

# Index the mapping once so each extracted action is resolved with a single
# dict lookup instead of a scan over the whole mapping.  setdefault() keeps
# the first entry for a duplicated action, matching the former linear search.
action_to_abstract = {}
for item in mapping:
    action_to_abstract.setdefault(item['action'], item['abstract_action'])

# Actions are the spans written in markdown bold (**...**) inside the
# analysis text.
bold_pattern = re.compile(r'\*\*(.*?)\*\*')

# Comma-joined raw actions per row, in row order; becomes a new column below.
sales_actions = []

for index, row in df.iterrows():
    text = row['销售动作分析']
    print(index)  # progress output

    stage = row['Sales stage']
    if stage not in statistic:
        statistic[stage] = {'total': 0}
    statistic[stage]['total'] += 1

    # Empty Excel cells are read as NaN (a float), which the regex engine
    # rejects with a TypeError; treat any non-string cell as "no actions".
    matches = bold_pattern.findall(text) if isinstance(text, str) else []
    for match in matches:
        print(f"{match}")
        abstract_action = action_to_abstract.get(match)
        if abstract_action:
            print(f"Matched Abstract Action: {abstract_action}")

        # Actions without a mapping entry are tallied under the key None.
        stage_counts = statistic[stage]
        stage_counts[abstract_action] = stage_counts.get(abstract_action, 0) + 1

    # Joining an empty list yields '', so rows without actions need no
    # special case.
    sales_actions.append(','.join(matches))

# Assign the column in one go rather than growing the frame cell by cell
# while it is being iterated.
df['销售动作'] = sales_actions

# df.to_excel('analysis_result_top200.xlsx', index=False)

print(statistic)

# Report, for every stage, how often each abstract action occurred relative
# to the number of rows recorded for that stage.  The ratio is occurrences
# per row, so it may exceed 100% when rows mention an action more than once.
# Every action encountered is also registered in action_dict under its own
# name.
action_dict = {}
for stage in statistic:
    actions = statistic[stage]
    # 'total' is the per-stage row count, not an action: take it out so that
    # only the action counters remain in the dict.
    total = actions.pop('total')
    print(f"Stage: {stage}")
    for action in actions:
        count = actions[action]
        percentage = (count / total) * 100
        print(f"  {action}: {percentage:.2f}%")
    # Identity mapping: each action of this stage is keyed by itself.
    action_dict.update((name, name) for name in actions)