"""Count how often each abstract sales action appears per sales stage.

Reads an action -> abstract-action mapping from JSON and an analysis
spreadsheet from Excel, extracts every ``**bold**`` span from the
'销售动作分析' column, tallies the mapped abstract actions per 'Sales stage',
and prints the percentage distribution of actions within each stage.
"""
import json
import re

import pandas as pd

from AgentProxy import AgentProxy

# Matches the text between a pair of '**' markers (non-greedy).
BOLD_SPAN = re.compile(r'\*\*(.*?)\*\*')

# NOTE(review): the file is opened with the platform default encoding, as in
# the original script; confirm whether an explicit encoding should be passed.
with open('./data_intermediate/mapping.json', 'r') as file:
    json_data = file.read()
mapping = json.loads(json_data)
print(len(mapping))

# Read the Excel file
df = pd.read_excel('./data_intermediate/analysis_result_top200.xlsx')

# statistic[stage] holds a 'total' row count plus one counter per abstract action.
statistic = {}

for index, row in df.iterrows():
    text = row['销售动作分析']
    print(index)

    stage = row['Sales stage']
    stage_counts = statistic.setdefault(stage, {'total': 0})
    stage_counts['total'] += 1

    # Empty spreadsheet cells are read as float NaN, which re.findall rejects
    # with a TypeError; treat any non-string cell as containing no actions.
    matches = BOLD_SPAN.findall(text) if isinstance(text, str) else []

    for match in matches:
        print(f"{match}")
        # First mapping entry whose 'action' equals the extracted text wins.
        abstract_action = next(
            (item['abstract_action'] for item in mapping if item['action'] == match),
            None,
        )
        if abstract_action:
            print(f"Matched Abstract Action: {abstract_action}")
            stage_counts[abstract_action] = stage_counts.get(abstract_action, 0) + 1

    # ','.join([]) is '', so rows without matches get an empty string.
    df.at[index, '销售动作'] = ','.join(matches)

# df.to_excel('analysis_result_top200.xlsx', index=False)
print(statistic)

action_dict = {}
# Calculate the percentage distribution of each action within each stage
for stage, actions in statistic.items():
    # pop() removes 'total' so that only action counters remain to iterate.
    total = actions.pop('total')
    print(f"Stage: {stage}")
    for action, count in actions.items():
        percentage = (count / total) * 100
        print(f" {action}: {percentage:.2f}%")
        # NOTE(review): each action is mapped to itself; confirm this is intended.
        action_dict[action] = action