"""Report how often each action-result category occurs per sales stage.

Reads the stage-2 analysis workbook, counts the occurrences of every label in
``result_category_list`` inside the action-result text of each row, groups the
counts by sales stage, and prints each category's count as a percentage of the
number of rows in that stage.
"""
from typing import Any, Dict, Iterable, List, Tuple

from openpyxl import Workbook
import pandas as pd

INPUT_PATH = './data_intermediate/analysis_result_action_result_stage2.xlsx'
STAGE_COLUMN = 'Sales stage'
RESULT_COLUMN = "销售动作行动结果"

# NOTE(review): not used anywhere in the visible script; kept in case later
# code relies on the name.
result_counts = {}

# Category labels searched for (as substrings) in the action-result text.
result_category_list = [
    "完成需求确认与收集",
    "完成技术评估与测试",
    "完成商务谈判与合同准备",
    "完成内部审批与预算确认",
    "完成项目立项与采购流程",
    "完成关系建立与维护",
    "完成市场调研与竞争分析",
    "完成产品推广与市场活动",
    "完成技术支持与售后服务",
    "完成续约与增购谈判",
]


def count_actions_by_stage(
    rows: Iterable[Tuple[Any, Any]],
    categories: List[str],
) -> Dict[Any, Dict[str, int]]:
    """Accumulate per-stage row totals and category occurrence counts.

    Args:
        rows: ``(sales_stage, action_result)`` pairs, one per data row.
        categories: Labels to look for inside ``action_result``.

    Returns:
        ``{stage: {"total": n_rows, category: occurrences, ...}}``. Every
        stage has an entry for every category (0 when never seen), with
        ``"total"`` first and categories in the order given.

    Occurrences are counted with ``str.count``, so a label that appears twice
    in one row contributes 2. Rows whose ``action_result`` is not a string
    (e.g. the NaN pandas yields for an empty cell) still count towards
    ``"total"`` but contribute no category matches.
    """
    stats: Dict[Any, Dict[str, int]] = {}
    for stage, action_result in rows:
        stage_stats = stats.get(stage)
        if stage_stats is None:
            stage_stats = {"total": 0, **dict.fromkeys(categories, 0)}
            stats[stage] = stage_stats
        stage_stats["total"] += 1

        if not isinstance(action_result, str):
            continue
        for category in categories:
            # Accumulate across rows; plain assignment would keep only the
            # count from the last row seen for this stage.
            stage_stats[category] += action_result.count(category)
    return stats


def category_percentages(counts: Dict[str, int], total: int) -> Dict[str, float]:
    """Return each count in ``counts`` as a percentage of ``total``.

    Args:
        counts: Mapping of category label to occurrence count.
        total: Number of rows the counts were collected from (non-zero).

    Returns:
        ``{category: count / total * 100}`` in the iteration order of
        ``counts``.
    """
    return {action: (count / total) * 100 for action, count in counts.items()}


if __name__ == "__main__":
    df = pd.read_excel(INPUT_PATH)
    print(df.columns)

    # NOTE(review): assumes each cell of RESULT_COLUMN is free text holding
    # zero or more category labels - confirm against the upstream stage.
    statistics = count_actions_by_stage(
        ((row[STAGE_COLUMN], row[RESULT_COLUMN]) for _, row in df.iterrows()),
        result_category_list,
    )

    action_dict = {}

    # Print the percentage distribution of each action within each stage.
    for stage, actions in statistics.items():
        # Removing 'total' leaves only the per-category counts in `statistics`.
        total = actions.pop('total')
        print(f"Stage: {stage}")
        for action, percentage in category_percentages(actions, total).items():
            print(f" {action}: {percentage:.2f}%")
            action_dict[action] = action