import pandas as pd
from typing import List
from openpyxl import Workbook

# Empty dictionary reserved for counts.
result_counts = dict()

# Read the stage-2 action-result workbook and print the columns it provides.
df = pd.read_excel(io='./data_intermediate/analysis_result_action_result_stage2.xlsx')
print(df.columns)

# Category labels that are searched for (as substrings) in each row's
# action-result text.
result_category_list: List[str] = [
    "完成需求确认与收集",
    "完成技术评估与测试",
    "完成商务谈判与合同准备",
    "完成内部审批与预算确认",
    "完成项目立项与采购流程",
    "完成关系建立与维护",
    "完成市场调研与竞争分析",
    "完成产品推广与市场活动",
    "完成技术支持与售后服务",
    "完成续约与增购谈判",
]

# Per-stage tallies keyed by the 'Sales stage' value:
#   statistics[stage]['total']    -> number of rows seen for that stage
#   statistics[stage][category]   -> occurrences of the category text summed
#                                    over all of that stage's rows
# Note: str.count() counts every occurrence, so a category repeated inside one
# cell contributes more than once.
statistics = {}
for index, row in df.iterrows():
    sales_stage = row['Sales stage']
    action_result = row["销售动作行动结果"]
    # Empty Excel cells come back from pandas as NaN (a float), which has no
    # .count(); treat missing values as empty text and coerce anything else to
    # str so the row still contributes to the stage total.
    if pd.isna(action_result):
        action_result = ""
    else:
        action_result = str(action_result)
    if sales_stage not in statistics:
        # Start every category at zero so each stage reports the full list.
        statistics[sales_stage] = {'total': 0}
        for category in result_category_list:
            statistics[sales_stage][category] = 0
    statistics[sales_stage]['total'] += 1
    for category in result_category_list:
        count = action_result.count(category)
        # Accumulate across rows; plain assignment would keep only the count
        # from the last row processed for this stage.
        statistics[sales_stage][category] += count
        
# Print, for every stage, each stored category count relative to that stage's
# 'total' entry, and record every category name seen in action_dict.
action_dict = {}
for stage, actions in statistics.items():
    # pop() removes 'total' from the stage dict, leaving only category entries.
    total = actions.pop('total')
    print(f"Stage: {stage}")
    for action, count in actions.items():
        percentage = (count / total) * 100
        print(f"  {action}: {percentage:.2f}%")
    # Each category name maps to itself.
    action_dict.update({name: name for name in actions})