"""Build an Excel report of ACV totals, counts and averages per grouping column.

Two workbooks are read (won deals and pipeline); for each configured grouping
column a sum/count sheet and a mean sheet are produced, and all sheets are
written to a single output workbook.
"""

from typing import List

from openpyxl import Workbook
import pandas as pd


def strip_character(column_name, characters: List[str]):
    """Return *column_name* with every entry of *characters* removed.

    Args:
        column_name: The string to clean.
        characters: Substrings to delete; each may be longer than one
            character.

    Returns:
        The cleaned string with leading and trailing whitespace trimmed.
    """
    new_col_name = column_name
    for character in characters:
        new_col_name = new_col_name.replace(character, '')
    return new_col_name.strip()


def _refine_cell(value, tokens: List[str]):
    """Clean one cell: strip *tokens* from strings, pass other values through."""
    if not isinstance(value, str):
        return value
    cleaned = strip_character(value, tokens)
    # A string that is empty after cleaning is labelled "其他".
    return cleaned if cleaned != "" else "其他"


def refine_content(df):
    """Clean every string cell of *df* in place and return the same frame.

    Spaces, newlines, colons and the token '其他' are removed from string
    cells; a cell that ends up empty becomes "其他". Non-string cells are left
    untouched.
    """
    # NOTE(review): ':' is listed twice; one entry may originally have been a
    # full-width colon -- confirm against the original file.
    strip_character_list = [' ', '\n', ':', ':', '其他']
    for col in df.columns:
        df[col] = df[col].apply(
            lambda cell: _refine_cell(cell, strip_character_list)
        )
    return df


def calc_acv_mean(df, acv_name, group_by_column):
    """Return the mean of *acv_name* per *group_by_column*, largest first.

    Missing means are treated as 0 and means are truncated to integers. The
    value column is returned as strings with thousands separators
    (e.g. ``"2,000"``).
    """
    df_grouped_mean = (
        df.groupby(group_by_column)[acv_name]
        .mean()
        .fillna(0)
        .astype(int)
        .reset_index()
    )
    # Sort while the values are still numeric; formatting turns them into text.
    df_grouped_mean = df_grouped_mean.sort_values(by=acv_name, ascending=False)
    df_grouped_mean[acv_name] = df_grouped_mean[acv_name].apply(
        lambda value: '{:,}'.format(value)
    )
    return df_grouped_mean


def calc_acv_sum(df, acv_name, group_by_column):
    """Return the sum of *acv_name* per *group_by_column*, largest first.

    Sums are cast to integers and returned as strings with thousands
    separators (e.g. ``"4,000"``).
    """
    df_grouped_sum = (
        df.groupby(group_by_column)[acv_name].sum().astype(int).reset_index()
    )
    # Sort while the values are still numeric; formatting turns them into text.
    df_grouped_sum = df_grouped_sum.sort_values(by=acv_name, ascending=False)
    df_grouped_sum[acv_name] = df_grouped_sum[acv_name].apply(
        lambda value: '{:,}'.format(value)
    )
    return df_grouped_sum


def calc_acv_sum_and_count(df, acv_name, group_by_column):
    """Return the integer sum and the non-null count of *acv_name* per group.

    Both aggregated columns carry the name *acv_name*, so the merge
    disambiguates them with pandas' default suffixes: ``<acv_name>_x`` holds
    the sum and ``<acv_name>_y`` holds the count.
    """
    grouped = df.groupby(group_by_column)[acv_name]
    df_grouped_sum = grouped.sum().astype(int).reset_index()
    df_grouped_count = grouped.count().reset_index()
    return pd.merge(
        df_grouped_sum, df_grouped_count, on=group_by_column, how='outer'
    )


def save_to_excel(dataframes, sheet_names, output_file):
    """Write each frame to its own sheet of *output_file*, without the index.

    Frames and sheet names are paired positionally; if the two sequences
    differ in length, the extra entries of the longer one are ignored.
    """
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for df, sheet_name in zip(dataframes, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name, index=False)


def _summary_sheets(df, acv_name, groupings):
    """Yield ``(sheet_name, frame)`` pairs for every grouping.

    *groupings* holds ``(group_by_column, sum_sheet_name, mean_sheet_name)``
    tuples. Each grouping yields its sum/count sheet first, then its mean
    sheet; both frames are passed through ``refine_content``.
    """
    for group_by_column, sum_sheet_name, mean_sheet_name in groupings:
        sum_and_count = calc_acv_sum_and_count(df, acv_name, group_by_column)
        yield sum_sheet_name, refine_content(sum_and_count)
        mean = calc_acv_mean(df, acv_name, group_by_column)
        yield mean_sheet_name, refine_content(mean)


def main():
    """Read both source workbooks, build all summary sheets and save them."""
    # (sheet name, frame) pairs, kept in the order the sheets are written.
    sheets = []

    # Read the won-deals Excel file and summarise the 'ACV' column.
    df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
    sheets.extend(
        _summary_sheets(
            df_win,
            'ACV',
            [
                ('客户分类', "ACV by 行业", "平均ACV by 行业"),
                ('客户行业', "ACV by 子行业", "平均ACV by 子行业"),
            ],
        )
    )

    # Read the pipeline Excel file and summarise the '预估 ACV' column.
    df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
    sheets.extend(
        _summary_sheets(
            df,
            '预估 ACV',
            [
                ('负责人所属行业', "预估ACV by 行业", "平均预估ACV by 行业"),
                ('客户行业', "预估ACV by 子行业", "平均预估ACV by 子行业"),
            ],
        )
    )

    # Save all frames into a single Excel file.
    sheet_names = [sheet_name for sheet_name, _ in sheets]
    dataframes = [frame for _, frame in sheets]
    output_file = './output/acv_analysis_v2(with count).xlsx'
    save_to_excel(dataframes, sheet_names, output_file)
    print(f"Analysis results have been saved to {output_file}")


if __name__ == "__main__":
    main()