diff --git a/analyze_acv.py b/analyze_acv.py index d598594..75c21e6 100644 --- a/analyze_acv.py +++ b/analyze_acv.py @@ -1,5 +1,7 @@ import pandas as pd from typing import List +from openpyxl import Workbook + def strip_character(column_name, characters: List[str]): new_col_name = column_name @@ -26,28 +28,36 @@ def calc_acv_sum(df, acv_name, group_by_column): df_grouped_sum[acv_name] = df_grouped_sum[acv_name].apply(lambda x: '{:,}'.format(x)) return df_grouped_sum +def save_to_excel(dataframes, sheet_names, output_file): + with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + for df, sheet_name in zip(dataframes, sheet_names): + df.to_excel(writer, sheet_name=sheet_name, index=False) # 读取赢单Excel文件 df_win = pd.read_excel('./data_src/pingcap_won.xlsx') acv_name = 'ACV' +# 创建一个列表来存储所有的数据帧和对应的sheet名称 +dataframes = [] +sheet_names = [] + # ACV by 客户分类 -print("------ACV by 行业------") df_win_grouped_by_industry_sum = calc_acv_sum(df_win, acv_name, '客户分类') -print(refine_content(df_win_grouped_by_industry_sum)) +dataframes.append(refine_content(df_win_grouped_by_industry_sum)) +sheet_names.append("ACV by 行业") # Group by customer industry and calculate the average ACV for each group -print("------平均ACV by 行业------") df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类') -print(refine_content(df_win_grouped_by_industry_mean)) +dataframes.append(refine_content(df_win_grouped_by_industry_mean)) +sheet_names.append("平均ACV by 行业") -print("------ACV by 子行业------") df_win_grouped_by_sub_industry_sum = calc_acv_sum(df_win, acv_name, '客户行业') -print(refine_content(df_win_grouped_by_sub_industry_sum)) +dataframes.append(refine_content(df_win_grouped_by_sub_industry_sum)) +sheet_names.append("ACV by 子行业") -print("------平均ACV by 子行业------") df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业') -print(refine_content(df_win_grouped_by_sub_industry_mean)) +dataframes.append(refine_content(df_win_grouped_by_sub_industry_mean)) +sheet_names.append("平均ACV by 子行业") # 读取Excel文件 df = pd.read_excel('./data_src/pingcap_pipeline.xlsx') @@ -55,22 +65,24 @@ df = pd.read_excel('./data_src/pingcap_pipeline.xlsx') # 按照"客户分类"列分组,并计算ACV列的和 acv_name = '预估 ACV' -print("------预估ACV by 行业------") df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业') -print(refine_content(df_pipeline_grouped_by_industry_sum)) +dataframes.append(refine_content(df_pipeline_grouped_by_industry_sum)) +sheet_names.append("预估ACV by 行业") - -print("------平均预估ACV by 行业------") df_pipeline_grouped_by_industry_mean = calc_acv_mean(df, acv_name, '负责人所属行业') -print(refine_content(df_pipeline_grouped_by_industry_mean)) +dataframes.append(refine_content(df_pipeline_grouped_by_industry_mean)) +sheet_names.append("平均预估ACV by 行业") - -print("------预估ACV by 子行业------") df_pipeline_grouped_by_sub_industry_sum = calc_acv_sum(df, acv_name, '客户行业') -print(refine_content(df_pipeline_grouped_by_sub_industry_sum)) +dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_sum)) +sheet_names.append("预估ACV by 子行业") - - -print("------平均预估ACV by 子行业------") df_pipeline_grouped_by_sub_industry_mean = calc_acv_mean(df, acv_name, '客户行业') -print(refine_content(df_pipeline_grouped_by_sub_industry_mean)) \ No newline at end of file +dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_mean)) +sheet_names.append("平均预估ACV by 子行业") + +# 保存所有数据帧到一个Excel文件中 +output_file = './output/acv_analysis.xlsx' +save_to_excel(dataframes, sheet_names, output_file) + +print(f"Analysis results have been saved to {output_file}") \ No newline at end of file diff --git a/analyze_acv_dist.py b/analyze_acv_dist.py index b116625..1d37ce1 100644 --- a/analyze_acv_dist.py +++ b/analyze_acv_dist.py @@ -1,5 +1,6 @@ import pandas as pd from typing import List +from openpyxl import Workbook def strip_character(column_name, characters: List[str]): @@ -25,27 +26,25 @@ def get_acv_distribution(df, acv_name, industry_col_name): industry_acv_distribution.loc['Total'] = industry_acv_distribution.sum() return industry_acv_distribution -# Define the bins for ACV intervals +# 新增函数:将结果保存到Excel +def save_to_excel(dfs, sheet_names, output_file): + with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + for df, sheet_name in zip(dfs, sheet_names): + df.to_excel(writer, sheet_name=sheet_name) + +# 读取数据 df = pd.read_excel('./data_src/pingcap_won.xlsx') - - -print('---------成单:ACV Distribution by industry--------') - -print(get_acv_distribution(df, 'ACV', '客户分类')) - - -print('---------成单:ACV Distribution by sub-industry--------') - -print(get_acv_distribution(df, 'ACV', '客户行业')) - - df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx') -print('---------Pipeline:ACV Distribution by industry--------') +# 获取各种分布 +won_industry_dist = get_acv_distribution(df, 'ACV', '客户分类') +won_sub_industry_dist = get_acv_distribution(df, 'ACV', '客户行业') +pipeline_industry_dist = get_acv_distribution(df_pipeline, '预估 ACV', '负责人所属行业') +pipeline_sub_industry_dist = get_acv_distribution(df_pipeline, '预估 ACV', '客户行业') -print(get_acv_distribution(df_pipeline, '预估 ACV', '负责人所属行业')) +# 保存结果到Excel +dfs = [won_industry_dist, won_sub_industry_dist, pipeline_industry_dist, pipeline_sub_industry_dist] +sheet_names = ['成单-行业分布', '成单-子行业分布', 'Pipeline-行业分布', 'Pipeline-子行业分布'] +save_to_excel(dfs, sheet_names, './output/acv_distribution.xlsx') - -print('---------Pipeline:ACV Distribution by sub-industry--------') - -print(get_acv_distribution(df_pipeline, '预估 ACV', '客户行业')) \ No newline at end of file +print("ACV distribution analysis completed. Results saved in './output/acv_distribution.xlsx'") \ No newline at end of file diff --git a/output/acv_analysis.xlsx b/output/acv_analysis.xlsx new file mode 100644 index 0000000..9a7b705 Binary files /dev/null and b/output/acv_analysis.xlsx differ diff --git a/output/acv_distribution.xlsx b/output/acv_distribution.xlsx new file mode 100644 index 0000000..3b7f3d7 Binary files /dev/null and b/output/acv_distribution.xlsx differ