diff --git a/analyze_acv.py b/analyze_acv.py index 3395d43..e75909f 100644 --- a/analyze_acv.py +++ b/analyze_acv.py @@ -14,60 +14,44 @@ def refine_content(df): df[col] = df[col].apply(lambda x: "其他" if strip_character(x, strip_character_list) == "" else strip_character(x, strip_character_list)) return df -def calc_acv_mean(df, acv_name, group_by_column): - df_grouped_mean = df.groupby(group_by_column)[acv_name].mean().fillna(0).astype(int).reset_index() - df_grouped_mean[acv_name] = df_grouped_mean[acv_name].apply(lambda x: '{:,}'.format(x)) - return df_grouped_mean - -def calc_acv_sum(df, acv_name, group_by_column): - df_grouped_sum = df.groupby(group_by_column)[acv_name].sum().astype(int).reset_index() - df_grouped_sum[acv_name] = df_grouped_sum[acv_name].apply(lambda x: '{:,}'.format(x)) - return df_grouped_sum - - # 读取赢单Excel文件 df_win = pd.read_excel('./data_src/pingcap_won.xlsx') acv_name = 'ACV' # ACV by 客户分类 -# df_win_grouped_by_industry = df_win.groupby('客户分类')[acv_name].sum().astype(int).reset_index() -# df_win_grouped_by_industry[acv_name] = df_win_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) +df_win_grouped_by_industry = df_win.groupby('客户分类')[acv_name].sum().astype(int).reset_index() +df_win_grouped_by_industry[acv_name] = df_win_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) print("------ACV by 行业------") -df_win_grouped_by_industry = calc_acv_sum(df_win, acv_name, '客户分类') print(refine_content(df_win_grouped_by_industry)) # Group by customer industry and calculate the average ACV for each group -# won_average_acv_by_industry = df_win.groupby('客户分类')[acv_name].mean().astype(int).reset_index() -# won_average_acv_by_industry[acv_name] = won_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) +won_average_acv_by_industry = df_win.groupby('客户分类')[acv_name].mean().astype(int).reset_index() +won_average_acv_by_industry[acv_name] = won_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) print("------平均ACV by 行业------") -df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类') -print(refine_content(df_win_grouped_by_industry_mean)) +print(refine_content(won_average_acv_by_industry)) -# df_win_grouped_by_sub_industry = df_win.groupby('客户行业')[acv_name].sum().astype(int).reset_index() -# df_win_grouped_by_sub_industry[acv_name] = df_win_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) +df_win_grouped_by_sub_industry = df_win.groupby('客户行业')[acv_name].sum().astype(int).reset_index() +df_win_grouped_by_sub_industry[acv_name] = df_win_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) print("------ACV by 子行业------") -df_win_grouped_by_sub_industry = calc_acv_sum(df_win, acv_name, '客户行业') print(refine_content(df_win_grouped_by_sub_industry)) -# won_average_acv_by_sub_industry = df_win.groupby('客户行业')[acv_name].mean().astype(int).reset_index() -# won_average_acv_by_sub_industry[acv_name] = won_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) +won_average_acv_by_sub_industry = df_win.groupby('客户行业')[acv_name].mean().astype(int).reset_index() +won_average_acv_by_sub_industry[acv_name] = won_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) print("------平均ACV by 子行业------") -df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业') -print(refine_content(df_win_grouped_by_sub_industry_mean)) +print(refine_content(won_average_acv_by_sub_industry)) # 读取Excel文件 df = pd.read_excel('./data_src/pingcap_pipeline.xlsx') # 按照"客户分类"列分组,并计算ACV列的和 acv_name = '预估 ACV' -# df_pipeline_grouped_by_industry = df.groupby('负责人所属行业')[acv_name].sum().astype(int).reset_index() -# df_pipeline_grouped_by_industry = df_pipeline_grouped_by_industry.sort_values(by=acv_name, ascending=False) +df_pipeline_grouped_by_industry = df.groupby('负责人所属行业')[acv_name].sum().astype(int).reset_index() +df_pipeline_grouped_by_industry = df_pipeline_grouped_by_industry.sort_values(by=acv_name, ascending=False) +df_pipeline_grouped_by_industry[acv_name] = df_pipeline_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) print("------预估ACV by 行业------") -df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业') -df_pipeline_grouped_by_industry_sum[acv_name] = df_pipeline_grouped_by_industry_sum[acv_name].apply(lambda x: '{:,}'.format(x)) -print(refine_content(df_pipeline_grouped_by_industry_sum)) +print(refine_content(df_pipeline_grouped_by_industry)) pipeline_average_acv_by_industry = df.groupby('负责人所属行业')[acv_name].mean().astype(int).reset_index() pipeline_average_acv_by_industry[acv_name] = pipeline_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x))