重构ACV分析代码,将类似代码合为function

This commit is contained in:
Tiger Ren 2024-08-23 18:08:45 +08:00
parent 4ef0361f89
commit a2f8c0b3a0
1 changed file with 33 additions and 27 deletions

View File

@ -14,57 +14,63 @@ def refine_content(df):
df[col] = df[col].apply(lambda x: "其他" if strip_character(x, strip_character_list) == "" else strip_character(x, strip_character_list))
return df
def calc_acv_mean(df, acv_name, group_by_column):
    """Return the per-group mean of an ACV column as formatted strings.

    Args:
        df: DataFrame holding both the grouping column(s) and the ACV column.
        acv_name: Name of the numeric ACV column to average.
        group_by_column: Column label (or list of labels) handed to
            ``DataFrame.groupby``.

    Returns:
        A new DataFrame with one row per group, ordered by mean ACV from
        largest to smallest. The ACV column holds thousands-separated
        strings such as ``"1,500"``; the fractional part of each mean is
        dropped by the integer cast.
    """
    # A group whose ACV values are all missing has a NaN mean; replace it
    # with 0 so the integer cast below cannot fail.
    group_means = df.groupby(group_by_column)[acv_name].mean().fillna(0)
    result = group_means.astype(int).reset_index()
    # Sort while the values are still numeric: once formatted they are
    # strings and would be ordered lexicographically.
    result = result.sort_values(by=acv_name, ascending=False)
    result[acv_name] = result[acv_name].apply(lambda value: f"{value:,}")
    return result
def calc_acv_sum(df, acv_name, group_by_column):
    """Return the per-group total of an ACV column as formatted strings.

    Args:
        df: DataFrame holding both the grouping column(s) and the ACV column.
        acv_name: Name of the numeric ACV column to add up.
        group_by_column: Column label (or list of labels) handed to
            ``DataFrame.groupby``.

    Returns:
        A new DataFrame with one row per group, ordered by total ACV from
        largest to smallest. The ACV column holds thousands-separated
        strings such as ``"3,001"``; the fractional part of each total is
        dropped by the integer cast.
    """
    group_totals = df.groupby(group_by_column)[acv_name].sum()
    result = group_totals.astype(int).reset_index()
    # Sort while the values are still numeric: once formatted they are
    # strings and would be ordered lexicographically.
    result = result.sort_values(by=acv_name, ascending=False)
    result[acv_name] = result[acv_name].apply(lambda value: f"{value:,}")
    return result
# Load the won-deals Excel export.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Every table below is built by calc_acv_sum / calc_acv_mean so that all of
# them share the same missing-value handling, descending sort and number
# formatting, and each report is printed exactly once.

# Total ACV per customer category ('客户分类').
print("------ACV by 行业------")
df_win_grouped_by_industry_sum = calc_acv_sum(df_win, acv_name, '客户分类')
print(refine_content(df_win_grouped_by_industry_sum))

# Average ACV per customer category.
print("------平均ACV by 行业------")
df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类')
print(refine_content(df_win_grouped_by_industry_mean))

# Total ACV per customer industry ('客户行业'), reported as the sub-industry.
print("------ACV by 子行业------")
df_win_grouped_by_sub_industry_sum = calc_acv_sum(df_win, acv_name, '客户行业')
print(refine_content(df_win_grouped_by_sub_industry_sum))

# Average ACV per customer industry.
print("------平均ACV by 子行业------")
df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业')
print(refine_content(df_win_grouped_by_sub_industry_mean))
# Load the pipeline Excel export.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
acv_name = '预估 ACV'

# Every table below is built by calc_acv_sum / calc_acv_mean so that all of
# them share the same missing-value handling, descending sort and number
# formatting, and each report is printed exactly once.

# Total estimated ACV per owner industry ('负责人所属行业').
print("------预估ACV by 行业------")
df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业')
print(refine_content(df_pipeline_grouped_by_industry_sum))

# Average estimated ACV per owner industry.
print("------平均预估ACV by 行业------")
df_pipeline_grouped_by_industry_mean = calc_acv_mean(df, acv_name, '负责人所属行业')
print(refine_content(df_pipeline_grouped_by_industry_mean))

# Total estimated ACV per customer industry ('客户行业'), reported as the
# sub-industry.
print("------预估ACV by 子行业------")
df_pipeline_grouped_by_sub_industry_sum = calc_acv_sum(df, acv_name, '客户行业')
print(refine_content(df_pipeline_grouped_by_sub_industry_sum))

# Average estimated ACV per customer industry.
print("------平均预估ACV by 子行业------")
df_pipeline_grouped_by_sub_industry_mean = calc_acv_mean(df, acv_name, '客户行业')
print(refine_content(df_pipeline_grouped_by_sub_industry_mean))