"""Summarise ACV by industry for won deals and for the open pipeline.

Reads ``./data_src/pingcap_won.xlsx`` and ``./data_src/pingcap_pipeline.xlsx``
and prints, for each workbook, the total and the mean ACV grouped by industry
and by sub-industry.
"""
from typing import List

import pandas as pd


def strip_character(column_name, characters: List[str]):
    """Return the cell text with every fragment in *characters* removed.

    Args:
        column_name: The cell value to clean (the name is historical; this
            is applied to cell contents, not to column labels). Non-string
            values are converted with ``str``; missing values (``None``,
            NaN, ``pd.NA``) are treated as the empty string.
        characters: Substrings to delete wherever they occur.

    Returns:
        The cleaned text with surrounding whitespace stripped.
    """
    if isinstance(column_name, str):
        text = column_name
    elif pd.api.types.is_scalar(column_name) and pd.isna(column_name):
        # A missing cell has no text; the caller decides how to label it.
        text = ''
    else:
        # Numeric group keys would otherwise fail on ``.replace``.
        text = str(column_name)
    for character in characters:
        text = text.replace(character, '')
    return text.strip()


def refine_content(df):
    """Clean every cell of *df* in place and return it.

    Whitespace, colons (ASCII and full-width) and the literal ``其他`` are
    removed from each cell; a cell that ends up empty is labelled ``其他``.
    All cells are strings afterwards.
    """
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def clean(cell):
        # Clean once per cell; an empty result falls back to the label.
        return strip_character(cell, strip_character_list) or "其他"

    for col in df.columns:
        df[col] = df[col].map(clean)
    return df


def calc_acv_mean(df, acv_name, group_by_column):
    """Return the mean of *acv_name* per *group_by_column* value.

    The mean is truncated to an integer and rendered with thousands
    separators. Groups without any ACV value report ``0``.
    """
    means = df.groupby(group_by_column)[acv_name].mean()
    # A group whose values are all missing has a NaN mean, which cannot be
    # cast to int, so fill before casting.
    df_grouped_mean = means.fillna(0).astype(int).reset_index()
    df_grouped_mean[acv_name] = df_grouped_mean[acv_name].map('{:,}'.format)
    return df_grouped_mean


def calc_acv_sum(df, acv_name, group_by_column):
    """Return the sum of *acv_name* per *group_by_column* value.

    The sum is truncated to an integer and rendered with thousands
    separators, so the ACV column of the result holds strings.
    """
    totals = df.groupby(group_by_column)[acv_name].sum()
    df_grouped_sum = totals.astype(int).reset_index()
    df_grouped_sum[acv_name] = df_grouped_sum[acv_name].map('{:,}'.format)
    return df_grouped_sum


def build_reports(df, acv_name, industry_column, sub_industry_column, label):
    """Build the four (title, table) reports for one workbook.

    Args:
        df: The source rows.
        acv_name: Name of the numeric ACV column in *df*.
        industry_column: Column used for the industry-level grouping.
        sub_industry_column: Column used for the sub-industry grouping.
        label: Text used for the measure in the report titles.

    Returns:
        A list of ``(title, DataFrame)`` pairs in print order: industry sum,
        industry mean, sub-industry sum, sub-industry mean.
    """
    levels = (('行业', industry_column), ('子行业', sub_industry_column))
    measures = (('', calc_acv_sum), ('平均', calc_acv_mean))
    reports = []
    for level, column in levels:
        for prefix, calc in measures:
            title = f"------{prefix}{label} by {level}------"
            # The calc_* helpers already format the ACV column; formatting
            # it a second time raises ValueError on the resulting strings.
            reports.append((title, refine_content(calc(df, acv_name, column))))
    return reports


def print_reports(reports):
    """Print each report title followed by its table."""
    for title, table in reports:
        print(title)
        print(table)


def main():
    """Print the ACV reports for won deals, then for the pipeline."""
    df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
    print_reports(build_reports(df_win, 'ACV', '客户分类', '客户行业', 'ACV'))

    df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
    print_reports(
        build_reports(df_pipeline, '预估 ACV', '负责人所属行业', '客户行业', '预估ACV')
    )


if __name__ == "__main__":
    main()
from typing import List

import pandas as pd


def strip_character(column_name, characters: List[str]):
    """Return the cell text with every fragment in *characters* removed.

    Args:
        column_name: The cell value to clean (applied to cell contents, not
            to column labels). Non-string values are converted with ``str``;
            missing values (``None``, NaN, ``pd.NA``) become the empty
            string.
        characters: Substrings to delete wherever they occur.

    Returns:
        The cleaned text with surrounding whitespace stripped.
    """
    if isinstance(column_name, str):
        text = column_name
    elif pd.api.types.is_scalar(column_name) and pd.isna(column_name):
        # A missing cell has no text; the caller decides how to label it.
        text = ''
    else:
        # Numeric group keys would otherwise fail on ``.replace``.
        text = str(column_name)
    for character in characters:
        text = text.replace(character, '')
    return text.strip()


def refine_content(df):
    """Clean every cell of *df* in place and return it.

    Whitespace, colons (ASCII and full-width) and the literal ``其他`` are
    removed from each cell; a cell that ends up empty is labelled ``其他``.
    All cells are strings afterwards.
    """
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def clean(cell):
        # Clean once per cell; an empty result falls back to the label.
        return strip_character(cell, strip_character_list) or "其他"

    for col in df.columns:
        df[col] = df[col].map(clean)
    return df


def summarise_acv(df, acv_name, group_by_column, how='sum', sort_desc=False):
    """Aggregate *acv_name* per *group_by_column* and format it for display.

    Args:
        df: The source rows.
        acv_name: Name of the numeric ACV column.
        group_by_column: Column whose values define the groups.
        how: ``'sum'`` or ``'mean'``.
        sort_desc: When true, order rows by the aggregated value, largest
            first, before the values are turned into text.

    Returns:
        A DataFrame with the group column and the ACV column; ACV values are
        integers rendered as strings with thousands separators.

    Raises:
        ValueError: If *how* is neither ``'sum'`` nor ``'mean'``.
    """
    grouped = df.groupby(group_by_column)[acv_name]
    if how == 'sum':
        values = grouped.sum()
    elif how == 'mean':
        values = grouped.mean()
    else:
        raise ValueError(f"how must be 'sum' or 'mean', got {how!r}")
    # A group whose values are all missing has a NaN mean, which cannot be
    # cast to int, so fill before casting.
    summary = values.fillna(0).astype(int).reset_index()
    if sort_desc:
        # Sort while the column is still numeric; the formatted strings
        # would sort lexicographically.
        summary = summary.sort_values(by=acv_name, ascending=False)
    summary[acv_name] = summary[acv_name].map('{:,}'.format)
    return summary


def main():
    """Print the ACV reports for won deals, then for the pipeline."""
    # Won deals.
    df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
    acv_name = 'ACV'

    print("------ACV by 行业------")
    print(refine_content(summarise_acv(df_win, acv_name, '客户分类')))

    print("------平均ACV by 行业------")
    print(refine_content(summarise_acv(df_win, acv_name, '客户分类', how='mean')))

    print("------ACV by 子行业------")
    print(refine_content(summarise_acv(df_win, acv_name, '客户行业')))

    print("------平均ACV by 子行业------")
    print(refine_content(summarise_acv(df_win, acv_name, '客户行业', how='mean')))

    # Open pipeline: group by the owner's industry and total the estimated ACV.
    df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
    acv_name = '预估 ACV'

    print("------预估ACV by 行业------")
    print(refine_content(summarise_acv(df, acv_name, '负责人所属行业', sort_desc=True)))

    print("------平均预估ACV by 行业------")
    print(refine_content(summarise_acv(df, acv_name, '负责人所属行业', how='mean')))

    print("------预估ACV by 子行业------")
    print(refine_content(summarise_acv(df, acv_name, '客户行业')))

    print("------平均预估ACV by 子行业------")
    print(refine_content(summarise_acv(df, acv_name, '客户行业', how='mean')))


if __name__ == "__main__":
    main()