重构ACV分析部分代码,将中间结果输出到excel中
This commit is contained in:
parent
085f0dc880
commit
2aa7e80cfd
|
|
@ -1,5 +1,7 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
def strip_character(column_name, characters: List[str]):
|
def strip_character(column_name, characters: List[str]):
|
||||||
|
|
||||||
new_col_name = column_name
|
new_col_name = column_name
|
||||||
|
|
@ -26,28 +28,36 @@ def calc_acv_sum(df, acv_name, group_by_column):
|
||||||
df_grouped_sum[acv_name] = df_grouped_sum[acv_name].apply(lambda x: '{:,}'.format(x))
|
df_grouped_sum[acv_name] = df_grouped_sum[acv_name].apply(lambda x: '{:,}'.format(x))
|
||||||
return df_grouped_sum
|
return df_grouped_sum
|
||||||
|
|
||||||
|
def save_to_excel(dataframes, sheet_names, output_file):
    """Write each DataFrame to its own sheet of a single Excel workbook.

    Rows are written without the DataFrame index.

    Args:
        dataframes: DataFrames to write, in sheet order.
        sheet_names: Sheet names, one per DataFrame, in the same order.
        output_file: Destination of the workbook. When it is a filesystem
            path, missing parent directories are created first.

    Raises:
        ValueError: If the number of DataFrames and sheet names differ.
    """
    import os

    dataframes = list(dataframes)
    sheet_names = list(sheet_names)
    # zip() would silently drop the unmatched tail, losing analysis results.
    if len(dataframes) != len(sheet_names):
        raise ValueError(
            f"Got {len(dataframes)} dataframes but {len(sheet_names)} sheet names"
        )

    # Only touch the filesystem for real paths; file-like targets pass through.
    if isinstance(output_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for df, sheet_name in zip(dataframes, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name, index=False)
|
||||||
|
|
||||||
# 读取赢单Excel文件
|
# 读取赢单Excel文件
|
||||||
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
|
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
|
||||||
acv_name = 'ACV'
|
acv_name = 'ACV'
|
||||||
|
|
||||||
|
# 创建一个列表来存储所有的数据帧和对应的sheet名称
|
||||||
|
dataframes = []
|
||||||
|
sheet_names = []
|
||||||
|
|
||||||
# ACV by 客户分类
|
# ACV by 客户分类
|
||||||
print("------ACV by 行业------")
|
|
||||||
df_win_grouped_by_industry_sum = calc_acv_sum(df_win, acv_name, '客户分类')
|
df_win_grouped_by_industry_sum = calc_acv_sum(df_win, acv_name, '客户分类')
|
||||||
print(refine_content(df_win_grouped_by_industry_sum))
|
dataframes.append(refine_content(df_win_grouped_by_industry_sum))
|
||||||
|
sheet_names.append("ACV by 行业")
|
||||||
|
|
||||||
# Group by customer industry and calculate the average ACV for each group
|
# Group by customer industry and calculate the average ACV for each group
|
||||||
print("------平均ACV by 行业------")
|
|
||||||
df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类')
|
df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类')
|
||||||
print(refine_content(df_win_grouped_by_industry_mean))
|
dataframes.append(refine_content(df_win_grouped_by_industry_mean))
|
||||||
|
sheet_names.append("平均ACV by 行业")
|
||||||
|
|
||||||
print("------ACV by 子行业------")
|
|
||||||
df_win_grouped_by_sub_industry_sum = calc_acv_sum(df_win, acv_name, '客户行业')
|
df_win_grouped_by_sub_industry_sum = calc_acv_sum(df_win, acv_name, '客户行业')
|
||||||
print(refine_content(df_win_grouped_by_sub_industry_sum))
|
dataframes.append(refine_content(df_win_grouped_by_sub_industry_sum))
|
||||||
|
sheet_names.append("ACV by 子行业")
|
||||||
|
|
||||||
print("------平均ACV by 子行业------")
|
|
||||||
df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业')
|
df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业')
|
||||||
print(refine_content(df_win_grouped_by_sub_industry_mean))
|
dataframes.append(refine_content(df_win_grouped_by_sub_industry_mean))
|
||||||
|
sheet_names.append("平均ACV by 子行业")
|
||||||
|
|
||||||
# 读取Excel文件
|
# 读取Excel文件
|
||||||
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
|
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
|
||||||
|
|
@ -55,22 +65,24 @@ df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
|
||||||
# 按照"客户分类"列分组,并计算ACV列的和
|
# 按照"客户分类"列分组,并计算ACV列的和
|
||||||
acv_name = '预估 ACV'
|
acv_name = '预估 ACV'
|
||||||
|
|
||||||
print("------预估ACV by 行业------")
|
|
||||||
df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业')
|
df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业')
|
||||||
print(refine_content(df_pipeline_grouped_by_industry_sum))
|
dataframes.append(refine_content(df_pipeline_grouped_by_industry_sum))
|
||||||
|
sheet_names.append("预估ACV by 行业")
|
||||||
|
|
||||||
|
|
||||||
print("------平均预估ACV by 行业------")
|
|
||||||
df_pipeline_grouped_by_industry_mean = calc_acv_mean(df, acv_name, '负责人所属行业')
|
df_pipeline_grouped_by_industry_mean = calc_acv_mean(df, acv_name, '负责人所属行业')
|
||||||
print(refine_content(df_pipeline_grouped_by_industry_mean))
|
dataframes.append(refine_content(df_pipeline_grouped_by_industry_mean))
|
||||||
|
sheet_names.append("平均预估ACV by 行业")
|
||||||
|
|
||||||
|
|
||||||
print("------预估ACV by 子行业------")
|
|
||||||
df_pipeline_grouped_by_sub_industry_sum = calc_acv_sum(df, acv_name, '客户行业')
|
df_pipeline_grouped_by_sub_industry_sum = calc_acv_sum(df, acv_name, '客户行业')
|
||||||
print(refine_content(df_pipeline_grouped_by_sub_industry_sum))
|
dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_sum))
|
||||||
|
sheet_names.append("预估ACV by 子行业")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print("------平均预估ACV by 子行业------")
|
|
||||||
df_pipeline_grouped_by_sub_industry_mean = calc_acv_mean(df, acv_name, '客户行业')
|
df_pipeline_grouped_by_sub_industry_mean = calc_acv_mean(df, acv_name, '客户行业')
|
||||||
print(refine_content(df_pipeline_grouped_by_sub_industry_mean))
|
dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_mean))
|
||||||
|
sheet_names.append("平均预估ACV by 子行业")
|
||||||
|
|
||||||
|
# 保存所有数据帧到一个Excel文件中
|
||||||
|
output_file = './output/acv_analysis.xlsx'
|
||||||
|
save_to_excel(dataframes, sheet_names, output_file)
|
||||||
|
|
||||||
|
print(f"Analysis results have been saved to {output_file}")
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
def strip_character(column_name, characters: List[str]):
|
def strip_character(column_name, characters: List[str]):
|
||||||
|
|
||||||
|
|
@ -25,27 +26,25 @@ def get_acv_distribution(df, acv_name, industry_col_name):
|
||||||
industry_acv_distribution.loc['Total'] = industry_acv_distribution.sum()
|
industry_acv_distribution.loc['Total'] = industry_acv_distribution.sum()
|
||||||
return industry_acv_distribution
|
return industry_acv_distribution
|
||||||
|
|
||||||
# Define the bins for ACV intervals
|
# 新增函数:将结果保存到Excel
|
||||||
|
def save_to_excel(dfs, sheet_names, output_file):
    """Write each DataFrame to its own sheet of a single Excel workbook.

    The DataFrame index is written along with the data (the pandas default).

    Args:
        dfs: DataFrames to write, in sheet order.
        sheet_names: Sheet names, one per DataFrame, in the same order.
        output_file: Destination of the workbook. When it is a filesystem
            path, missing parent directories are created first.

    Raises:
        ValueError: If the number of DataFrames and sheet names differ.
    """
    import os

    dfs = list(dfs)
    sheet_names = list(sheet_names)
    # zip() would silently drop the unmatched tail, losing analysis results.
    if len(dfs) != len(sheet_names):
        raise ValueError(
            f"Got {len(dfs)} dataframes but {len(sheet_names)} sheet names"
        )

    # Only touch the filesystem for real paths; file-like targets pass through.
    if isinstance(output_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for df, sheet_name in zip(dfs, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name)
|
||||||
|
|
||||||
|
# 读取数据
|
||||||
df = pd.read_excel('./data_src/pingcap_won.xlsx')
|
df = pd.read_excel('./data_src/pingcap_won.xlsx')
|
||||||
|
|
||||||
|
|
||||||
print('---------成单:ACV Distribution by industry--------')
|
|
||||||
|
|
||||||
print(get_acv_distribution(df, 'ACV', '客户分类'))
|
|
||||||
|
|
||||||
|
|
||||||
print('---------成单:ACV Distribution by sub-industry--------')
|
|
||||||
|
|
||||||
print(get_acv_distribution(df, 'ACV', '客户行业'))
|
|
||||||
|
|
||||||
|
|
||||||
df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
|
df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
|
||||||
|
|
||||||
print('---------Pipeline:ACV Distribution by industry--------')
|
# 获取各种分布
|
||||||
|
won_industry_dist = get_acv_distribution(df, 'ACV', '客户分类')
|
||||||
|
won_sub_industry_dist = get_acv_distribution(df, 'ACV', '客户行业')
|
||||||
|
pipeline_industry_dist = get_acv_distribution(df_pipeline, '预估 ACV', '负责人所属行业')
|
||||||
|
pipeline_sub_industry_dist = get_acv_distribution(df_pipeline, '预估 ACV', '客户行业')
|
||||||
|
|
||||||
print(get_acv_distribution(df_pipeline, '预估 ACV', '负责人所属行业'))
|
# 保存结果到Excel
|
||||||
|
dfs = [won_industry_dist, won_sub_industry_dist, pipeline_industry_dist, pipeline_sub_industry_dist]
|
||||||
|
sheet_names = ['成单-行业分布', '成单-子行业分布', 'Pipeline-行业分布', 'Pipeline-子行业分布']
|
||||||
|
save_to_excel(dfs, sheet_names, './output/acv_distribution.xlsx')
|
||||||
|
|
||||||
|
print("ACV distribution analysis completed. Results saved in './output/acv_distribution.xlsx'")
|
||||||
print('---------Pipeline:ACV Distribution by sub-industry--------')
|
|
||||||
|
|
||||||
print(get_acv_distribution(df_pipeline, '预估 ACV', '客户行业'))
|
|
||||||
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue