95 lines
4.2 KiB
Python
95 lines
4.2 KiB
Python
import os
from typing import List

import pandas as pd
from openpyxl import Workbook
def strip_character(column_name: str, characters: List[str]) -> str:
    """Remove every substring in *characters* from *column_name*, then trim.

    The substrings are removed one after another in list order, so an
    earlier removal can create or destroy a match for a later entry.
    Leading and trailing whitespace is stripped once, after all removals.

    Args:
        column_name: Text to clean.
        characters: Substrings to delete; entries may be longer than one
            character.

    Returns:
        The cleaned text (possibly the empty string).
    """
    new_col_name = column_name
    for character in characters:
        new_col_name = new_col_name.replace(character, '')
    return new_col_name.strip()
def refine_content(df):
    """Clean every string cell of *df* in place and return the same frame.

    Each string cell is passed through ``strip_character`` with a fixed list
    of substrings to remove. A string that is empty after cleaning is
    replaced by "其他" ("other"). Non-string cells are left untouched.

    Args:
        df: DataFrame to clean; its columns are overwritten.

    Returns:
        The same DataFrame object, after cleaning.
    """
    # NOTE(review): ':' is listed twice; the second entry was possibly meant
    # to be a full-width colon - confirm against the original file.
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def _clean_cell(value):
        # Only strings are cleaned; numbers, NaN, etc. pass through as-is.
        if not isinstance(value, str):
            return value
        # Clean once and reuse the result for both the test and the value.
        cleaned = strip_character(value, strip_character_list)
        return "其他" if cleaned == "" else cleaned

    for col in df.columns:
        df[col] = df[col].apply(_clean_cell)
    return df
def calc_acv_mean(df, acv_name, group_by_column):
    """Return the per-group mean of *acv_name* as formatted strings.

    Groups whose mean is NaN (no non-null values) are reported as 0. Means
    are converted with ``astype(int)``, which truncates the fractional part
    rather than rounding. Rows are sorted by the numeric mean, largest
    first, before the values are rendered with thousands separators.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to average.
        group_by_column: Column to group by.

    Returns:
        A DataFrame with the group column and *acv_name*, where *acv_name*
        holds strings such as ``'1,500'``.
    """
    df_grouped_mean = (
        df.groupby(group_by_column)[acv_name]
        .mean()
        .fillna(0)
        .astype(int)
        .reset_index()
    )
    # Sort while the values are still numeric; the formatted strings would
    # otherwise sort lexicographically.
    df_grouped_mean = df_grouped_mean.sort_values(by=acv_name, ascending=False)
    df_grouped_mean[acv_name] = df_grouped_mean[acv_name].map('{:,}'.format)
    return df_grouped_mean
def calc_acv_sum(df, acv_name, group_by_column):
    """Return the per-group sum of *acv_name* as formatted strings.

    Sums are converted with ``astype(int)``, which truncates the fractional
    part. Rows are sorted by the numeric sum, largest first, before the
    values are rendered with thousands separators.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to total.
        group_by_column: Column to group by.

    Returns:
        A DataFrame with the group column and *acv_name*, where *acv_name*
        holds strings such as ``'3,000'``.
    """
    df_grouped_sum = (
        df.groupby(group_by_column)[acv_name]
        .sum()
        .astype(int)
        .reset_index()
    )
    # Sort while the values are still numeric; the formatted strings would
    # otherwise sort lexicographically.
    df_grouped_sum = df_grouped_sum.sort_values(by=acv_name, ascending=False)
    df_grouped_sum[acv_name] = df_grouped_sum[acv_name].map('{:,}'.format)
    return df_grouped_sum
def calc_acv_sum_and_count(df, acv_name, group_by_column):
    """Return the per-group sum and non-null count of *acv_name*.

    Both aggregates keep the column name *acv_name*, so the merge applies
    pandas' default suffixes: ``<acv_name>_x`` holds the sum (converted
    with ``astype(int)``) and ``<acv_name>_y`` holds the count. Unlike
    ``calc_acv_sum``, the result is neither sorted by value nor formatted.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to aggregate.
        group_by_column: Column to group by.

    Returns:
        A DataFrame with the group column, the sum column and the count
        column.
    """
    # Build the grouping once and derive both aggregates from it.
    grouped = df.groupby(group_by_column)[acv_name]
    df_grouped_sum = grouped.sum().astype(int).reset_index()
    df_grouped_count = grouped.count().reset_index()
    df_grouped_sum_count = pd.merge(
        df_grouped_sum, df_grouped_count, on=group_by_column, how='outer'
    )
    return df_grouped_sum_count
def save_to_excel(dataframes, sheet_names, output_file):
    """Write each DataFrame to its own sheet of one Excel workbook.

    Args:
        dataframes: DataFrames to write, in sheet order.
        sheet_names: Sheet name for each DataFrame, in the same order.
        output_file: Destination passed to ``pd.ExcelWriter``. When it is a
            filesystem path, its parent directory is created if missing.

    Raises:
        ValueError: If *dataframes* and *sheet_names* differ in length.
    """
    dataframes = list(dataframes)
    sheet_names = list(sheet_names)
    # zip() would silently drop the surplus frames or names; fail loudly.
    if len(dataframes) != len(sheet_names):
        raise ValueError(
            f"got {len(dataframes)} dataframes but {len(sheet_names)} sheet names"
        )

    # Only touch the filesystem for real paths, not file-like objects.
    if isinstance(output_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for df, sheet_name in zip(dataframes, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name, index=False)
# Load the won-deals workbook.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Result frames and the sheet name each one is written to, kept in step.
dataframes = []
sheet_names = []

# ACV sum and count grouped by the '客户分类' (customer category) column.
df_win_grouped_by_industry_sum = calc_acv_sum_and_count(df_win, acv_name, '客户分类')
dataframes.append(refine_content(df_win_grouped_by_industry_sum))
sheet_names.append("ACV by 行业")

# Average ACV grouped by the '客户分类' (customer category) column.
df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类')
dataframes.append(refine_content(df_win_grouped_by_industry_mean))
sheet_names.append("平均ACV by 行业")

# ACV sum and count grouped by the '客户行业' (customer industry) column.
df_win_grouped_by_sub_industry_sum = calc_acv_sum_and_count(df_win, acv_name, '客户行业')
dataframes.append(refine_content(df_win_grouped_by_sub_industry_sum))
sheet_names.append("ACV by 子行业")

# Average ACV grouped by the '客户行业' (customer industry) column.
df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业')
dataframes.append(refine_content(df_win_grouped_by_sub_industry_mean))
sheet_names.append("平均ACV by 子行业")

# Load the pipeline workbook.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')

# The pipeline workbook is aggregated on its '预估 ACV' (estimated ACV) column.
acv_name = '预估 ACV'

# Estimated ACV sum and count grouped by '负责人所属行业' (owner's industry).
df_pipeline_grouped_by_industry_sum = calc_acv_sum_and_count(df, acv_name, '负责人所属行业')
dataframes.append(refine_content(df_pipeline_grouped_by_industry_sum))
sheet_names.append("预估ACV by 行业")

# Average estimated ACV grouped by '负责人所属行业' (owner's industry).
df_pipeline_grouped_by_industry_mean = calc_acv_mean(df, acv_name, '负责人所属行业')
dataframes.append(refine_content(df_pipeline_grouped_by_industry_mean))
sheet_names.append("平均预估ACV by 行业")

# Estimated ACV sum and count grouped by '客户行业' (customer industry).
df_pipeline_grouped_by_sub_industry_sum = calc_acv_sum_and_count(df, acv_name, '客户行业')
dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_sum))
sheet_names.append("预估ACV by 子行业")

# Average estimated ACV grouped by '客户行业' (customer industry).
df_pipeline_grouped_by_sub_industry_mean = calc_acv_mean(df, acv_name, '客户行业')
dataframes.append(refine_content(df_pipeline_grouped_by_sub_industry_mean))
sheet_names.append("平均预估ACV by 子行业")

# Write every result frame to its own sheet of a single workbook.
output_file = './output/acv_analysis_v2(with count).xlsx'
save_to_excel(dataframes, sheet_names, output_file)

print(f"Analysis results have been saved to {output_file}")