# Python script (70 lines, 3.6 KiB)
from typing import List

import pandas as pd

def strip_character(column_name, characters: List[str]):
    """Remove every occurrence of the given substrings, then trim whitespace.

    Args:
        column_name: The text to clean. A value that is not a ``str`` (for
            example a number, ``None`` or NaN coming out of a spreadsheet
            cell) is returned unchanged instead of raising
            ``AttributeError`` on ``.replace``.
        characters: Substrings to delete, applied in list order. Entries may
            be longer than one character.

    Returns:
        The cleaned string with leading and trailing whitespace stripped, or
        the original object when it is not a string.
    """
    # This function is applied to arbitrary DataFrame cells, which are not
    # guaranteed to be text; only strings can be cleaned.
    if not isinstance(column_name, str):
        return column_name

    new_col_name = column_name
    for character in characters:
        new_col_name = new_col_name.replace(character, '')
    return new_col_name.strip()


def refine_content(df):
    """Clean every cell of *df* in place and return the same DataFrame.

    Each cell is passed through ``strip_character`` to remove spaces,
    newlines, colons and the literal "其他". A cell that ends up as the empty
    string is replaced by the placeholder "其他", so blank labels and labels
    that were already "其他" are displayed identically.

    Args:
        df: DataFrame whose columns are overwritten with the cleaned values.

    Returns:
        The same ``df`` object, after modification.
    """
    # NOTE(review): ':' is listed twice; the second entry may have been meant
    # to be a different colon variant (e.g. full-width) — confirm.
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def clean_cell(value):
        # Clean once and reuse the result instead of running the whole
        # replacement chain twice per cell.
        cleaned = strip_character(value, strip_character_list)
        return "其他" if cleaned == "" else cleaned

    for col in df.columns:
        df[col] = df[col].apply(clean_cell)
    return df


def _acv_table(frame, group_col, value_col, how, sort_desc=False):
    """Aggregate *value_col* per *group_col* and format it for printing.

    Args:
        frame: Source DataFrame.
        group_col: Column to group by.
        value_col: Numeric column to aggregate.
        how: Aggregation name understood by ``GroupBy.agg`` ('sum' or 'mean').
        sort_desc: When true, order rows by the aggregated value, largest
            first, before the values are turned into strings.

    Returns:
        A DataFrame with ``group_col`` and ``value_col``, where the value is
        an integer rendered with thousands separators (e.g. '1,234,567').
    """
    table = (
        frame.groupby(group_col)[value_col]
        .agg(how)
        # A group whose values are all missing has a NaN mean, which cannot
        # be cast to int; treat it as 0 for every table, not just one.
        .fillna(0)
        .astype(int)
        .reset_index()
    )
    if sort_desc:
        # Sort while the column is still numeric; sorting the formatted
        # strings would order them lexicographically.
        table = table.sort_values(by=value_col, ascending=False)
    table[value_col] = table[value_col].apply('{:,}'.format)
    return table


# Load the won-deals Excel file.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Total ACV per 客户分类 (customer category).
df_win_grouped_by_industry = _acv_table(df_win, '客户分类', acv_name, 'sum')
print("------ACV by 行业------")
print(refine_content(df_win_grouped_by_industry))

# Average ACV per 客户分类 (customer category).
won_average_acv_by_industry = _acv_table(df_win, '客户分类', acv_name, 'mean')
print("------平均ACV by 行业------")
print(refine_content(won_average_acv_by_industry))

# Total ACV per 客户行业 (customer industry, reported as sub-industry).
df_win_grouped_by_sub_industry = _acv_table(df_win, '客户行业', acv_name, 'sum')
print("------ACV by 子行业------")
print(refine_content(df_win_grouped_by_sub_industry))

# Average ACV per 客户行业.
won_average_acv_by_sub_industry = _acv_table(df_win, '客户行业', acv_name, 'mean')
print("------平均ACV by 子行业------")
print(refine_content(won_average_acv_by_sub_industry))

# Load the pipeline Excel file.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')

# Group by the '负责人所属行业' (owner's industry) column and sum the
# estimated ACV; this is the only table sorted by value, largest first.
acv_name = '预估 ACV'
df_pipeline_grouped_by_industry = _acv_table(
    df, '负责人所属行业', acv_name, 'sum', sort_desc=True
)
print("------预估ACV by 行业------")
print(refine_content(df_pipeline_grouped_by_industry))

# Average estimated ACV per '负责人所属行业'.
pipeline_average_acv_by_industry = _acv_table(df, '负责人所属行业', acv_name, 'mean')
print("------平均预估ACV by 行业------")
print(refine_content(pipeline_average_acv_by_industry))

# Total estimated ACV per 客户行业.
df_pipeline_grouped_by_sub_industry = _acv_table(df, '客户行业', acv_name, 'sum')
print("------预估ACV by 子行业------")
print(refine_content(df_pipeline_grouped_by_sub_industry))

# Average estimated ACV per 客户行业.
pipeline_average_acv_by_sub_industry = _acv_table(df, '客户行业', acv_name, 'mean')
print("------平均预估ACV by 子行业------")
print(refine_content(pipeline_average_acv_by_sub_industry))