market_assistant/analyze_acv.py

86 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from typing import List
def strip_character(column_name, characters: List[str]):
    """Remove every listed substring from a text value and trim it.

    Args:
        column_name: The text to clean. Despite the name, callers in this
            file pass cell values rather than column headers.
        characters: Substrings to delete. They are removed one after another
            in list order, so a later entry can match text that only became
            contiguous after an earlier removal.

    Returns:
        The text with all listed substrings removed and leading/trailing
        whitespace stripped.
    """
    cleaned = column_name
    for unwanted in characters:
        cleaned = cleaned.replace(unwanted, '')
    return cleaned.strip()
def refine_content(df):
    """Normalise the text cells of ``df`` in place and return the same frame.

    Every string cell is passed through ``strip_character`` with a fixed list
    of substrings to drop (space, newline, colon, the empty string and the
    placeholder "其他"). A cell that ends up empty is replaced by "其他".
    Because the placeholder itself is in the drop list, a cell that is exactly
    "其他" is emptied and then restored, while a longer cell containing it
    loses that substring.

    Non-string cells (numbers, NaN, timestamps, ...) are returned unchanged
    instead of raising ``AttributeError`` on ``.replace``.

    Args:
        df: DataFrame whose columns are rewritten in place.

    Returns:
        The same ``df`` object, after cleaning.
    """
    strip_character_list = [' ', '\n', ':', '', '其他']

    def _clean(value):
        # Only text supports substring removal; leave any other cell as is.
        if not isinstance(value, str):
            return value
        # Clean once and reuse the result for both the emptiness check and
        # the returned value.
        cleaned = strip_character(value, strip_character_list)
        return cleaned or "其他"

    for col in df.columns:
        df[col] = df[col].apply(_clean)
    return df
def calc_acv_mean(df, acv_name, group_by_column):
    """Return the per-group mean of ``acv_name`` as thousands-separated text.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to average.
        group_by_column: Name of the column to group on.

    Returns:
        A new DataFrame with two columns, ``group_by_column`` and
        ``acv_name``. The latter holds strings such as ``'1,500'``.
    """
    group_means = df.groupby(group_by_column)[acv_name].mean()
    # A group with no usable values has a NaN mean; turn it into 0 so the
    # integer cast below cannot fail. The cast drops the fractional part.
    whole_means = group_means.fillna(0).astype(int)
    result = whole_means.reset_index()
    result[acv_name] = result[acv_name].apply(lambda amount: f'{amount:,}')
    return result
def calc_acv_sum(df, acv_name, group_by_column):
    """Return the per-group total of ``acv_name`` as thousands-separated text.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to total.
        group_by_column: Name of the column to group on.

    Returns:
        A new DataFrame with two columns, ``group_by_column`` and
        ``acv_name``. The latter holds strings such as ``'1,234'``.
    """
    group_totals = df.groupby(group_by_column)[acv_name].sum()
    # The integer cast drops any fractional part of the total.
    result = group_totals.astype(int).reset_index()
    result[acv_name] = result[acv_name].apply(lambda amount: f'{amount:,}')
    return result
# Load the closed-won deals workbook.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Total ACV per customer category (the '客户分类' column).
print("------ACV by 行业------")
df_win_grouped_by_industry = refine_content(
    calc_acv_sum(df_win, acv_name, '客户分类')
)
print(df_win_grouped_by_industry)

# Average ACV per customer category.
print("------平均ACV by 行业------")
df_win_grouped_by_industry_mean = refine_content(
    calc_acv_mean(df_win, acv_name, '客户分类')
)
print(df_win_grouped_by_industry_mean)

# Total ACV per customer industry (the '客户行业' column), reported as the
# sub-industry breakdown.
print("------ACV by 子行业------")
df_win_grouped_by_sub_industry = refine_content(
    calc_acv_sum(df_win, acv_name, '客户行业')
)
print(df_win_grouped_by_sub_industry)

# Average ACV per customer industry.
print("------平均ACV by 子行业------")
df_win_grouped_by_sub_industry_mean = refine_content(
    calc_acv_mean(df_win, acv_name, '客户行业')
)
print(df_win_grouped_by_sub_industry_mean)
# Load the pipeline workbook.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
# From here on the reports aggregate the '预估 ACV' column of the pipeline
# sheet instead of the 'ACV' column used for won deals.
acv_name = '预估 ACV'

# Total estimated ACV grouped by the '负责人所属行业' column.
# calc_acv_sum already returns the ACV column as thousands-separated strings;
# formatting those strings a second time with '{:,}' raises ValueError, so the
# result is printed as returned.
print("------预估ACV by 行业------")
df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业')
print(refine_content(df_pipeline_grouped_by_industry_sum))

# Average estimated ACV grouped by the '负责人所属行业' column.
# calc_acv_mean replaces NaN group means with 0 before the integer cast, so a
# group with only missing values no longer aborts the report.
pipeline_average_acv_by_industry = calc_acv_mean(df, acv_name, '负责人所属行业')
print("------平均预估ACV by 行业------")
print(refine_content(pipeline_average_acv_by_industry))

# Total estimated ACV grouped by the '客户行业' column (sub-industry report).
df_pipeline_grouped_by_sub_industry = calc_acv_sum(df, acv_name, '客户行业')
print("------预估ACV by 子行业------")
print(refine_content(df_pipeline_grouped_by_sub_industry))

# Average estimated ACV grouped by the '客户行业' column.
pipeline_average_acv_by_sub_industry = calc_acv_mean(df, acv_name, '客户行业')
print("------平均预估ACV by 子行业------")
print(refine_content(pipeline_average_acv_by_sub_industry))