market_assistant/analyze_acv.py

95 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from typing import List
from openpyxl import Workbook
def strip_character(column_name, characters: List[str]):
    """Remove unwanted substrings from a string and trim its ends.

    Args:
        column_name: The string to clean. Despite the name, callers in this
            file pass cell values, not column names.
        characters: Substrings to delete, applied in list order.

    Returns:
        The string with every listed substring removed and leading/trailing
        whitespace stripped.
    """
    cleaned = column_name
    for unwanted in characters:
        cleaned = cleaned.replace(unwanted, '')
    return cleaned.strip()
def refine_content(df):
    """Normalise every string cell of *df* in place and return the frame.

    String cells are passed through ``strip_character`` with a fixed list of
    substrings to remove. A string that ends up empty is replaced by "其他".
    Non-string cells are left untouched.

    Because "其他" is itself in the removal list, any occurrence of it inside
    a longer string is deleted as well.

    Args:
        df: The DataFrame to clean; its columns are overwritten.

    Returns:
        The same DataFrame object that was passed in.
    """
    # NOTE(review): the '' entry is an empty string as seen here, so removing
    # it is a no-op. Check the original file in case it was meant to be an
    # invisible Unicode character.
    removable = [' ', '\n', ':', '', '其他']

    def _clean_cell(cell):
        if not isinstance(cell, str):
            return cell
        stripped = strip_character(cell, removable)
        if stripped == "":
            return "其他"
        return stripped

    for column in df.columns:
        df[column] = df[column].apply(_clean_cell)
    return df
def calc_acv_mean(df, acv_name, group_by_column):
    """Compute the per-group mean of an ACV column as formatted text.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to average.
        group_by_column: Name of the column to group by.

    Returns:
        A DataFrame with the group column and *acv_name*, sorted by the mean
        in descending order. Means are truncated to integers (missing means
        become 0) and then rendered as strings with thousands separators,
        e.g. ``'1,500'``.
    """
    group_means = df.groupby(group_by_column)[acv_name].mean()
    # A group whose values are all missing has a NaN mean; treat it as 0.
    whole_means = group_means.fillna(0).astype(int)
    ranked = whole_means.reset_index().sort_values(by=acv_name, ascending=False)
    # Format only after sorting so the ordering is numeric, not lexical.
    ranked[acv_name] = ranked[acv_name].apply(lambda amount: f'{amount:,}')
    return ranked
def calc_acv_sum(df, acv_name, group_by_column):
    """Compute the per-group total of an ACV column as formatted text.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to total.
        group_by_column: Name of the column to group by.

    Returns:
        A DataFrame with the group column and *acv_name*, sorted by the total
        in descending order. Totals are truncated to integers and then
        rendered as strings with thousands separators, e.g. ``'3,000'``.
    """
    group_totals = df.groupby(group_by_column)[acv_name].sum()
    whole_totals = group_totals.astype(int)
    ranked = whole_totals.reset_index().sort_values(by=acv_name, ascending=False)
    # Format only after sorting so the ordering is numeric, not lexical.
    ranked[acv_name] = ranked[acv_name].apply(lambda amount: f'{amount:,}')
    return ranked
def calc_acv_sum_and_count(df, acv_name, group_by_column):
    """Compute the per-group total and non-null count of an ACV column.

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to aggregate.
        group_by_column: Name of the column to group by.

    Returns:
        A DataFrame with three columns: the group column, ``<acv_name>_x``
        (the total, truncated to an integer) and ``<acv_name>_y`` (the number
        of non-null values). The ``_x`` / ``_y`` suffixes are the defaults
        ``pd.merge`` applies because both inputs share the column name.
    """
    grouped = df.groupby(group_by_column)[acv_name]
    totals = grouped.sum().astype(int).reset_index()
    counts = grouped.count().reset_index()
    return pd.merge(totals, counts, on=group_by_column, how='outer')
def save_to_excel(dataframes, sheet_names, output_file):
    """Write each DataFrame to its own sheet of a single Excel workbook.

    Args:
        dataframes: Iterable of DataFrames to export.
        sheet_names: Iterable of sheet names, one per DataFrame, in the same
            order.
        output_file: Destination path (or any target ``pd.ExcelWriter``
            accepts). When it is a path, missing parent directories are
            created first.

    Raises:
        ValueError: If the number of DataFrames and sheet names differ.
            A plain ``zip`` would otherwise silently drop the extras.
    """
    import os

    frames = list(dataframes)
    names = list(sheet_names)
    if len(frames) != len(names):
        raise ValueError(
            f"Got {len(frames)} dataframes but {len(names)} sheet names; "
            "they must match one-to-one."
        )

    # Only paths have a parent directory; file-like targets are passed through.
    if isinstance(output_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for frame, name in zip(frames, names):
            frame.to_excel(writer, sheet_name=name, index=False)
# Load the won-deals workbook.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Parallel lists: every exported DataFrame and the sheet name it is saved under.
dataframes = []
sheet_names = []

# Won deals: total + count and mean ACV, first by '客户分类', then by '客户行业'.
for sheet_title, aggregate, group_column in (
    ("ACV by 行业", calc_acv_sum_and_count, '客户分类'),
    ("平均ACV by 行业", calc_acv_mean, '客户分类'),
    ("ACV by 子行业", calc_acv_sum_and_count, '客户行业'),
    ("平均ACV by 子行业", calc_acv_mean, '客户行业'),
):
    dataframes.append(refine_content(aggregate(df_win, acv_name, group_column)))
    sheet_names.append(sheet_title)

# Load the pipeline workbook.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')

# Pipeline: the same reports on the '预估 ACV' column, grouped by
# '负责人所属行业' and then by '客户行业'.
acv_name = '预估 ACV'
for sheet_title, aggregate, group_column in (
    ("预估ACV by 行业", calc_acv_sum_and_count, '负责人所属行业'),
    ("平均预估ACV by 行业", calc_acv_mean, '负责人所属行业'),
    ("预估ACV by 子行业", calc_acv_sum_and_count, '客户行业'),
    ("平均预估ACV by 子行业", calc_acv_mean, '客户行业'),
):
    dataframes.append(refine_content(aggregate(df, acv_name, group_column)))
    sheet_names.append(sheet_title)

# Save every DataFrame into a single Excel workbook, one sheet each.
output_file = './output/acv_analysis_v2(with count).xlsx'
save_to_excel(dataframes, sheet_names, output_file)
print(f"Analysis results have been saved to {output_file}")