From 085f0dc880b0e50aad569b9f131336ef0f3a656e Mon Sep 17 00:00:00 2001 From: Tiger Ren Date: Fri, 23 Aug 2024 19:50:35 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AE=A1=E7=AE=97ACV=E5=88=86=E5=B8=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyze_acv_dist.py | 77 +++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/analyze_acv_dist.py b/analyze_acv_dist.py index e75909f..b116625 100644 --- a/analyze_acv_dist.py +++ b/analyze_acv_dist.py @@ -1,5 +1,6 @@ import pandas as pd from typing import List + def strip_character(column_name, characters: List[str]): new_col_name = column_name @@ -14,57 +15,37 @@ def refine_content(df): df[col] = df[col].apply(lambda x: "其他" if strip_character(x, strip_character_list) == "" else strip_character(x, strip_character_list)) return df -# 读取赢单Excel文件 -df_win = pd.read_excel('./data_src/pingcap_won.xlsx') -acv_name = 'ACV' +def get_acv_distribution(df, acv_name, industry_col_name): + # Define the bins for ACV intervals + bins = [0, 1e6, 5e6, float('inf')] + labels = ['<100万', '100万-500万', '>500万'] + # Create a new column 'ACV Interval' based on the bins + df['ACV Interval'] = pd.cut(df[acv_name], bins=bins, labels=labels, right=False) + industry_acv_distribution = df.groupby([industry_col_name, 'ACV Interval']).size().unstack(fill_value=0) + industry_acv_distribution.loc['Total'] = industry_acv_distribution.sum() + return industry_acv_distribution -# ACV by 客户分类 -df_win_grouped_by_industry = df_win.groupby('客户分类')[acv_name].sum().astype(int).reset_index() -df_win_grouped_by_industry[acv_name] = df_win_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------ACV by 行业------") -print(refine_content(df_win_grouped_by_industry)) - -# Group by customer industry and calculate the average ACV for each group -won_average_acv_by_industry = df_win.groupby('客户分类')[acv_name].mean().astype(int).reset_index() -won_average_acv_by_industry[acv_name] = won_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------平均ACV by 行业------") -print(refine_content(won_average_acv_by_industry)) +# Define the bins for ACV intervals +df = pd.read_excel('./data_src/pingcap_won.xlsx') +print('---------成单:ACV Distribution by industry--------') -df_win_grouped_by_sub_industry = df_win.groupby('客户行业')[acv_name].sum().astype(int).reset_index() -df_win_grouped_by_sub_industry[acv_name] = df_win_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------ACV by 子行业------") -print(refine_content(df_win_grouped_by_sub_industry)) - -won_average_acv_by_sub_industry = df_win.groupby('客户行业')[acv_name].mean().astype(int).reset_index() -won_average_acv_by_sub_industry[acv_name] = won_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------平均ACV by 子行业------") -print(refine_content(won_average_acv_by_sub_industry)) - -# 读取Excel文件 -df = pd.read_excel('./data_src/pingcap_pipeline.xlsx') - -# 按照"客户分类"列分组,并计算ACV列的和 -acv_name = '预估 ACV' -df_pipeline_grouped_by_industry = df.groupby('负责人所属行业')[acv_name].sum().astype(int).reset_index() -df_pipeline_grouped_by_industry = df_pipeline_grouped_by_industry.sort_values(by=acv_name, ascending=False) -df_pipeline_grouped_by_industry[acv_name] = df_pipeline_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------预估ACV by 行业------") -print(refine_content(df_pipeline_grouped_by_industry)) - -pipeline_average_acv_by_industry = df.groupby('负责人所属行业')[acv_name].mean().astype(int).reset_index() -pipeline_average_acv_by_industry[acv_name] = pipeline_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------平均预估ACV by 行业------") -print(refine_content(pipeline_average_acv_by_industry)) - -df_pipeline_grouped_by_sub_industry = df.groupby('客户行业')[acv_name].sum().astype(int).reset_index() -df_pipeline_grouped_by_sub_industry[acv_name] = df_pipeline_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------预估ACV by 子行业------") -print(refine_content(df_pipeline_grouped_by_sub_industry)) +print(get_acv_distribution(df, 'ACV', '客户分类')) -pipeline_average_acv_by_sub_industry = df.groupby('客户行业')[acv_name].mean().fillna(0).astype(int).reset_index() -pipeline_average_acv_by_sub_industry[acv_name] = pipeline_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x)) -print("------平均预估ACV by 子行业------") -print(refine_content(pipeline_average_acv_by_sub_industry)) \ No newline at end of file +print('---------成单:ACV Distribution by sub-industry--------') + +print(get_acv_distribution(df, 'ACV', '客户行业')) + + +df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx') + +print('---------Pipeline:ACV Distribution by industry--------') + +print(get_acv_distribution(df_pipeline, '预估 ACV', '负责人所属行业')) + + +print('---------Pipeline:ACV Distribution by sub-industry--------') + +print(get_acv_distribution(df_pipeline, '预估 ACV', '客户行业')) \ No newline at end of file