market_assistant/analyze_acv_dist.py

51 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from typing import List
def strip_character(column_name, characters: List[str]):
    """Remove unwanted substrings from a string and trim its ends.

    Args:
        column_name: The string to clean.
        characters: Substrings to delete; every occurrence of each one is
            removed, in the order given.

    Returns:
        The cleaned string with leading and trailing whitespace stripped.
    """
    cleaned = column_name
    for unwanted in characters:
        cleaned = cleaned.replace(unwanted, '')
    # Trim once at the end so whitespace exposed by the removals is dropped too.
    return cleaned.strip()
def refine_content(df):
    """Normalise the text cells of a DataFrame in place.

    Every string cell is passed through ``strip_character`` to drop spaces,
    newlines, colons and the literal "其他"; a cell that ends up empty is
    replaced by "其他". Non-string cells are left untouched.

    Args:
        df: DataFrame to clean. Its columns are overwritten in place.

    Returns:
        The same DataFrame object, for call chaining.
    """
    # NOTE(review): the '' entry removes nothing as written; it may originally
    # have held an invisible character -- confirm against the original file.
    strip_character_list = [' ', '\n', ':', '', '其他']

    def _clean_cell(value):
        # Non-string cells (e.g. NaN, numbers) have no .replace(); pass them
        # through instead of raising AttributeError.
        if not isinstance(value, str):
            return value
        # Clean once and reuse the result rather than stripping twice per cell.
        cleaned = strip_character(value, strip_character_list)
        return cleaned if cleaned != "" else "其他"

    for col in df.columns:
        df[col] = df[col].apply(_clean_cell)
    return df
def get_acv_distribution(df, acv_name, industry_col_name, bins=None, labels=None):
    """Count records per industry and ACV interval, plus a 'Total' row.

    Args:
        df: Source DataFrame. An 'ACV Interval' column is added to (or
            overwritten in) this frame as a side effect.
        acv_name: Name of the numeric column holding the ACV amount.
        industry_col_name: Name of the column to group rows by.
        bins: Optional interval edges passed to ``pd.cut``. Defaults to
            ``[0, 1e6, 5e6, inf]``.
        labels: Optional labels, one per interval. Defaults to
            ``['<100万', '100万-500万', '>500万']``.

    Returns:
        DataFrame indexed by the values of ``industry_col_name`` with one
        column per interval label and a final 'Total' row holding the column
        sums. Rows whose ACV is missing or outside the bins, or whose grouping
        value is missing, are not counted.
    """
    if bins is None:
        bins = [0, 1e6, 5e6, float('inf')]
    if labels is None:
        labels = ['<100万', '100万-500万', '>500万']

    # right=False makes every interval closed on the left, e.g. [0, 1e6).
    df['ACV Interval'] = pd.cut(df[acv_name], bins=bins, labels=labels, right=False)

    # 'ACV Interval' is categorical: state observed=False explicitly so that
    # intervals with no rows still appear as zero-filled columns, independent
    # of the pandas default.
    grouped = df.groupby([industry_col_name, 'ACV Interval'], observed=False)
    industry_acv_distribution = grouped.size().unstack(fill_value=0)

    industry_acv_distribution.loc['Total'] = industry_acv_distribution.sum()
    return industry_acv_distribution
# Report script: print ACV-interval counts for the won-deal workbook and the
# pipeline workbook, each grouped by industry and then by sub-industry.
def _print_distribution(title, frame, acv_col, group_col):
    """Print a header line followed by the ACV distribution table."""
    print(title)
    print(get_acv_distribution(frame, acv_col, group_col))


df = pd.read_excel('./data_src/pingcap_won.xlsx')
_print_distribution(
    '---------成单:ACV Distribution by industry--------', df, 'ACV', '客户分类'
)
_print_distribution(
    '---------成单:ACV Distribution by sub-industry--------', df, 'ACV', '客户行业'
)

df_pipeline = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
_print_distribution(
    '---------Pipeline:ACV Distribution by industry--------',
    df_pipeline, '预估 ACV', '负责人所属行业',
)
_print_distribution(
    '---------Pipeline:ACV Distribution by sub-industry--------',
    df_pipeline, '预估 ACV', '客户行业',
)