暂存analyze_acv相关代码
This commit is contained in:
parent
e05fdc538e
commit
2049baa5f8
|
|
@ -59,22 +59,3 @@ class ExcelHelper:
|
||||||
print(f"Extracted columns saved to {new_filename}")
|
print(f"Extracted columns saved to {new_filename}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# # 示例数据
|
|
||||||
# data = [
|
|
||||||
# [
|
|
||||||
# {'title': 'Title 1', 'content': 'Content 1'},
|
|
||||||
# {'title': 'Title 2', 'content': 'Content 2'},
|
|
||||||
# {'title': 'Title 3', 'content': 'Content 3'}
|
|
||||||
# ],
|
|
||||||
# [
|
|
||||||
# {'title': 'Title 4', 'content': 'Content 4'},
|
|
||||||
# {'title': 'Title 5', 'content': 'Content 5'},
|
|
||||||
# {'title': 'Title 6', 'content': 'Content 6'}
|
|
||||||
# ]
|
|
||||||
# ]
|
|
||||||
|
|
||||||
# # 创建 ExcelHelper 实例并生成 Excel 文件
|
|
||||||
# excel_helper = ExcelHelper(data)
|
|
||||||
# excel_helper.create_excel('output.xlsx')
|
|
||||||
|
|
|
||||||
15
analysis.py
15
analysis.py
|
|
@ -1,15 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
# Load the pipeline workbook.
df = pd.read_excel('pingcap_pipeline.xlsx')

# Total the '预估 ACV' column for each value of '负责人所属行业',
# cast the totals to int and order them from largest to smallest.
acv_name = '预估 ACV'
grouped_df = (
    df.groupby('负责人所属行业')[acv_name]
    .sum()
    .astype(int)
    .reset_index()
    .sort_values(by=acv_name, ascending=False)
)

# Render the totals with thousands separators for display.
grouped_df[acv_name] = grouped_df[acv_name].apply(lambda total: '{:,}'.format(total))

# Show the result.
print(grouped_df)
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,86 @@
|
||||||
|
import pandas as pd
|
||||||
|
from typing import List
|
||||||
|
def strip_character(column_name, characters: List[str]):
    """Remove every occurrence of the given substrings from a value.

    Args:
        column_name: The value to clean. Despite the parameter name, callers
            in this file pass individual cell values.
        characters: Substrings to delete; they are removed in list order.

    Returns:
        The cleaned string with leading/trailing whitespace stripped.
        A non-string value (for example a number or a missing cell) is
        returned unchanged, because it has no ``replace``/``strip`` methods.
    """
    # Guard against numeric / NaN / None cells instead of raising
    # AttributeError on ``.replace``.
    if not isinstance(column_name, str):
        return column_name

    cleaned = column_name
    for character in characters:
        cleaned = cleaned.replace(character, '')
    return cleaned.strip()
|
||||||
|
|
||||||
|
def refine_content(df):
    """Clean the text cells of a DataFrame in place and return it.

    Every string cell has spaces, newlines, ASCII and full-width colons and
    the substring "其他" removed via ``strip_character``. A cell that ends up
    empty is replaced by "其他". Cells that are not strings are left
    untouched.

    Args:
        df: The DataFrame to clean; its columns are overwritten.

    Returns:
        The same DataFrame object, after cleaning.
    """
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def _clean_cell(value):
        # Non-text cells (numbers, NaN) have nothing to strip and would make
        # str methods fail, so pass them through unchanged.
        if not isinstance(value, str):
            return value
        # Clean once and reuse the result instead of calling the helper twice.
        cleaned = strip_character(value, strip_character_list)
        return "其他" if cleaned == "" else cleaned

    for col in df.columns:
        df[col] = df[col].apply(_clean_cell)
    return df
|
||||||
|
|
||||||
|
def calc_acv_mean(df, acv_name, group_by_column):
    """Return the per-group average of an ACV column as formatted text.

    Rows of ``df`` are grouped by ``group_by_column`` and the mean of
    ``acv_name`` is taken for each group. Missing means are replaced by 0,
    the values are cast to int, and each one is rendered with thousands
    separators (for example ``1,234``).

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to average.
        group_by_column: Name of the column to group by.

    Returns:
        A new DataFrame with the group column and the formatted ``acv_name``
        column.
    """
    group_means = df.groupby(group_by_column)[acv_name].mean()
    whole_means = group_means.fillna(0).astype(int)
    result = whole_means.reset_index()
    result[acv_name] = result[acv_name].apply(lambda amount: '{:,}'.format(amount))
    return result
|
||||||
|
|
||||||
|
def calc_acv_sum(df, acv_name, group_by_column):
    """Return the per-group total of an ACV column as formatted text.

    Rows of ``df`` are grouped by ``group_by_column`` and ``acv_name`` is
    summed for each group. The totals are cast to int and each one is
    rendered with thousands separators (for example ``1,234``).

    Args:
        df: Source DataFrame.
        acv_name: Name of the numeric column to total.
        group_by_column: Name of the column to group by.

    Returns:
        A new DataFrame with the group column and the formatted ``acv_name``
        column.
    """
    group_totals = df.groupby(group_by_column)[acv_name].sum()
    result = group_totals.astype(int).reset_index()
    result[acv_name] = result[acv_name].apply(lambda amount: '{:,}'.format(amount))
    return result
|
||||||
|
|
||||||
|
|
||||||
|
# Load the won-deals workbook; its amount column is named 'ACV'.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Total ACV for each value of the '客户分类' column.
print("------ACV by 行业------")
df_win_grouped_by_industry = calc_acv_sum(df_win, acv_name, '客户分类')
print(refine_content(df_win_grouped_by_industry))

# Average ACV for each value of the '客户分类' column.
print("------平均ACV by 行业------")
df_win_grouped_by_industry_mean = calc_acv_mean(df_win, acv_name, '客户分类')
print(refine_content(df_win_grouped_by_industry_mean))

# Total ACV for each value of the '客户行业' column.
print("------ACV by 子行业------")
df_win_grouped_by_sub_industry = calc_acv_sum(df_win, acv_name, '客户行业')
print(refine_content(df_win_grouped_by_sub_industry))

# Average ACV for each value of the '客户行业' column.
print("------平均ACV by 子行业------")
df_win_grouped_by_sub_industry_mean = calc_acv_mean(df_win, acv_name, '客户行业')
print(refine_content(df_win_grouped_by_sub_industry_mean))
|
||||||
|
|
||||||
|
# Load the pipeline workbook; its amount column is named '预估 ACV'.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
acv_name = '预估 ACV'

# Total per value of '负责人所属行业'. calc_acv_sum already returns the amounts
# as comma-grouped strings, so they must not be passed through '{:,}' again:
# that format option raises ValueError for str values.
print("------预估ACV by 行业------")
df_pipeline_grouped_by_industry_sum = calc_acv_sum(df, acv_name, '负责人所属行业')
print(refine_content(df_pipeline_grouped_by_industry_sum))

# Average per value of '负责人所属行业'. calc_acv_mean replaces NaN means with 0
# before the int cast, so a group with no amounts no longer aborts the script.
print("------平均预估ACV by 行业------")
pipeline_average_acv_by_industry = calc_acv_mean(df, acv_name, '负责人所属行业')
print(refine_content(pipeline_average_acv_by_industry))

# Total per value of '客户行业'.
print("------预估ACV by 子行业------")
df_pipeline_grouped_by_sub_industry = calc_acv_sum(df, acv_name, '客户行业')
print(refine_content(df_pipeline_grouped_by_sub_industry))

# Average per value of '客户行业'.
print("------平均预估ACV by 子行业------")
pipeline_average_acv_by_sub_industry = calc_acv_mean(df, acv_name, '客户行业')
print(refine_content(pipeline_average_acv_by_sub_industry))
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
import pandas as pd
|
||||||
|
from typing import List
|
||||||
|
def strip_character(column_name, characters: List[str]):
    """Remove every occurrence of the given substrings from a value.

    Args:
        column_name: The value to clean. Despite the parameter name, callers
            in this file pass individual cell values.
        characters: Substrings to delete; they are removed in list order.

    Returns:
        The cleaned string with leading/trailing whitespace stripped.
        A non-string value (for example a number or a missing cell) is
        returned unchanged, because it has no ``replace``/``strip`` methods.
    """
    # Guard against numeric / NaN / None cells instead of raising
    # AttributeError on ``.replace``.
    if not isinstance(column_name, str):
        return column_name

    cleaned = column_name
    for character in characters:
        cleaned = cleaned.replace(character, '')
    return cleaned.strip()
|
||||||
|
|
||||||
|
def refine_content(df):
    """Clean the text cells of a DataFrame in place and return it.

    Every string cell has spaces, newlines, ASCII and full-width colons and
    the substring "其他" removed via ``strip_character``. A cell that ends up
    empty is replaced by "其他". Cells that are not strings are left
    untouched.

    Args:
        df: The DataFrame to clean; its columns are overwritten.

    Returns:
        The same DataFrame object, after cleaning.
    """
    strip_character_list = [' ', '\n', ':', ':', '其他']

    def _clean_cell(value):
        # Non-text cells (numbers, NaN) have nothing to strip and would make
        # str methods fail, so pass them through unchanged.
        if not isinstance(value, str):
            return value
        # Clean once and reuse the result instead of calling the helper twice.
        cleaned = strip_character(value, strip_character_list)
        return "其他" if cleaned == "" else cleaned

    for col in df.columns:
        df[col] = df[col].apply(_clean_cell)
    return df
|
||||||
|
|
||||||
|
# Load the won-deals workbook; its amount column is named 'ACV'.
df_win = pd.read_excel('./data_src/pingcap_won.xlsx')
acv_name = 'ACV'

# Total ACV for each value of '客户分类', rendered with thousands separators.
df_win_grouped_by_industry = df_win.groupby('客户分类')[acv_name].sum().astype(int).reset_index()
df_win_grouped_by_industry[acv_name] = df_win_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------ACV by 行业------")
print(refine_content(df_win_grouped_by_industry))

# Average ACV for each value of '客户分类'. A group whose ACV cells are all
# missing has a NaN mean, which astype(int) cannot convert, so NaN is
# replaced by 0 before the cast.
won_average_acv_by_industry = df_win.groupby('客户分类')[acv_name].mean().fillna(0).astype(int).reset_index()
won_average_acv_by_industry[acv_name] = won_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------平均ACV by 行业------")
print(refine_content(won_average_acv_by_industry))

# Total ACV for each value of '客户行业'.
df_win_grouped_by_sub_industry = df_win.groupby('客户行业')[acv_name].sum().astype(int).reset_index()
df_win_grouped_by_sub_industry[acv_name] = df_win_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------ACV by 子行业------")
print(refine_content(df_win_grouped_by_sub_industry))

# Average ACV for each value of '客户行业'; NaN means are zero-filled before
# the int cast for the same reason as above.
won_average_acv_by_sub_industry = df_win.groupby('客户行业')[acv_name].mean().fillna(0).astype(int).reset_index()
won_average_acv_by_sub_industry[acv_name] = won_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------平均ACV by 子行业------")
print(refine_content(won_average_acv_by_sub_industry))
|
||||||
|
|
||||||
|
# Load the pipeline workbook; its amount column is named '预估 ACV'.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')
acv_name = '预估 ACV'

# Total per value of '负责人所属行业', ordered from largest to smallest while the
# totals are still numeric, then rendered with thousands separators.
df_pipeline_grouped_by_industry = df.groupby('负责人所属行业')[acv_name].sum().astype(int).reset_index()
df_pipeline_grouped_by_industry = df_pipeline_grouped_by_industry.sort_values(by=acv_name, ascending=False)
df_pipeline_grouped_by_industry[acv_name] = df_pipeline_grouped_by_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------预估ACV by 行业------")
print(refine_content(df_pipeline_grouped_by_industry))

# Average per value of '负责人所属行业'. A group whose amounts are all missing has
# a NaN mean, which astype(int) cannot convert, so NaN is replaced by 0 first
# (matching the sub-industry average below).
pipeline_average_acv_by_industry = df.groupby('负责人所属行业')[acv_name].mean().fillna(0).astype(int).reset_index()
pipeline_average_acv_by_industry[acv_name] = pipeline_average_acv_by_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------平均预估ACV by 行业------")
print(refine_content(pipeline_average_acv_by_industry))

# Total per value of '客户行业'.
df_pipeline_grouped_by_sub_industry = df.groupby('客户行业')[acv_name].sum().astype(int).reset_index()
df_pipeline_grouped_by_sub_industry[acv_name] = df_pipeline_grouped_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------预估ACV by 子行业------")
print(refine_content(df_pipeline_grouped_by_sub_industry))

# Average per value of '客户行业'; NaN means are zero-filled before the int cast.
pipeline_average_acv_by_sub_industry = df.groupby('客户行业')[acv_name].mean().fillna(0).astype(int).reset_index()
pipeline_average_acv_by_sub_industry[acv_name] = pipeline_average_acv_by_sub_industry[acv_name].apply(lambda x: '{:,}'.format(x))
print("------平均预估ACV by 子行业------")
print(refine_content(pipeline_average_acv_by_sub_industry))
|
||||||
Loading…
Reference in New Issue