import pandas as pd # Read data from specific Excel sheets excel_file = './output/acv_analysis_v2(with count).xlsx' # Replace with the actual path to your Excel file df_pipeline = pd.read_excel(excel_file, sheet_name='预估ACV by 子行业') # Replace 'Pipeline' with the actual sheet name df_won = pd.read_excel(excel_file, sheet_name='ACV by 子行业') # Replace 'Won' with the actual sheet name # Merge the two dataframes on '客户行业' df_combined = pd.merge(df_pipeline, df_won, on='客户行业', how='outer', suffixes=('_pipeline', '_won')) # Rename columns for clarity df_combined = df_combined.rename(columns={ 'ACV Sum': 'ACVSUM', 'ACV Count': 'ACVCNT', '预估 ACV Sum': '预估ACVSUM', '预估 ACV Count': '预估ACVCNT', }) # Fill NaN values with 0 for numerical columns df_combined['ACVSUM'] = df_combined['ACVSUM'].fillna(0) df_combined['ACVCNT'] = df_combined['ACVCNT'].fillna(0) df_combined['预估ACVSUM'] = df_combined['预估ACVSUM'].fillna(0) df_combined['预估ACVCNT'] = df_combined['预估ACVCNT'].fillna(0) # Convert numerical columns to integers df_combined['ACVSUM'] = df_combined['ACVSUM'].astype(int) df_combined['ACVCNT'] = df_combined['ACVCNT'].astype(int) df_combined['预估ACVSUM'] = df_combined['预估ACVSUM'].astype(int) df_combined['预估ACVCNT'] = df_combined['预估ACVCNT'].astype(int) # Reorder columns df_combined = df_combined[['客户行业', 'ACVSUM', 'ACVCNT', '预估ACVSUM', '预估ACVCNT']] # Sort by '预估ACV' in descending order df_combined = df_combined.sort_values('预估ACVSUM', ascending=False) # Reset index df_combined = df_combined.reset_index(drop=True) print(df_combined) df_combined.to_excel('./output/acv_analysis_combined.xlsx', index=False)