# market_assistant/analyze_crm_mandatory.py

import pandas as pd
from typing import List
from openpyxl import Workbook

# Load the pipeline workbook; each of its rows is audited further down for
# mandatory CRM fields that are still empty.
df = pd.read_excel(io='./data_src/pingcap_pipeline.xlsx')

# unique_values = df['Sales stage'].unique()
# print("Unique values using unique():")
# print(unique_values)

# Mandatory fields per CRM stage. Keys are CRM stage names; each value lists
# the spreadsheet column headers that check_mandatory_fields looks up on a row.
# The strings must match the workbook's headers exactly, so do not normalise
# them here.
# NOTE(review): "PcC" in the 2-evaluation entries may be a typo for "PoC" --
# confirm against the workbook before touching it.
crm_mandatory_fields = {
    "1-prospecting": [
        "联系人姓名",
        "部门",
        "职务",
        "手机",
        "预估整体项目节点数或vCPU数",
        "现用数据库痛点",
        "主要数据库",
        "客户业务场景",
        "数据量",
    ],
    "2-evaluation": [
        "客户预算情况",
        "是否是独立预算",
        "预算是否完成审批",
        "是否进PcC",
        "PcC是否已审批",
        "PcC状态",
        "测试类型",
        "测试项（用飞书链接）",
        "参与友商",
        "poc开始日期",
        "poc预计结束日期",
        "MBO评审结果",
    ],
    "3-qualification": [
        "立项批复",
        "立项时间",
        "立项贡献（立项报告的内容）",
        "招标方式",
    ],
    "4-bidding/negotiating": [
        "是否投标",
        "发标时间",
        "投标时间",
        "竞争对手",
        "对手产配置",
        "对手投标金额",
        "标书/谈判文件（最大100M，考虑链接方式）",
        "中标结果",
        "丢标原因",
    ],
    # No mandatory fields are defined for this stage.
    "5-contract review": [],
    "6A-cancel": [
        "取消原因",
        "leader审批结果",
    ],
    "6B-closed lost": [
        "输单原因",
        "leader审批结果",
    ],
    "6C-closed won": [
        "生成销售订单并关联商机",
        "商机字段更新到赢单",
    ],
}

# Stage names in declaration order. get_stages_up_to slices this list, so the
# order of the keys above defines the order of the pipeline.
crm_mandatory_fields_list = list(crm_mandatory_fields)

# Maps the workbook's "Sales stage" labels to the stage keys used by
# crm_mandatory_fields. A label with no entry here resolves to "unknown" in
# the row loop, and that row is skipped.
#
# Mappings that are currently disabled:
#   "prospecting"     -> "1-prospecting"
#   "contract review" -> "5-contract review"
#   "cancel"          -> "6A-cancel"
#   "closed lost"     -> "6B-closed lost"
stage_mapping = {
    "Evaluation": "2-evaluation",
    "Qualification": "3-qualification",
    "PoC Won/Bidding+Negotiation": "4-bidding/negotiating",
    "Closed Won": "6C-closed won",
}

def get_stages_up_to(crm_stage: str, stages_list: List[str]) -> List[str]:
    """Return the stages a deal currently in ``crm_stage`` has gone through.

    For an ordinary stage the result is every entry of ``stages_list`` up to
    and including ``crm_stage``. Terminal stages (names starting with ``'6'``)
    are alternatives to each other rather than consecutive steps, so the
    result is everything up to and including ``"5-contract review"`` followed
    by ``crm_stage`` itself.

    Args:
        crm_stage: Name of the stage the deal is currently in.
        stages_list: All stage names in pipeline order. It is not modified.

    Returns:
        A new list of stage names in pipeline order.

    Raises:
        ValueError: If ``crm_stage`` does not start with ``'6'`` and is not
            present in ``stages_list``.
    """
    if crm_stage.startswith('6'):
        try:
            review_pos = stages_list.index("5-contract review")
        except ValueError:
            # list.index raises instead of returning None, so the missing
            # stage has to be caught here; fall back to every known stage.
            return list(stages_list)
        # Concatenation builds a fresh list, leaving the caller's untouched.
        return stages_list[:review_pos + 1] + [crm_stage]

    return stages_list[:stages_list.index(crm_stage) + 1]


def check_mandatory_fields(row, crm_stage):
    """Split the mandatory fields of ``crm_stage`` into filled and unfilled.

    A field counts as filled when ``row`` contains it and its value is
    neither NA (per ``pd.notna``) nor the empty string. Progress is printed
    to stdout as the fields are inspected.

    Args:
        row: One pipeline record; it must provide ``'Sales stage'`` and
            support ``in`` and ``[]`` lookups by column name.
        crm_stage: Key into ``crm_mandatory_fields`` selecting the field list.

    Returns:
        A dict with ``"fields_filled"`` (a list of
        ``{"field": name, "value": value}`` dicts) and ``"fields_not_filled"``
        (a list of field names), both in the order the fields are declared.

    Raises:
        KeyError: If ``crm_stage`` is not a key of ``crm_mandatory_fields``.
    """
    required = crm_mandatory_fields[crm_stage]
    print(f"Sales stage: {row['Sales stage']}, CRM stage: {crm_stage}")

    filled = []
    missing = []
    for name in required:
        has_value = name in row and pd.notna(row[name]) and row[name] != ''
        if not has_value:
            missing.append(name)
            continue

        value = row[name]
        print(f"Field: {name}, Value: {value}")
        filled.append({"field": name, "value": value})

    return {"fields_filled": filled, "fields_not_filled": missing}


# Audit every pipeline row. For each stage the deal has gone through, write
# the comma-separated names of the still-empty mandatory fields into a column
# named after that stage, then export the annotated frame.
for index, row in df.iterrows():
    print("-" * 20, index, "-" * 20)

    sales_stage = row['Sales stage']
    crm_stage = stage_mapping.get(sales_stage, "unknown")
    print(f"Sales stage: {sales_stage}, CRM stage: {crm_stage}")
    if crm_stage == "unknown":
        # No CRM stage for this label: nothing is written for this row.
        continue

    crm_stages = get_stages_up_to(crm_stage, crm_mandatory_fields_list)
    df.at[index, "crm_stage"] = crm_stage
    for stage in crm_stages:
        result = check_mandatory_fields(row, stage)
        missing_fields = result["fields_not_filled"]
        df.at[index, stage] = ', '.join(missing_fields)
        print("-" * 10, stage, "-" * 10)
        print(result)

df.to_excel("./data_output/pingcap_pipeline_mandatory.xlsx")