import os

import pandas as pd

from linguistic_checker import LinguisticChecker

# Credentials and assistant ID handed to LinguisticChecker.
# Each value is taken from the environment when set, so secrets do not have to
# live in the repository.
# NOTE(review): the literal fallbacks are real-looking secrets committed to
# source control. They are kept only so the script keeps running unchanged;
# rotate them and delete the defaults once the environment variables are
# configured wherever this script runs.
api_key = os.environ.get('LINGUISTIC_API_KEY', '25bda2c39c0f8ca0')
api_secret = os.environ.get(
    'LINGUISTIC_API_SECRET', 'e0008b9b9727cb8ceea5a132dbe62495'
)
assistant_id = os.environ.get(
    'LINGUISTIC_ASSISTANT_ID', "66bb09a84673b57506fe7bbd"
)

# Single checker instance reused for every row and field below.
checker = LinguisticChecker(api_key, api_secret, assistant_id)

# Load the source workbook that holds the CRM fields to be reviewed.
df = pd.read_excel('./data_src/pingcap_pipeline.xlsx')

# One dict per checked (row, field) pair; turned into a DataFrame at the end.
report = []

# CRM columns whose content is passed to the checker.
crm_fields = ['客户业务场景', '友商信息']

# Source column copied into the report's ID column.
id_column = '唯一性ID（必填）'

# Fixed column order so the report has headers even when no rows were checked.
report_columns = ['商机ID', 'CRM必填项', '用户输入', '分析诊断']

# Draw the sample once: with a fixed random_state every call returns the same
# rows, so re-sampling inside the field loop was redundant work. Capping n at
# len(df) avoids the ValueError DataFrame.sample raises when asked for more
# rows than the sheet contains.
sample_size = min(20, len(df))
random_pick_20 = df.sample(n=sample_size, random_state=42).reset_index(drop=True)

for crm_field in crm_fields:
    print(f"Randomly selected {len(random_pick_20)} items from the DataFrame.")

    for index, row in random_pick_20.iterrows():
        try:
            # Adjust these column names if they differ in the actual file.
            opportunity_id = row[id_column]
            user_input = row[crm_field]
            result = checker.check_input(crm_field, user_input)
        except Exception as e:
            # Best-effort batch run: one failing row must not abort the rest.
            # The failure is recorded in the report instead of being dropped,
            # so the output shows which rows were not analysed.
            result = f"Error processing row {index}: {e}"
            print(result)
            # .get() tolerates the case where the failure was a missing column.
            opportunity_id = row.get(id_column)
            user_input = row.get(crm_field)

        report.append({
            '商机ID': opportunity_id,
            'CRM必填项': crm_field,
            '用户输入': user_input,
            '分析诊断': result,
        })

# Convert the collected entries into a DataFrame for export.
report_df = pd.DataFrame(report, columns=report_columns)

# Export the report to an Excel workbook in the current working directory;
# the DataFrame's positional index is left out of the sheet.
output_file = 'crm_mandatory_fields_analysis.xlsx'
report_df.to_excel(excel_writer=output_file, index=False)

# Tell the operator where the result was written.
print(f"Analysis report has been saved to {output_file}")