From 9eb1428779a6e9550aa64172d740488bc21cc767 Mon Sep 17 00:00:00 2001 From: tigermren Date: Fri, 17 Oct 2025 01:12:56 +0800 Subject: [PATCH] docs(sync): update docs and scripts for TiDB Cloud sync changes - Mark DM-based sync as incompatible with TiDB Cloud Serverless - Deprecate sync-control.sh and add warning messages - Add new sync-data.sh script for syncing data from TiDB Cloud - Revise README to recommend official migration methods and clarify sync instructions - Update start.sh to remove references to DM status and control commands - Add links to TIDB_CLOUD_MIGRATION.md and SYNC_GUIDE.md for detailed guidance --- README.md | 31 ++--- TIDB_CLOUD_MIGRATION.md | 275 ++++++++++++++++++++++++++++++++++++++++ export-cloud.sh | 86 +++++++++++++ import-local.sh | 91 +++++++++++++ start.sh | 8 +- sync-control.sh | 7 + sync-data.sh | 29 +++++ 7 files changed, 507 insertions(+), 20 deletions(-) create mode 100644 TIDB_CLOUD_MIGRATION.md create mode 100755 export-cloud.sh create mode 100755 import-local.sh create mode 100755 sync-data.sh diff --git a/README.md b/README.md index 1efc0de..0465a06 100644 --- a/README.md +++ b/README.md @@ -49,19 +49,19 @@ A minimal TiDB instance with Data Migration (DM) for syncing data from test envi ### Useful Commands ```bash -# Start environment (auto-starts sync) +# Start environment (TiDB only, no working sync) ./start.sh # Test connection ./test-connection.sh -# Check sync status -./status.sh -# or use the sync control script: -./sync-control.sh status +# NEW: Sync data from TiDB Cloud to local +./sync-data.sh -# Control sync task -./sync-control.sh [start|stop|pause|resume|restart|reinit] +# Check sync status (deprecated - DM doesn't work with TiDB Cloud) +# ./status.sh +# or use the sync control script: +# ./sync-control.sh status # Connect with MySQL client mysql -h 127.0.0.1 -P 4000 -u root @@ -108,19 +108,16 @@ Required variables: ### How the Sync Works -The data synchronization is **automatically configured and started** when you run `./start.sh`: +**Important Note**: The original DM-based sync approach doesn't work with TiDB Cloud Serverless because TiDB Cloud doesn't support the MySQL replication features that DM requires. -1. **Automatic Setup** (recommended): - - The `dm-init` container runs [`scripts/init-dm.sh`](scripts/init-dm.sh) - - It generates the actual DM task config from your `.env` variables - - The sync task is automatically started - - No manual intervention needed! +### Officially Recommended Approaches -2. **Manual Control** (optional): - - Use [`./sync-control.sh`](sync-control.sh) for easy management - - Or use `dmctl` commands directly (see Manual DM Operations below) +See [TIDB_CLOUD_MIGRATION.md](TIDB_CLOUD_MIGRATION.md) for officially supported migration methods: -**Note:** [`configs/task.yaml`](configs/task.yaml) is just a template. The real task config is generated dynamically at runtime. +1. **Console Export + SQL Import** (simplest for development) +2. **Dumpling + TiDB Lightning** (for larger datasets) +3. **Periodic Sync Scripts** (created in this project) +4. 
**Application-Level Sync** (for real-time needs)
 
 **For detailed sync operations, see [SYNC_GUIDE.md](SYNC_GUIDE.md)**
 
diff --git a/TIDB_CLOUD_MIGRATION.md b/TIDB_CLOUD_MIGRATION.md
new file mode 100644
index 0000000..1231ec3
--- /dev/null
+++ b/TIDB_CLOUD_MIGRATION.md
@@ -0,0 +1,275 @@
+# TiDB Cloud to Local TiDB Migration Guide
+
+This guide provides officially recommended approaches for migrating data from TiDB Cloud to a local TiDB instance, since TiDB Data Migration (DM) cannot be used with TiDB Cloud Serverless.
+
+## Why DM Doesn't Work with TiDB Cloud
+
+TiDB Data Migration (DM) fails with TiDB Cloud because:
+
+1. **No MySQL binlog support** - TiDB Cloud Serverless doesn't expose a binlog in the traditional MySQL way
+2. **binlog_format is STATEMENT** - DM requires ROW format
+3. **TiDB is explicitly not supported as an upstream** - DM is designed for MySQL/MariaDB → TiDB, not TiDB → TiDB
+
+## Approach 1: Console Export + SQL Import (Simplest)
+
+### Export from TiDB Cloud
+
+1. **Using TiDB Cloud Console**:
+   - Navigate to your cluster in the TiDB Cloud Console
+   - Go to Data > Import
+   - Click "Export Data to" > "Local File"
+   - Select databases/tables to export
+   - Choose format (SQL recommended for small datasets)
+   - Click "Export"
+
+2. **Using TiDB Cloud CLI**:
+   ```bash
+   # Create export task
+   ticloud serverless export create -c <cluster-id>
+
+   # Download exported data
+   ticloud serverless export download -c <cluster-id> -e <export-id>
+   ```
+
+### Import to Local TiDB
+
+```bash
+# Import SQL file
+mysql -h 127.0.0.1 -P 4000 -u root < exported_data.sql
+
+# Or for CSV files
+mysql -h 127.0.0.1 -P 4000 -u root -e "
+LOAD DATA LOCAL INFILE 'table_data.csv'
+INTO TABLE your_table
+FIELDS TERMINATED BY ','
+ENCLOSED BY '\"'
+LINES TERMINATED BY '\n'
+IGNORE 1 ROWS;"
+```
+
+## Approach 2: Dumpling + TiDB Lightning (For Larger Datasets)
+
+### Prerequisites
+
+Install TiDB tools:
+```bash
+# Install TiUP
+curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh
+source ~/.bash_profile
+
+# Install tools
+tiup install dumpling tidb-lightning
+```
+
+### Export with Dumpling
+
+```bash
+# Export data from TiDB Cloud
+dumpling \
+  -u {TEST_DB_USER} \
+  -p {TEST_DB_PASSWORD} \
+  -P {TEST_DB_PORT} \
+  -h {TEST_DB_HOST} \
+  -o /tmp/tidb-export \
+  --filetype sql \
+  -r 20000 \
+  -F 256MiB
+```
+
+### Import with TiDB Lightning
+
+1. **Create configuration file** (`lightning.toml`):
+   ```toml
+   [lightning]
+   level = "info"
+   file = "tidb-lightning.log"
+
+   [tikv-importer]
+   backend = "local"
+   sorted-kv-dir = "/tmp/sorted-kv-dir"
+
+   [mydumper]
+   data-source-dir = "/tmp/tidb-export"
+   no-schema = false
+
+   [tidb]
+   host = "127.0.0.1"
+   port = 4000
+   user = "root"
+   password = ""
+   status-port = 10080
+   pd-addr = "127.0.0.1:2379"
+   ```
+
+2. **Run TiDB Lightning**:
+   ```bash
+   tidb-lightning -config lightning.toml
+   ```
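+
+After the import finishes, it is worth sanity-checking the result. A minimal sketch, assuming the `workflow_local` database and `plans` table used elsewhere in this project:
+
+```bash
+# Quick row count on the local copy
+mysql -h 127.0.0.1 -P 4000 -u root workflow_local -e "SELECT COUNT(*) FROM plans;"
+
+# TiDB can also recompute a full table checksum after an import
+mysql -h 127.0.0.1 -P 4000 -u root workflow_local -e "ADMIN CHECKSUM TABLE plans;"
+```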
+## Approach 3: Periodic Sync Script
+
+Create a script for periodic data sync:
+
+### Export Script (`export-cloud.sh`)
+```bash
+#!/bin/bash
+
+# Source .env file
+source .env
+
+# Export data using mysqldump (built-in tool)
+mysqldump \
+  -h $TEST_DB_HOST \
+  -P $TEST_DB_PORT \
+  -u $TEST_DB_USER \
+  -p$TEST_DB_PASSWORD \
+  --single-transaction \
+  --routines \
+  --triggers \
+  $DATABASE_NAME \
+  $TABLES > /tmp/cloud-export.sql
+
+echo "Export completed: /tmp/cloud-export.sql"
+```
+
+### Import Script (`import-local.sh`)
+```bash
+#!/bin/bash
+
+# Import to local TiDB
+mysql -h 127.0.0.1 -P 4000 -u root < /tmp/cloud-export.sql
+
+echo "Import completed to local TiDB"
+```
+
+### Combined Sync Script (`sync-data.sh`)
+```bash
+#!/bin/bash
+
+echo "🔄 Syncing data from TiDB Cloud to local TiDB..."
+
+# Export from cloud
+./export-cloud.sh
+
+# Import to local
+./import-local.sh
+
+echo "✅ Sync completed!"
+```
+
+## Approach 4: Application-Level Sync (For Continuous Updates)
+
+For real-time sync, implement it in your application:
+
+```python
+# Example Python script for selective sync
+import os
+
+import mysql.connector
+
+# Connect to both databases. Credentials come from the same TEST_DB_* variables
+# the shell scripts use - never hard-code them here. Load .env into the
+# environment first (e.g. `set -a; source .env` in the shell, or python-dotenv).
+# TiDB Cloud Serverless typically also requires TLS; pass ssl_ca= if needed.
+cloud_db = mysql.connector.connect(
+    host=os.environ["TEST_DB_HOST"],
+    port=int(os.environ.get("TEST_DB_PORT", 4000)),
+    user=os.environ["TEST_DB_USER"],
+    password=os.environ["TEST_DB_PASSWORD"],
+    database=os.environ.get("DATABASE_NAME", "workflow_local")
+)
+
+local_db = mysql.connector.connect(
+    host="127.0.0.1",
+    port=4000,
+    user="root",
+    password="",
+    database=os.environ.get("DATABASE_NAME", "workflow_local")
+)
+
+# Sync specific tables
+def sync_table(table_name):
+    # Get data from cloud
+    cloud_cursor = cloud_db.cursor()
+    cloud_cursor.execute(f"SELECT * FROM {table_name}")
+    rows = cloud_cursor.fetchall()
+
+    # Clear and insert into local. DELETE + INSERT run in one transaction
+    # (autocommit is off by default), so commit() makes the swap atomic.
+    local_cursor = local_db.cursor()
+    local_cursor.execute(f"DELETE FROM {table_name}")
+
+    if rows:
+        placeholders = ','.join(['%s'] * len(rows[0]))
+        local_cursor.executemany(
+            f"INSERT INTO {table_name} VALUES ({placeholders})",
+            rows
+        )
+
+    local_db.commit()
+    print(f"Synced {len(rows)} rows to {table_name}")
+
+# Sync your tables
+sync_table("plans")
+```
+
+## Recommended Solution for Your Setup
+
+For development purposes, I recommend:
+
+1. **Use Approach 1** (Console Export + SQL Import) for simplicity
+2. **Create helper scripts** for periodic sync (see the cron sketch below)
+3. **Consider application-level sync** for real-time needs
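+
+If the periodic sync should run unattended, a cron entry can drive `sync-data.sh`. A sketch, assuming the project is checked out at `~/tidb-local` (adjust the path and schedule):
+
+```bash
+# crontab -e: pull data from TiDB Cloud every night at 02:00 and keep a log
+0 2 * * * cd ~/tidb-local && ./sync-data.sh >> /tmp/tidb-sync.log 2>&1
+```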
+### Quick Setup
+
+Create these helper scripts in your project:
+
+```bash
+# Make scripts executable
+chmod +x sync-data.sh export-cloud.sh import-local.sh
+
+# Run sync
+./sync-data.sh
+```
+
+## Limitations and Considerations
+
+### TiDB Cloud Serverless Limitations
+- No traditional MySQL binlog access
+- Limited to export/import methods
+- No direct replication support in most plans
+
+### Performance Considerations
+- Full table exports can be slow for large datasets
+- Network bandwidth affects sync speed
+- Consider incremental exports for large tables
+
+### Security Notes
+- Store credentials securely (use .env file)
+- Use TLS connections when possible
+- Rotate credentials regularly
+
+## Troubleshooting
+
+### Connection Issues
+```bash
+# Test connection to TiDB Cloud
+mysql -h $TEST_DB_HOST -P $TEST_DB_PORT -u $TEST_DB_USER -p
+
+# Test connection to local TiDB
+mysql -h 127.0.0.1 -P 4000 -u root
+```
+
+### Export Errors
+- Ensure user has SELECT privileges
+- Check network connectivity
+- Verify table existence
+
+### Import Errors
+- Check schema compatibility
+- Ensure sufficient disk space
+- Verify TiDB is running
+
+## References
+
+- [TiDB Cloud Export Documentation](https://docs.pingcap.com/tidbcloud/serverless-export/)
+- [TiDB Migration Tools Overview](https://docs.pingcap.com/tidb/stable/migration-tools)
+- [Dumpling Documentation](https://docs.pingcap.com/tidb/stable/dumpling-overview)
+- [TiDB Lightning Documentation](https://docs.pingcap.com/tidb/stable/tidb-lightning-overview)
+
+For production use cases, contact TiDB Cloud Support to discuss available replication options for your specific plan.
\ No newline at end of file
diff --git a/export-cloud.sh b/export-cloud.sh
new file mode 100755
index 0000000..43799da
--- /dev/null
+++ b/export-cloud.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+echo "☁️ Exporting data from TiDB Cloud..."
+
+# Check if .env exists
+if [ ! -f .env ]; then
+    echo "❌ .env file not found!"
+    echo "📝 Please create .env file with your TiDB Cloud credentials"
+    exit 1
+fi
+
+# Source environment variables
+source .env
+
+# Validate required variables
+if [ -z "$TEST_DB_HOST" ] || [ -z "$TEST_DB_USER" ] || [ -z "$TEST_DB_PASSWORD" ]; then
+    echo "❌ Missing database credentials in .env"
+    echo "📝 Required: TEST_DB_HOST, TEST_DB_USER, TEST_DB_PASSWORD"
+    exit 1
+fi
+
+# Create export directory
+EXPORT_DIR="/tmp/tidb-cloud-export"
+mkdir -p $EXPORT_DIR
+
+# Test connection
+echo "🔍 Testing connection to TiDB Cloud..."
+mysql -h $TEST_DB_HOST -P ${TEST_DB_PORT:-4000} -u $TEST_DB_USER -p$TEST_DB_PASSWORD -e "SELECT 1" >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+    echo "❌ Cannot connect to TiDB Cloud"
+    exit 1
+fi
+
+echo "✅ Connected successfully"
+
+# Export schema using SQL queries
+echo "📦 Exporting schema..."
+
+# Create database statement
+echo "CREATE DATABASE IF NOT EXISTS \`${DATABASE_NAME:-workflow_local}\`;
+USE \`${DATABASE_NAME:-workflow_local}\`;
+" > $EXPORT_DIR/schema.sql
+
+# Get table schemas
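+# In batch mode (-N -s) mysql prints "<table>\t<CREATE statement>" on a single
+# line (embedded newlines escaped), so `cut -f2` keeps just the CREATE statement.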
-s "$EXPORT_DIR/schema.sql" ]; then + echo "❌ Schema export failed - empty file" + exit 1 +fi + +echo "✅ Schema exported to $EXPORT_DIR/schema.sql" + +# Export data using SQL +echo "đŸ“Ļ Exporting data..." + +# Clear data file +> $EXPORT_DIR/data.sql + +# Export data for each table +for table in ${TABLES//,/ }; do + echo "-- Data for table: $table" + # Simple approach: export as CSV and convert to INSERT statements + mysql -h $TEST_DB_HOST -P ${TEST_DB_PORT:-4000} -u $TEST_DB_USER -p$TEST_DB_PASSWORD -e "SELECT * FROM \`${DATABASE_NAME:-workflow_local}\`.$table;" | sed '1d' > $EXPORT_DIR/${table}.csv + + # If we have data, convert to INSERT statements + if [ -s "$EXPORT_DIR/${table}.csv" ]; then + # This is a simplified approach - for production use, you'd want a more robust CSV to SQL converter + echo "-- Note: Data export for $table requires manual conversion from CSV" >> $EXPORT_DIR/data.sql + echo "-- CSV file location: $EXPORT_DIR/${table}.csv" >> $EXPORT_DIR/data.sql + fi +done + +echo "âš ī¸ Data export completed - CSV files created for manual import" +echo "📂 Export completed successfully!" +echo " Schema: $EXPORT_DIR/schema.sql" +echo " Data CSV files:" +for table in ${TABLES//,/ }; do + echo " $EXPORT_DIR/${table}.csv" +done \ No newline at end of file diff --git a/import-local.sh b/import-local.sh new file mode 100755 index 0000000..a7ac8b4 --- /dev/null +++ b/import-local.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +echo "🏠 Importing data to local TiDB..." + +# Check if local TiDB is accessible +mysql -h 127.0.0.1 -P 4000 -u root -e "SELECT 1" >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "❌ Cannot connect to local TiDB" + echo "📝 Make sure TiDB is running: ./start.sh" + exit 1 +fi + +# Check if export files exist +EXPORT_DIR="/tmp/tidb-cloud-export" +if [ ! -f "$EXPORT_DIR/schema.sql" ]; then + echo "❌ Export files not found!" + echo "📝 Run export-cloud.sh first" + exit 1 +fi + +# Import schema +echo "đŸ—ī¸ Importing schema..." +mysql -h 127.0.0.1 -P 4000 -u root < $EXPORT_DIR/schema.sql 2>/dev/null + +if [ $? -ne 0 ]; then + echo "❌ Schema import failed" + exit 1 +fi + +echo "✅ Schema imported successfully" + +# Import data from CSV files +echo "đŸ“Ĩ Importing data..." +for table in ${TABLES//,/ }; do + if [ -f "$EXPORT_DIR/${table}.csv" ]; then + echo " Importing data for table: $table" + # Count lines in CSV (excluding header if present) + line_count=$(wc -l < "$EXPORT_DIR/${table}.csv" | tr -d ' ') + + if [ "$line_count" -gt 0 ]; then + # Use LOAD DATA LOCAL INFILE to import CSV + mysql -h 127.0.0.1 -P 4000 -u root --local-infile=1 -e " + USE ${DATABASE_NAME:-workflow_local}; + LOAD DATA LOCAL INFILE '$EXPORT_DIR/${table}.csv' + INTO TABLE $table + FIELDS TERMINATED BY '\t' + LINES TERMINATED BY '\n' + IGNORE 0 LINES; + " 2>/dev/null + + if [ $? -ne 0 ]; then + echo "âš ī¸ Warning: Failed to import data for table $table" + # Try alternative method - read CSV and generate INSERT statements + echo " Trying alternative import method..." 
+                while IFS=$'\t' read -r col1 col2 col3 col4 col5; do
+                    # Escape single quotes
+                    col1_escaped=$(echo "$col1" | sed "s/'/''/g")
+                    col2_escaped=$(echo "$col2" | sed "s/'/''/g")
+                    col3_escaped=$(echo "$col3" | sed "s/'/''/g")
+                    col4_escaped=$(echo "$col4" | sed "s/'/''/g")
+                    col5_escaped=$(echo "$col5" | sed "s/'/''/g" | sed "s/NULL//")
+
+                    # Handle NULL values
+                    if [ "$col5_escaped" = "" ]; then
+                        col5_sql="NULL"
+                    else
+                        col5_sql="'$col5_escaped'"
+                    fi
+
+                    # Only insert if we have data
+                    if [ -n "$col1" ]; then
+                        mysql -h 127.0.0.1 -P 4000 -u root -e "
+                        USE ${DATABASE_NAME:-workflow_local};
+                        INSERT INTO $table (id, name, description, type, parent_plan_id)
+                        VALUES ('$col1_escaped', '$col2_escaped', '$col3_escaped', '$col4_escaped', $col5_sql);
+                        " 2>/dev/null
+                    fi
+                done < "$EXPORT_DIR/${table}.csv"
+            fi
+
+            # Count rows imported
+            row_count=$(mysql -h 127.0.0.1 -P 4000 -u root -e "USE ${DATABASE_NAME:-workflow_local}; SELECT COUNT(*) FROM $table;" -N -s 2>/dev/null)
+            echo "   ✅ Imported $row_count rows into $table"
+        else
+            echo "   ℹ️ No data to import for table $table"
+        fi
+    fi
+done
+
+echo "✅ Data import completed"
+echo "🎉 Import completed!"
\ No newline at end of file
diff --git a/start.sh b/start.sh
index f211c25..12d744b 100755
--- a/start.sh
+++ b/start.sh
@@ -35,14 +35,16 @@ echo ""
 echo "📊 Connection Info:"
 echo "   TiDB: mysql -h 127.0.0.1 -P 4000 -u root"
 echo "   DataGrip: Host: 127.0.0.1, Port: 4000, User: root, Password: (empty)"
-echo "   DM Master: http://localhost:8261"
+echo ""
+echo "🔄 To sync data from TiDB Cloud:"
+echo "   ./sync-data.sh"
 echo ""
 echo "🔍 Useful commands:"
 echo "   Test connection: ./test-connection.sh"
-echo "   Check sync status: ./status.sh"
-echo "   Control sync: ./sync-control.sh [start|stop|pause|resume|restart]"
+echo "   Sync data: ./sync-data.sh"
 echo "   View logs: docker compose logs -f"
 echo "   Stop environment: docker compose down"
 echo ""
 echo "📖 For DataGrip setup: see DATAGRIP_SETUP.md"
+echo "📘 For TiDB Cloud migration: see TIDB_CLOUD_MIGRATION.md"
 echo ""
diff --git a/sync-control.sh b/sync-control.sh
index 8dad1cf..6d93bf1 100755
--- a/sync-control.sh
+++ b/sync-control.sh
@@ -1,5 +1,12 @@
 #!/bin/bash
 
+echo "⚠️ WARNING: TiDB Data Migration (DM) is not compatible with TiDB Cloud Serverless"
+echo "⚠️ This script is deprecated. Use ./sync-data.sh instead."
+echo ""
+
+echo "For officially supported migration approaches, see TIDB_CLOUD_MIGRATION.md"
+echo ""
+
 TASK_NAME="test-to-local"
 DMCTL="docker exec dm-master /dmctl --master-addr=dm-master:8261"
 
diff --git a/sync-data.sh b/sync-data.sh
new file mode 100755
index 0000000..b2d5f05
--- /dev/null
+++ b/sync-data.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+echo "🔄 Syncing data from TiDB Cloud to local TiDB..."
+echo ""
+
+# Load DATABASE_NAME/TABLES so the verification hints below expand correctly
+[ -f .env ] && source .env
+
+# Export from TiDB Cloud
+echo "☁️ Step 1: Exporting from TiDB Cloud"
+./export-cloud.sh
+if [ $? -ne 0 ]; then
+    echo "❌ Export failed"
+    exit 1
+fi
+
+echo ""
+
+# Import to local TiDB
+echo "🏠 Step 2: Importing to local TiDB"
+./import-local.sh
+if [ $? -ne 0 ]; then
+    echo "❌ Import failed"
+    exit 1
+fi
+
+echo ""
+echo "✅ Data sync completed successfully!"
+echo ""
+echo "📊 Verify data:"
+echo "   mysql -h 127.0.0.1 -P 4000 -u root -e 'USE ${DATABASE_NAME:-workflow_local}; SHOW TABLES;'"
+echo "   mysql -h 127.0.0.1 -P 4000 -u root -e 'USE ${DATABASE_NAME:-workflow_local}; SELECT COUNT(*) FROM ${TABLES%%,*};'"
\ No newline at end of file