""" One-shot migration: import all NewBellCurve CSV candle files into data/candles.db. Usage: python -m shared.import_csv python -m shared.import_csv --csv-dir /path/to/other/csvs python -m shared.import_csv --dry-run """ import argparse import logging import re import sys from pathlib import Path import pandas as pd # Allow running from repo root or any subdirectory sys.path.insert(0, str(Path(__file__).parent.parent)) from shared.db import upsert_candles, list_available, DB_PATH logger = logging.getLogger(__name__) DEFAULT_CSV_DIR = Path(__file__).parent.parent / "NewBellCurve" / "simulation" / "data" _FILENAME_RE = re.compile(r"^([A-Z]+)_([A-Z0-9]+)\.csv$") def import_directory(csv_dir: Path, dry_run: bool = False) -> dict[str, int]: csv_files = sorted(csv_dir.glob("*.csv")) if not csv_files: logger.warning("No CSV files found in %s", csv_dir) return {} results = {} for path in csv_files: m = _FILENAME_RE.match(path.name) if not m: logger.warning("Skipping unrecognised filename: %s", path.name) continue symbol, timeframe = m.group(1), m.group(2) df = pd.read_csv(path, parse_dates=["time"]) if dry_run: logger.info("DRY RUN %-10s %-5s %d candles", symbol, timeframe, len(df)) results[path.name] = len(df) continue n = upsert_candles(symbol, timeframe, df) logger.info("%-10s %-5s %d candles written", symbol, timeframe, n) results[path.name] = n return results def main(): logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser(description="Import CSV candle files into shared candles.db") parser.add_argument("--csv-dir", default=str(DEFAULT_CSV_DIR), help="Directory of CSV files") parser.add_argument("--dry-run", action="store_true", help="Parse only, do not write to DB") args = parser.parse_args() csv_dir = Path(args.csv_dir) if not csv_dir.exists(): logger.error("CSV directory not found: %s", csv_dir) sys.exit(1) logger.info("Importing from: %s", csv_dir) logger.info("Database: %s", DB_PATH) results = import_directory(csv_dir, dry_run=args.dry_run) total = sum(results.values()) logger.info("Done — %d files, %d candles total", len(results), total) if not args.dry_run: print("\nAvailable data in candles.db:") print(list_available().to_string(index=False)) if __name__ == "__main__": main()