Initial commit: shared DB, fetcher, and CSV import utilities

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 21:09:13 +02:00
commit 10f2253a6a
5 changed files with 316 additions and 0 deletions
+80
View File
@@ -0,0 +1,80 @@
"""
One-shot migration: import all NewBellCurve CSV candle files into data/candles.db.
Usage:
python -m shared.import_csv
python -m shared.import_csv --csv-dir /path/to/other/csvs
python -m shared.import_csv --dry-run
"""
import argparse
import logging
import re
import sys
from pathlib import Path
import pandas as pd
# Allow running from repo root or any subdirectory
sys.path.insert(0, str(Path(__file__).parent.parent))
from shared.db import upsert_candles, list_available, DB_PATH
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default CSV source directory: <parent of this file's directory>/NewBellCurve/simulation/data.
DEFAULT_CSV_DIR = Path(__file__).parent.parent / "NewBellCurve" / "simulation" / "data"
# Accepted file names look like "<SYMBOL>_<TIMEFRAME>.csv": the first group is
# uppercase letters only, the second is uppercase letters and/or digits.
# Anything else (lowercase, digits in the symbol, extra underscores) does not match.
_FILENAME_RE = re.compile(r"^([A-Z]+)_([A-Z0-9]+)\.csv$")
def import_directory(csv_dir: Path, dry_run: bool = False) -> dict[str, int]:
    """Load every recognised ``*.csv`` file in *csv_dir* and upsert its candles.

    Files are processed in sorted order. A file whose name does not match
    ``_FILENAME_RE`` is skipped with a warning; the two regex groups are used
    as the symbol and timeframe for the remaining files.

    Args:
        csv_dir: Directory that is searched (non-recursively) for ``*.csv``.
        dry_run: When true, each CSV is still parsed but nothing is passed to
            ``upsert_candles``.

    Returns:
        Mapping of file name to a candle count: the number of parsed rows in
        dry-run mode, otherwise whatever ``upsert_candles`` returned for that
        file. Empty when the directory holds no CSV files.
    """
    candidates = sorted(csv_dir.glob("*.csv"))
    if len(candidates) == 0:
        logger.warning("No CSV files found in %s", csv_dir)
        return {}

    counts = {}
    for csv_path in candidates:
        parsed_name = _FILENAME_RE.match(csv_path.name)
        if parsed_name is None:
            logger.warning("Skipping unrecognised filename: %s", csv_path.name)
            continue

        # The pattern has exactly two groups: symbol, then timeframe.
        symbol, timeframe = parsed_name.groups()
        frame = pd.read_csv(csv_path, parse_dates=["time"])

        if not dry_run:
            written = upsert_candles(symbol, timeframe, frame)
            logger.info("%-10s %-5s %d candles written", symbol, timeframe, written)
            counts[csv_path.name] = written
        else:
            # Dry run: report the parsed row count without touching the DB.
            row_count = len(frame)
            logger.info("DRY RUN %-10s %-5s %d candles", symbol, timeframe, row_count)
            counts[csv_path.name] = row_count

    return counts
def main(argv=None) -> None:
    """Command-line entry point: import a directory of candle CSVs into the shared DB.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default) argparse reads ``sys.argv[1:]``,
            so the existing zero-argument call behaves as before.

    Exits the process with status 1 when ``--csv-dir`` is not an existing
    directory. Unless ``--dry-run`` is given, a summary obtained from
    ``list_available()`` is printed after the import.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

    parser = argparse.ArgumentParser(description="Import CSV candle files into shared candles.db")
    parser.add_argument("--csv-dir", default=str(DEFAULT_CSV_DIR), help="Directory of CSV files")
    parser.add_argument("--dry-run", action="store_true", help="Parse only, do not write to DB")
    args = parser.parse_args(argv)

    csv_dir = Path(args.csv_dir)
    # is_dir() rather than exists(): a path that points at a regular file
    # would pass an existence check even though it cannot contain CSV files.
    if not csv_dir.is_dir():
        logger.error("CSV directory not found: %s", csv_dir)
        sys.exit(1)

    logger.info("Importing from: %s", csv_dir)
    logger.info("Database: %s", DB_PATH)

    results = import_directory(csv_dir, dry_run=args.dry_run)
    total = sum(results.values())
    logger.info("Done — %d files, %d candles total", len(results), total)

    # Nothing was written in dry-run mode, so the DB summary is skipped.
    if not args.dry_run:
        print("\nAvailable data in candles.db:")
        print(list_available().to_string(index=False))
# Run the importer only when this file is executed as a script
# (e.g. `python -m shared.import_csv`), not when it is imported.
if __name__ == "__main__":
    main()