commit 486749a8901d4ec34c71eaba0a98f7f49fbd66dd Author: Jonathan Date: Mon May 4 22:27:24 2026 +0200 Initial project scaffold Full-stack Dutch supermarket price tracker with FastAPI backend, PostgreSQL/SQLAlchemy, Albert Heijn scraper, and Next.js frontend. Co-Authored-By: Claude Sonnet 4.6 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..838ddf0 --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +# Backend — copy to backend/.env for local dev +DATABASE_URL=postgresql://postgres:postgres@localhost:5432/food_prices + +# Frontend — copy to frontend/.env.local for local dev +NEXT_PUBLIC_API_URL=http://localhost:8000 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f77c7a9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Python +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ +.env +*.egg-info/ +dist/ +build/ + +# Alembic +backend/alembic/versions/*.pyc + +# Node +frontend/node_modules/ +frontend/.next/ +frontend/.env.local +frontend/.env*.local + +# Docker +.dockerignore + +# OS +.DS_Store +Thumbs.db + +# IDE +.idea/ +.vscode/ +*.swp diff --git a/README.md b/README.md new file mode 100644 index 0000000..9f16e50 --- /dev/null +++ b/README.md @@ -0,0 +1,137 @@ +# Dutch Food Price Tracker + +Track Dutch supermarket food prices over time. Currently supports Albert Heijn. + +## Stack + +| Layer | Tech | +|---|---| +| Backend API | Python FastAPI + SQLAlchemy + Alembic | +| Database | PostgreSQL 16 | +| Scraper | httpx + AH anonymous token auth | +| Frontend | Next.js 14 + TypeScript + Tailwind CSS + Recharts | +| Dev infra | Docker Compose | + +## Quick start (Docker) + +```bash +git clone && cd dutch-food-price-tracker + +# Build and start all services +docker compose up --build -d + +# Run DB migrations (tables are also auto-created on backend start) +docker compose exec backend alembic upgrade head + +# Scrape Albert Heijn +docker compose exec backend python cli.py scrape-ah \ + --query melk \ + --query brood \ + --query kaas \ + --query yoghurt +``` + +- Frontend: http://localhost:3000 +- API docs: http://localhost:8000/docs +- Database: `localhost:5432` (user: postgres / password: postgres / db: food_prices) + +## Local development (without Docker) + +### Backend + +```bash +cd backend +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -r requirements.txt + +# Needs a running PostgreSQL instance +export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/food_prices + +# Create tables +alembic upgrade head +# or: python -c "from app.database import engine; from app.models import Base; Base.metadata.create_all(engine)" + +# Start API server +uvicorn app.main:app --reload +``` + +### Frontend + +```bash +cd frontend +npm install + +# Point at the local backend +echo "NEXT_PUBLIC_API_URL=http://localhost:8000" > .env.local + +npm run dev +``` + +## CLI reference + +```bash +# Scrape one or more queries +python cli.py scrape-ah --query "melk" --query "brood" + +# Run via Docker +docker compose exec backend python cli.py scrape-ah --query melk +``` + +## API endpoints + +| Method | Path | Description | +|---|---|---| +| GET | `/api/products?search=melk` | Search products | +| GET | `/api/products/{id}` | Product detail | +| GET | `/api/products/{id}/prices` | Full price history | +| GET | `/api/prices/cheapest?date=2024-01-15` | Cheapest per product for a day | +| GET | `/api/stores` | List stores | +| GET | `/api/scrape-runs` | List recent scrape runs | + +## Project structure + +``` +dutch-food-price-tracker/ +├── backend/ +│ ├── app/ +│ │ ├── main.py # FastAPI app + CORS +│ │ ├── models.py # SQLAlchemy ORM models +│ │ ├── schemas.py # Pydantic request/response schemas +│ │ ├── database.py # Engine, session, Base +│ │ ├── config.py # Pydantic settings +│ │ ├── routers/ # products, stores, prices, scrape_runs +│ │ └── scrapers/ +│ │ └── albert_heijn.py # Token auth + product search +│ ├── alembic/ # DB migration history +│ ├── cli.py # Click CLI (scrape-ah) +│ └── requirements.txt +├── frontend/ +│ └── src/ +│ ├── app/ +│ │ ├── page.tsx # Product search +│ │ ├── products/[id]/page.tsx # Detail + price chart +│ │ └── cheapest/page.tsx # Daily cheapest overview +│ ├── components/ +│ │ ├── Nav.tsx +│ │ ├── ProductCard.tsx +│ │ └── PriceChart.tsx # Recharts line chart +│ └── lib/api.ts # Typed API client +├── seed/ # Historical CSV/JSON import datasets +├── docker-compose.yml +└── .env.example +``` + +## Adding a new store + +1. Create `backend/app/scrapers/.py` +2. Implement `scrape_query(db: Session, query: str) -> ScrapeRun` +3. Add a `@cli.command` in `backend/cli.py` +4. Insert a `Store` row with the new slug + +## Data model + +- **stores** — one row per chain (Albert Heijn, Jumbo, …) +- **products** — one row per store SKU; keyed by `(store_id, external_id)` +- **scrape_runs** — one row per CLI invocation / query +- **price_snapshots** — append-only price observations (cents, UTC timestamp) diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..5156c36 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] diff --git a/backend/alembic.ini b/backend/alembic.ini new file mode 100644 index 0000000..2994cb9 --- /dev/null +++ b/backend/alembic.ini @@ -0,0 +1,39 @@ +[alembic] +script_location = alembic +prepend_sys_path = . +version_path_separator = os +sqlalchemy.url = postgresql://postgres:postgres@db:5432/food_prices + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/backend/alembic/env.py b/backend/alembic/env.py new file mode 100644 index 0000000..82398e7 --- /dev/null +++ b/backend/alembic/env.py @@ -0,0 +1,46 @@ +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +config = context.config + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +db_url = os.getenv("DATABASE_URL") or config.get_main_option("sqlalchemy.url") +config.set_main_option("sqlalchemy.url", db_url) + +from app.models import Base # noqa: E402 + +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + context.configure( + url=db_url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/backend/alembic/script.py.mako b/backend/alembic/script.py.mako new file mode 100644 index 0000000..17dcba0 --- /dev/null +++ b/backend/alembic/script.py.mako @@ -0,0 +1,25 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/backend/alembic/versions/001_initial.py b/backend/alembic/versions/001_initial.py new file mode 100644 index 0000000..fce4b82 --- /dev/null +++ b/backend/alembic/versions/001_initial.py @@ -0,0 +1,85 @@ +"""initial + +Revision ID: 001 +Revises: +Create Date: 2024-01-01 00:00:00.000000 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "stores", + sa.Column("id", sa.Integer(), primary_key=True), + sa.Column("name", sa.String(100), nullable=False), + sa.Column("slug", sa.String(50), nullable=False), + sa.Column("country", sa.String(2), server_default="NL"), + sa.Column("website", sa.String(255)), + sa.UniqueConstraint("slug", name="uq_stores_slug"), + ) + + op.create_table( + "products", + sa.Column("id", sa.Integer(), primary_key=True), + sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False), + sa.Column("external_id", sa.String(50), nullable=False), + sa.Column("ean", sa.String(20)), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("brand", sa.String(100)), + sa.Column("category", sa.String(100)), + sa.Column("unit_size", sa.String(50)), + sa.Column("url", sa.String(500)), + sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()), + sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()), + sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"), + ) + op.create_index("ix_products_ean", "products", ["ean"]) + + op.create_table( + "scrape_runs", + sa.Column("id", sa.Integer(), primary_key=True), + sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False), + sa.Column("query", sa.String(255), nullable=False), + sa.Column("started_at", sa.DateTime(), server_default=sa.func.now()), + sa.Column("finished_at", sa.DateTime()), + sa.Column("status", sa.String(20), server_default="running"), + sa.Column("products_found", sa.Integer(), server_default="0"), + sa.Column("error_message", sa.String(1000)), + ) + + op.create_table( + "price_snapshots", + sa.Column("id", sa.Integer(), primary_key=True), + sa.Column("product_id", sa.Integer(), sa.ForeignKey("products.id"), nullable=False), + sa.Column("scrape_run_id", sa.Integer(), sa.ForeignKey("scrape_runs.id"), nullable=False), + sa.Column("price", sa.Integer(), nullable=False), + sa.Column("unit_price", sa.Integer()), + sa.Column("unit_description", sa.String(50)), + sa.Column("currency", sa.String(3), server_default="EUR"), + sa.Column("discount_label", sa.String(100)), + sa.Column("discount_description", sa.String(255)), + sa.Column("was_price", sa.Integer()), + sa.Column("is_on_sale", sa.Boolean(), server_default="false"), + sa.Column("timestamp", sa.DateTime(), server_default=sa.func.now()), + ) + op.create_index("ix_price_snapshots_timestamp", "price_snapshots", ["timestamp"]) + op.create_index( + "ix_price_snapshots_product_timestamp", + "price_snapshots", + ["product_id", "timestamp"], + ) + + +def downgrade() -> None: + op.drop_table("price_snapshots") + op.drop_table("scrape_runs") + op.drop_table("products") + op.drop_table("stores") diff --git a/backend/app/__init__.py b/backend/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/config.py b/backend/app/config.py new file mode 100644 index 0000000..eb933d6 --- /dev/null +++ b/backend/app/config.py @@ -0,0 +1,11 @@ +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + database_url: str = "postgresql://postgres:postgres@db:5432/food_prices" + ah_client_id: str = "appie" + + model_config = {"env_file": ".env"} + + +settings = Settings() diff --git a/backend/app/database.py b/backend/app/database.py new file mode 100644 index 0000000..4b1de5a --- /dev/null +++ b/backend/app/database.py @@ -0,0 +1,19 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import DeclarativeBase, sessionmaker + +from .config import settings + +engine = create_engine(settings.database_url) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +class Base(DeclarativeBase): + pass + + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/app/main.py b/backend/app/main.py new file mode 100644 index 0000000..fd9421c --- /dev/null +++ b/backend/app/main.py @@ -0,0 +1,26 @@ +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from .database import Base, engine +from .routers import prices, products, scrape_runs, stores + +Base.metadata.create_all(bind=engine) + +app = FastAPI(title="Dutch Food Price Tracker", version="0.1.0") + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(products.router) +app.include_router(stores.router) +app.include_router(prices.router) +app.include_router(scrape_runs.router) + + +@app.get("/") +def root(): + return {"status": "ok", "service": "dutch-food-price-tracker"} diff --git a/backend/app/models.py b/backend/app/models.py new file mode 100644 index 0000000..f683b72 --- /dev/null +++ b/backend/app/models.py @@ -0,0 +1,83 @@ +from datetime import datetime + +import sqlalchemy as sa +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from .database import Base + + +class Store(Base): + __tablename__ = "stores" + + id: Mapped[int] = mapped_column(sa.Integer, primary_key=True) + name: Mapped[str] = mapped_column(sa.String(100), nullable=False) + slug: Mapped[str] = mapped_column(sa.String(50), unique=True, nullable=False) + country: Mapped[str] = mapped_column(sa.String(2), default="NL") + website: Mapped[str | None] = mapped_column(sa.String(255)) + + products: Mapped[list["Product"]] = relationship(back_populates="store") + scrape_runs: Mapped[list["ScrapeRun"]] = relationship(back_populates="store") + + +class Product(Base): + __tablename__ = "products" + __table_args__ = ( + sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"), + ) + + id: Mapped[int] = mapped_column(sa.Integer, primary_key=True) + store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False) + external_id: Mapped[str] = mapped_column(sa.String(50), nullable=False) + ean: Mapped[str | None] = mapped_column(sa.String(20), index=True) + name: Mapped[str] = mapped_column(sa.String(255), nullable=False) + brand: Mapped[str | None] = mapped_column(sa.String(100)) + category: Mapped[str | None] = mapped_column(sa.String(100)) + unit_size: Mapped[str | None] = mapped_column(sa.String(50)) + url: Mapped[str | None] = mapped_column(sa.String(500)) + created_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow + ) + + store: Mapped["Store"] = relationship(back_populates="products") + price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="product") + + +class ScrapeRun(Base): + __tablename__ = "scrape_runs" + + id: Mapped[int] = mapped_column(sa.Integer, primary_key=True) + store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False) + query: Mapped[str] = mapped_column(sa.String(255), nullable=False) + started_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow) + finished_at: Mapped[datetime | None] = mapped_column(sa.DateTime) + status: Mapped[str] = mapped_column(sa.String(20), default="running") + products_found: Mapped[int] = mapped_column(sa.Integer, default=0) + error_message: Mapped[str | None] = mapped_column(sa.String(1000)) + + store: Mapped["Store"] = relationship(back_populates="scrape_runs") + price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="scrape_run") + + +class PriceSnapshot(Base): + __tablename__ = "price_snapshots" + + id: Mapped[int] = mapped_column(sa.Integer, primary_key=True) + product_id: Mapped[int] = mapped_column( + sa.Integer, sa.ForeignKey("products.id"), nullable=False + ) + scrape_run_id: Mapped[int] = mapped_column( + sa.Integer, sa.ForeignKey("scrape_runs.id"), nullable=False + ) + price: Mapped[int] = mapped_column(sa.Integer, nullable=False) # euro cents + unit_price: Mapped[int | None] = mapped_column(sa.Integer) # euro cents + unit_description: Mapped[str | None] = mapped_column(sa.String(50)) + currency: Mapped[str] = mapped_column(sa.String(3), default="EUR") + discount_label: Mapped[str | None] = mapped_column(sa.String(100)) + discount_description: Mapped[str | None] = mapped_column(sa.String(255)) + was_price: Mapped[int | None] = mapped_column(sa.Integer) # euro cents, original price + is_on_sale: Mapped[bool] = mapped_column(sa.Boolean, default=False) + timestamp: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow, index=True) + + product: Mapped["Product"] = relationship(back_populates="price_snapshots") + scrape_run: Mapped["ScrapeRun"] = relationship(back_populates="price_snapshots") diff --git a/backend/app/routers/__init__.py b/backend/app/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/routers/prices.py b/backend/app/routers/prices.py new file mode 100644 index 0000000..84a5034 --- /dev/null +++ b/backend/app/routers/prices.py @@ -0,0 +1,57 @@ +from datetime import date, datetime + +from fastapi import APIRouter, Depends, Query +from sqlalchemy import func, select +from sqlalchemy.orm import Session, selectinload + +from ..database import get_db +from ..models import PriceSnapshot, Product +from ..schemas import CheapestProduct, Product as ProductSchema + +router = APIRouter(prefix="/api/prices", tags=["prices"]) + + +@router.get("/cheapest", response_model=list[CheapestProduct]) +def get_cheapest( + date_filter: date = Query(default=None, alias="date"), + limit: int = Query(default=20, le=100), + db: Session = Depends(get_db), +): + target = date_filter or date.today() + day_start = datetime(target.year, target.month, target.day, 0, 0, 0) + day_end = datetime(target.year, target.month, target.day, 23, 59, 59) + + min_per_product = ( + select( + PriceSnapshot.product_id, + func.min(PriceSnapshot.price).label("min_price"), + ) + .where(PriceSnapshot.timestamp.between(day_start, day_end)) + .group_by(PriceSnapshot.product_id) + .subquery() + ) + + rows = db.execute( + select(PriceSnapshot, Product) + .join( + min_per_product, + (PriceSnapshot.product_id == min_per_product.c.product_id) + & (PriceSnapshot.price == min_per_product.c.min_price), + ) + .join(Product, PriceSnapshot.product_id == Product.id) + .options(selectinload(Product.store)) + .order_by(PriceSnapshot.price.asc()) + .limit(limit) + ).all() + + return [ + CheapestProduct( + product=ProductSchema.model_validate(product), + price=snapshot.price, + unit_price=snapshot.unit_price, + unit_description=snapshot.unit_description, + is_on_sale=snapshot.is_on_sale, + timestamp=snapshot.timestamp, + ) + for snapshot, product in rows + ] diff --git a/backend/app/routers/products.py b/backend/app/routers/products.py new file mode 100644 index 0000000..f97ddab --- /dev/null +++ b/backend/app/routers/products.py @@ -0,0 +1,64 @@ +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session, selectinload + +from ..database import get_db +from ..models import PriceSnapshot, Product +from ..schemas import PriceSnapshot as PriceSnapshotSchema, ProductWithLatestPrice + +router = APIRouter(prefix="/api/products", tags=["products"]) + + +def _attach_latest_price(product: Product, db: Session) -> ProductWithLatestPrice: + p = ProductWithLatestPrice.model_validate(product) + latest = db.scalar( + select(PriceSnapshot) + .where(PriceSnapshot.product_id == product.id) + .order_by(PriceSnapshot.timestamp.desc()) + .limit(1) + ) + if latest: + p.latest_price = latest.price + p.latest_price_timestamp = latest.timestamp + p.is_on_sale = latest.is_on_sale + return p + + +@router.get("", response_model=list[ProductWithLatestPrice]) +def search_products( + search: str = Query(default=""), + limit: int = Query(default=20, le=100), + db: Session = Depends(get_db), +): + q = select(Product).options(selectinload(Product.store)) + if search: + q = q.where(Product.name.ilike(f"%{search}%")) + q = q.order_by(Product.name).limit(limit) + products = db.scalars(q).all() + return [_attach_latest_price(p, db) for p in products] + + +@router.get("/{product_id}", response_model=ProductWithLatestPrice) +def get_product(product_id: int, db: Session = Depends(get_db)): + product = db.scalar( + select(Product) + .where(Product.id == product_id) + .options(selectinload(Product.store)) + ) + if not product: + raise HTTPException(status_code=404, detail="Product not found") + return _attach_latest_price(product, db) + + +@router.get("/{product_id}/prices", response_model=list[PriceSnapshotSchema]) +def get_product_prices( + product_id: int, + limit: int = Query(default=200, le=1000), + db: Session = Depends(get_db), +): + return db.scalars( + select(PriceSnapshot) + .where(PriceSnapshot.product_id == product_id) + .order_by(PriceSnapshot.timestamp.asc()) + .limit(limit) + ).all() diff --git a/backend/app/routers/scrape_runs.py b/backend/app/routers/scrape_runs.py new file mode 100644 index 0000000..b431c88 --- /dev/null +++ b/backend/app/routers/scrape_runs.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter, Depends, Query +from sqlalchemy import select +from sqlalchemy.orm import Session + +from ..database import get_db +from ..models import ScrapeRun +from ..schemas import ScrapeRun as ScrapeRunSchema + +router = APIRouter(prefix="/api/scrape-runs", tags=["scrape-runs"]) + + +@router.get("", response_model=list[ScrapeRunSchema]) +def list_scrape_runs( + limit: int = Query(default=20, le=100), + db: Session = Depends(get_db), +): + return db.scalars( + select(ScrapeRun).order_by(ScrapeRun.started_at.desc()).limit(limit) + ).all() diff --git a/backend/app/routers/stores.py b/backend/app/routers/stores.py new file mode 100644 index 0000000..9b6819f --- /dev/null +++ b/backend/app/routers/stores.py @@ -0,0 +1,14 @@ +from fastapi import APIRouter, Depends +from sqlalchemy import select +from sqlalchemy.orm import Session + +from ..database import get_db +from ..models import Store +from ..schemas import Store as StoreSchema + +router = APIRouter(prefix="/api/stores", tags=["stores"]) + + +@router.get("", response_model=list[StoreSchema]) +def list_stores(db: Session = Depends(get_db)): + return db.scalars(select(Store).order_by(Store.name)).all() diff --git a/backend/app/schemas.py b/backend/app/schemas.py new file mode 100644 index 0000000..539a1a6 --- /dev/null +++ b/backend/app/schemas.py @@ -0,0 +1,75 @@ +from datetime import datetime + +from pydantic import BaseModel + + +class Store(BaseModel): + id: int + name: str + slug: str + country: str + website: str | None = None + + model_config = {"from_attributes": True} + + +class Product(BaseModel): + id: int + store_id: int + external_id: str + ean: str | None = None + name: str + brand: str | None = None + category: str | None = None + unit_size: str | None = None + url: str | None = None + created_at: datetime + updated_at: datetime + store: Store | None = None + + model_config = {"from_attributes": True} + + +class ProductWithLatestPrice(Product): + latest_price: int | None = None + latest_price_timestamp: datetime | None = None + is_on_sale: bool = False + + +class PriceSnapshot(BaseModel): + id: int + product_id: int + scrape_run_id: int + price: int + unit_price: int | None = None + unit_description: str | None = None + currency: str + discount_label: str | None = None + discount_description: str | None = None + was_price: int | None = None + is_on_sale: bool + timestamp: datetime + + model_config = {"from_attributes": True} + + +class ScrapeRun(BaseModel): + id: int + store_id: int + query: str + started_at: datetime + finished_at: datetime | None = None + status: str + products_found: int + error_message: str | None = None + + model_config = {"from_attributes": True} + + +class CheapestProduct(BaseModel): + product: Product + price: int + unit_price: int | None = None + unit_description: str | None = None + is_on_sale: bool + timestamp: datetime diff --git a/backend/app/scrapers/__init__.py b/backend/app/scrapers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/app/scrapers/albert_heijn.py b/backend/app/scrapers/albert_heijn.py new file mode 100644 index 0000000..ead8630 --- /dev/null +++ b/backend/app/scrapers/albert_heijn.py @@ -0,0 +1,138 @@ +from datetime import datetime + +import httpx +from sqlalchemy import and_, select +from sqlalchemy.orm import Session + +from ..models import PriceSnapshot, Product, ScrapeRun, Store + +AH_AUTH_URL = "https://api.ah.nl/mobile-auth/v1/auth/token/anonymous" +AH_SEARCH_URL = "https://api.ah.nl/mobile-services/product/search/v2" +AH_BASE_URL = "https://www.ah.nl" +AH_CLIENT_ID = "appie" + + +def _get_token(client: httpx.Client) -> str: + resp = client.post(AH_AUTH_URL, json={"clientId": AH_CLIENT_ID}) + resp.raise_for_status() + return resp.json()["access_token"] + + +def _search(client: httpx.Client, token: str, query: str, page: int = 0, size: int = 30) -> dict: + resp = client.get( + AH_SEARCH_URL, + params={"query": query, "page": page, "size": size}, + headers={"Authorization": f"Bearer {token}"}, + ) + resp.raise_for_status() + return resp.json() + + +def _to_cents(value: float | int | None) -> int | None: + if value is None: + return None + return round(float(value) * 100) + + +def _upsert_store(db: Session) -> Store: + store = db.scalar(select(Store).where(Store.slug == "albert-heijn")) + if not store: + store = Store( + name="Albert Heijn", + slug="albert-heijn", + country="NL", + website="https://www.ah.nl", + ) + db.add(store) + db.commit() + db.refresh(store) + return store + + +def scrape_query(db: Session, query: str) -> ScrapeRun: + store = _upsert_store(db) + + run = ScrapeRun(store_id=store.id, query=query, started_at=datetime.utcnow()) + db.add(run) + db.commit() + db.refresh(run) + + try: + with httpx.Client(timeout=30.0) as client: + token = _get_token(client) + data = _search(client, token, query) + + count = 0 + now = datetime.utcnow() + + for card in data.get("cards", []): + for raw in card.get("products", []): + external_id = str(raw.get("webshopId", "")).strip() + if not external_id: + continue + + product = db.scalar( + select(Product).where( + and_( + Product.store_id == store.id, + Product.external_id == external_id, + ) + ) + ) + if not product: + product = Product(store_id=store.id, external_id=external_id) + db.add(product) + + link = raw.get("link", "") or "" + product.name = raw.get("title", "") or "" + product.brand = raw.get("brand") or None + product.category = raw.get("category") or None + product.ean = raw.get("eanCode") or None + product.url = f"{AH_BASE_URL}{link}" if link else None + product.updated_at = now + + price_info = raw.get("price") or {} + price_cents = _to_cents(price_info.get("now")) + was_cents = _to_cents(price_info.get("was")) + + unit_info = price_info.get("unitInfo") or {} + unit_price_cents = _to_cents(unit_info.get("price")) + unit_description = unit_info.get("description") or None + + discount = raw.get("discount") or {} + discount_label = discount.get("label") or None + discount_description = discount.get("description") or None + + db.flush() # get product.id if newly created + + if price_cents is not None: + snapshot = PriceSnapshot( + product_id=product.id, + scrape_run_id=run.id, + price=price_cents, + unit_price=unit_price_cents, + unit_description=unit_description, + was_price=was_cents, + is_on_sale=was_cents is not None or discount_label is not None, + discount_label=discount_label, + discount_description=discount_description, + timestamp=now, + ) + db.add(snapshot) + count += 1 + + db.commit() + run.status = "success" + run.products_found = count + run.finished_at = datetime.utcnow() + db.commit() + + except Exception as exc: + db.rollback() + run.status = "failed" + run.error_message = str(exc)[:900] + run.finished_at = datetime.utcnow() + db.commit() + raise + + return run diff --git a/backend/cli.py b/backend/cli.py new file mode 100644 index 0000000..a6adfb8 --- /dev/null +++ b/backend/cli.py @@ -0,0 +1,36 @@ +import click + +from app.database import SessionLocal +from app.scrapers.albert_heijn import scrape_query + + +@click.group() +def cli(): + pass + + +@cli.command("scrape-ah") +@click.option( + "--query", + "queries", + multiple=True, + required=True, + help="Search term to scrape (repeatable)", +) +def scrape_ah(queries: tuple[str, ...]): + """Scrape Albert Heijn product prices for one or more search queries.""" + db = SessionLocal() + try: + for query in queries: + click.echo(f"Scraping Albert Heijn: {query!r}") + run = scrape_query(db, query) + if run.status == "success": + click.echo(f" {run.products_found} products stored (run id={run.id})") + else: + click.echo(f" Failed: {run.error_message}", err=True) + finally: + db.close() + + +if __name__ == "__main__": + cli() diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..a70d8ab --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,10 @@ +fastapi==0.115.0 +uvicorn[standard]==0.30.6 +sqlalchemy==2.0.35 +alembic==1.13.3 +psycopg2-binary==2.9.9 +pydantic==2.9.2 +pydantic-settings==2.5.2 +httpx==0.27.2 +click==8.1.7 +python-dotenv==1.0.1 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..485fe1f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,50 @@ +services: + db: + image: postgres:16 + restart: unless-stopped + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: food_prices + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + + backend: + build: ./backend + restart: unless-stopped + depends_on: + db: + condition: service_healthy + environment: + DATABASE_URL: postgresql://postgres:postgres@db:5432/food_prices + ports: + - "8000:8000" + volumes: + - ./backend:/app + + frontend: + build: ./frontend + restart: unless-stopped + depends_on: + - backend + environment: + NEXT_PUBLIC_API_URL: http://localhost:8000 + ports: + - "3000:3000" + volumes: + - ./frontend/src:/app/src + - ./frontend/public:/app/public + - frontend_node_modules:/app/node_modules + - frontend_next:/app/.next + +volumes: + postgres_data: + frontend_node_modules: + frontend_next: diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..3d23dd2 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,12 @@ +FROM node:20-alpine + +WORKDIR /app + +COPY package*.json ./ +RUN npm ci + +COPY . . + +EXPOSE 3000 +ENV NODE_ENV=development +CMD ["npm", "run", "dev"] diff --git a/frontend/next.config.ts b/frontend/next.config.ts new file mode 100644 index 0000000..cb651cd --- /dev/null +++ b/frontend/next.config.ts @@ -0,0 +1,5 @@ +import type { NextConfig } from "next"; + +const nextConfig: NextConfig = {}; + +export default nextConfig; diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..1b90d67 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,26 @@ +{ + "name": "dutch-food-price-tracker-frontend", + "version": "0.1.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint" + }, + "dependencies": { + "next": "14.2.15", + "react": "^18", + "react-dom": "^18", + "recharts": "^2.12.7" + }, + "devDependencies": { + "@types/node": "^20", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.0.1", + "postcss": "^8", + "tailwindcss": "^3.4.1", + "typescript": "^5" + } +} diff --git a/frontend/postcss.config.mjs b/frontend/postcss.config.mjs new file mode 100644 index 0000000..a982c64 --- /dev/null +++ b/frontend/postcss.config.mjs @@ -0,0 +1,8 @@ +const config = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; + +export default config; diff --git a/frontend/src/app/cheapest/page.tsx b/frontend/src/app/cheapest/page.tsx new file mode 100644 index 0000000..d49c1f3 --- /dev/null +++ b/frontend/src/app/cheapest/page.tsx @@ -0,0 +1,78 @@ +"use client"; + +import Link from "next/link"; +import { useEffect, useState } from "react"; +import { CheapestProduct, formatPrice, getCheapestProducts } from "@/lib/api"; + +export default function CheapestPage() { + const today = new Date().toISOString().split("T")[0]; + const [date, setDate] = useState(today); + const [products, setProducts] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + setLoading(true); + setError(null); + getCheapestProducts(date) + .then(setProducts) + .catch(() => setError("Kon data niet laden.")) + .finally(() => setLoading(false)); + }, [date]); + + return ( +
+
+

Goedkoopste producten

+ setDate(e.target.value)} + className="border rounded-lg px-2 py-1 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500" + /> +
+ + {loading &&

Laden…

} + {error &&

{error}

} + + {!loading && !error && products.length === 0 && ( +

+ Geen data voor {date}. Voer een scrape uit voor deze datum. +

+ )} + +
+ {products.map(({ product, price, unit_price, unit_description, is_on_sale }) => ( + +
+

{product.store?.name}

+

{product.name}

+ {product.brand &&

{product.brand}

} +
+ +
+

+ {formatPrice(price)} +

+ {unit_price != null && unit_description && ( +

+ {formatPrice(unit_price)} {unit_description} +

+ )} + {is_on_sale && ( + + aanbieding + + )} +
+ + ))} +
+
+ ); +} diff --git a/frontend/src/app/globals.css b/frontend/src/app/globals.css new file mode 100644 index 0000000..b5c61c9 --- /dev/null +++ b/frontend/src/app/globals.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx new file mode 100644 index 0000000..f30c1af --- /dev/null +++ b/frontend/src/app/layout.tsx @@ -0,0 +1,22 @@ +import type { Metadata } from "next"; +import { Inter } from "next/font/google"; +import "./globals.css"; +import Nav from "@/components/Nav"; + +const inter = Inter({ subsets: ["latin"] }); + +export const metadata: Metadata = { + title: "Dutch Food Price Tracker", + description: "Track supermarket food prices in the Netherlands", +}; + +export default function RootLayout({ children }: { children: React.ReactNode }) { + return ( + + +