Initial project scaffold

Full-stack Dutch supermarket price tracker with FastAPI backend,
PostgreSQL/SQLAlchemy, Albert Heijn scraper, and Next.js frontend.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 22:27:24 +02:00
commit 486749a890
40 changed files with 1596 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
# Backend — copy to backend/.env for local dev
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/food_prices
# Frontend — copy to frontend/.env.local for local dev
NEXT_PUBLIC_API_URL=http://localhost:8000
+31
View File
@@ -0,0 +1,31 @@
# Python
__pycache__/
*.pyc
*.pyo
.venv/
venv/
.env
*.egg-info/
dist/
build/
# Alembic
backend/alembic/versions/*.pyc
# Node
frontend/node_modules/
frontend/.next/
frontend/.env.local
frontend/.env*.local
# Docker
.dockerignore
# OS
.DS_Store
Thumbs.db
# IDE
.idea/
.vscode/
*.swp
+137
View File
@@ -0,0 +1,137 @@
# Dutch Food Price Tracker
Track Dutch supermarket food prices over time. Currently supports Albert Heijn.
## Stack
| Layer | Tech |
|---|---|
| Backend API | Python FastAPI + SQLAlchemy + Alembic |
| Database | PostgreSQL 16 |
| Scraper | httpx + AH anonymous token auth |
| Frontend | Next.js 14 + TypeScript + Tailwind CSS + Recharts |
| Dev infra | Docker Compose |
## Quick start (Docker)
```bash
git clone <repo> && cd dutch-food-price-tracker
# Build and start all services
docker compose up --build -d
# Run DB migrations (note: the backend also auto-creates the tables on start; if they already exist, run "alembic stamp head" instead of "upgrade head")
docker compose exec backend alembic upgrade head
# Scrape Albert Heijn
docker compose exec backend python cli.py scrape-ah \
--query melk \
--query brood \
--query kaas \
--query yoghurt
```
- Frontend: http://localhost:3000
- API docs: http://localhost:8000/docs
- Database: `localhost:5432` (user: postgres / password: postgres / db: food_prices)
## Local development (without Docker)
### Backend
```bash
cd backend
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
# Needs a running PostgreSQL instance
export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/food_prices
# Create tables
alembic upgrade head
# or: python -c "from app.database import engine; from app.models import Base; Base.metadata.create_all(engine)"
# Start API server
uvicorn app.main:app --reload
```
### Frontend
```bash
cd frontend
npm install
# Point at the local backend
echo "NEXT_PUBLIC_API_URL=http://localhost:8000" > .env.local
npm run dev
```
## CLI reference
```bash
# Scrape one or more queries
python cli.py scrape-ah --query "melk" --query "brood"
# Run via Docker
docker compose exec backend python cli.py scrape-ah --query melk
```
## API endpoints
| Method | Path | Description |
|---|---|---|
| GET | `/api/products?search=melk` | Search products |
| GET | `/api/products/{id}` | Product detail |
| GET | `/api/products/{id}/prices` | Full price history |
| GET | `/api/prices/cheapest?date=2024-01-15` | Cheapest per product for a day |
| GET | `/api/stores` | List stores |
| GET | `/api/scrape-runs` | List recent scrape runs |
## Project structure
```
dutch-food-price-tracker/
├── backend/
│ ├── app/
│ │ ├── main.py # FastAPI app + CORS
│ │ ├── models.py # SQLAlchemy ORM models
│ │ ├── schemas.py # Pydantic request/response schemas
│ │ ├── database.py # Engine, session, Base
│ │ ├── config.py # Pydantic settings
│ │ ├── routers/ # products, stores, prices, scrape_runs
│ │ └── scrapers/
│ │ └── albert_heijn.py # Token auth + product search
│ ├── alembic/ # DB migration history
│ ├── cli.py # Click CLI (scrape-ah)
│ └── requirements.txt
├── frontend/
│ └── src/
│ ├── app/
│ │ ├── page.tsx # Product search
│ │ ├── products/[id]/page.tsx # Detail + price chart
│ │ └── cheapest/page.tsx # Daily cheapest overview
│ ├── components/
│ │ ├── Nav.tsx
│ │ ├── ProductCard.tsx
│ │ └── PriceChart.tsx # Recharts line chart
│ └── lib/api.ts # Typed API client
├── seed/ # Historical CSV/JSON import datasets
├── docker-compose.yml
└── .env.example
```
## Adding a new store
1. Create `backend/app/scrapers/<store_slug>.py`
2. Implement `scrape_query(db: Session, query: str) -> ScrapeRun`
3. Add a `@cli.command` in `backend/cli.py`
4. Insert a `Store` row with the new slug
## Data model
- **stores** — one row per chain (Albert Heijn, Jumbo, …)
- **products** — one row per store SKU; keyed by `(store_id, external_id)`
- **scrape_runs** — one row per scraped query (a CLI invocation with several `--query` flags creates one row per query)
- **price_snapshots** — append-only price observations (cents, UTC timestamp)
+11
View File
@@ -0,0 +1,11 @@
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
+39
View File
@@ -0,0 +1,39 @@
[alembic]
script_location = alembic
prepend_sys_path = .
version_path_separator = os
sqlalchemy.url = postgresql://postgres:postgres@db:5432/food_prices
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+46
View File
@@ -0,0 +1,46 @@
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
# Alembic Config object giving access to the values in the .ini file in use.
config = context.config

# Configure Python logging from the .ini file when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# DATABASE_URL from the environment wins over the sqlalchemy.url in the .ini.
db_url = os.getenv("DATABASE_URL") or config.get_main_option("sqlalchemy.url")
# set_main_option() goes through ConfigParser interpolation, so a literal "%"
# (e.g. from a URL-encoded password) must be doubled or it raises an
# interpolation error. The unescaped db_url is still used for offline mode.
config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))

# Imported late, after the URL has been resolved above.
from app.models import Base  # noqa: E402

# Metadata that migrations (and autogenerate) are compared against.
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in offline mode.

    Configures the Alembic context with just the database URL (no live
    connection), rendering bound parameters inline, then runs the migrations
    inside a single transaction block.
    """
    offline_options = {
        "url": db_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in online mode.

    Builds an engine from the ``sqlalchemy.``-prefixed options of the main
    .ini section (without connection pooling), opens a connection, and runs
    the migrations on it inside a transaction.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    migration_engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with migration_engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Entry point executed when Alembic loads this script: pick the offline or
# online runner depending on how Alembic was invoked.
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
+25
View File
@@ -0,0 +1,25 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}
+85
View File
@@ -0,0 +1,85 @@
"""initial
Revision ID: 001
Revises:
Create Date: 2024-01-01 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
# Create the initial schema: stores, products, scrape_runs and price_snapshots,
# plus their indexes. Tables are created parents-first so foreign keys resolve.
def upgrade() -> None:
# One row per supermarket chain; slug is unique.
op.create_table(
"stores",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(100), nullable=False),
sa.Column("slug", sa.String(50), nullable=False),
sa.Column("country", sa.String(2), server_default="NL"),
sa.Column("website", sa.String(255)),
sa.UniqueConstraint("slug", name="uq_stores_slug"),
)
# One row per store product; (store_id, external_id) is unique.
op.create_table(
"products",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("external_id", sa.String(50), nullable=False),
sa.Column("ean", sa.String(20)),
sa.Column("name", sa.String(255), nullable=False),
sa.Column("brand", sa.String(100)),
sa.Column("category", sa.String(100)),
sa.Column("unit_size", sa.String(50)),
sa.Column("url", sa.String(500)),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()),
sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"),
)
op.create_index("ix_products_ean", "products", ["ean"])
# Bookkeeping for scraper executions; status defaults to "running".
op.create_table(
"scrape_runs",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("query", sa.String(255), nullable=False),
sa.Column("started_at", sa.DateTime(), server_default=sa.func.now()),
sa.Column("finished_at", sa.DateTime()),
sa.Column("status", sa.String(20), server_default="running"),
sa.Column("products_found", sa.Integer(), server_default="0"),
sa.Column("error_message", sa.String(1000)),
)
# Price observations; each row references a product and the scrape run that produced it.
# Prices are stored as integers.
op.create_table(
"price_snapshots",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("product_id", sa.Integer(), sa.ForeignKey("products.id"), nullable=False),
sa.Column("scrape_run_id", sa.Integer(), sa.ForeignKey("scrape_runs.id"), nullable=False),
sa.Column("price", sa.Integer(), nullable=False),
sa.Column("unit_price", sa.Integer()),
sa.Column("unit_description", sa.String(50)),
sa.Column("currency", sa.String(3), server_default="EUR"),
sa.Column("discount_label", sa.String(100)),
sa.Column("discount_description", sa.String(255)),
sa.Column("was_price", sa.Integer()),
sa.Column("is_on_sale", sa.Boolean(), server_default="false"),
sa.Column("timestamp", sa.DateTime(), server_default=sa.func.now()),
)
# Indexes for time-based lookups and per-product price history.
op.create_index("ix_price_snapshots_timestamp", "price_snapshots", ["timestamp"])
op.create_index(
"ix_price_snapshots_product_timestamp",
"price_snapshots",
["product_id", "timestamp"],
)
def downgrade() -> None:
    """Drop every table created by ``upgrade``.

    Tables are dropped in reverse dependency order (referencing tables
    first) so foreign keys never point at a missing table.
    """
    for table_name in ("price_snapshots", "scrape_runs", "products", "stores"):
        op.drop_table(table_name)
View File
+11
View File
@@ -0,0 +1,11 @@
from pydantic_settings import BaseSettings
# Application settings loaded by pydantic-settings; values may come from a
# ".env" file (see model_config) and fall back to the defaults below.
class Settings(BaseSettings):
# Default points at the "db" host.
database_url: str = "postgresql://postgres:postgres@db:5432/food_prices"
# Client id for the Albert Heijn API.
# NOTE(review): the scraper module defines its own AH_CLIENT_ID constant;
# confirm whether this setting is meant to replace it.
ah_client_id: str = "appie"
model_config = {"env_file": ".env"}
# Module-level singleton imported by the rest of the backend.
settings = Settings()
+19
View File
@@ -0,0 +1,19 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, sessionmaker
from .config import settings
# Engine bound to the configured database URL; created once at import time.
engine = create_engine(settings.database_url)
# Session factory: sessions neither autocommit nor autoflush, so callers
# must flush/commit explicitly.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base class that all ORM models inherit from.
class Base(DeclarativeBase):
pass
def get_db():
    """Yield a database session and close it once the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency; the
    session is closed even when the consumer raises.
    """
    with SessionLocal() as session:
        yield session
+26
View File
@@ -0,0 +1,26 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .database import Base, engine
from .routers import prices, products, scrape_runs, stores
# Create any missing tables as soon as this module is imported.
# NOTE(review): tables created here are not recorded in Alembic's version
# table, so a later "alembic upgrade head" on the same database would try to
# create them again — confirm which mechanism should own the schema.
Base.metadata.create_all(bind=engine)
app = FastAPI(title="Dutch Food Price Tracker", version="0.1.0")
# CORS is fully open: any origin, method and header is allowed.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
# Mount the API routers.
app.include_router(products.router)
app.include_router(stores.router)
app.include_router(prices.router)
app.include_router(scrape_runs.router)
@app.get("/")
def root():
    # Minimal liveness/identity response for the service root.
    payload = {"status": "ok", "service": "dutch-food-price-tracker"}
    return payload
+83
View File
@@ -0,0 +1,83 @@
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.orm import Mapped, mapped_column, relationship
from .database import Base
class Store(Base):
    """A supermarket chain whose products are tracked.

    ``slug`` is the stable, unique identifier used by scrapers to look the
    store up. The unique constraint carries the same explicit name as in the
    initial Alembic migration (``uq_stores_slug``) so that schemas created
    via ``create_all`` and via migrations agree.
    """

    __tablename__ = "stores"
    __table_args__ = (sa.UniqueConstraint("slug", name="uq_stores_slug"),)

    id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
    name: Mapped[str] = mapped_column(sa.String(100), nullable=False)
    slug: Mapped[str] = mapped_column(sa.String(50), nullable=False)
    country: Mapped[str] = mapped_column(sa.String(2), default="NL")
    website: Mapped[str | None] = mapped_column(sa.String(255))

    products: Mapped[list["Product"]] = relationship(back_populates="store")
    scrape_runs: Mapped[list["ScrapeRun"]] = relationship(back_populates="store")
# ORM model for a product as sold by one store.
class Product(Base):
__tablename__ = "products"
# A store's external id identifies a product uniquely within that store.
__table_args__ = (
sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"),
)
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False)
external_id: Mapped[str] = mapped_column(sa.String(50), nullable=False)
# Indexed for lookups by barcode.
ean: Mapped[str | None] = mapped_column(sa.String(20), index=True)
name: Mapped[str] = mapped_column(sa.String(255), nullable=False)
brand: Mapped[str | None] = mapped_column(sa.String(100))
category: Mapped[str | None] = mapped_column(sa.String(100))
unit_size: Mapped[str | None] = mapped_column(sa.String(50))
url: Mapped[str | None] = mapped_column(sa.String(500))
# Timestamps are filled client-side with naive UTC datetimes.
created_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(
sa.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
)
store: Mapped["Store"] = relationship(back_populates="products")
price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="product")
# ORM model recording one scraper execution for a single search query.
class ScrapeRun(Base):
__tablename__ = "scrape_runs"
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False)
query: Mapped[str] = mapped_column(sa.String(255), nullable=False)
started_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow)
# Stays NULL until the run is marked finished.
finished_at: Mapped[datetime | None] = mapped_column(sa.DateTime)
# Starts as "running"; the Albert Heijn scraper later sets "success" or "failed".
status: Mapped[str] = mapped_column(sa.String(20), default="running")
products_found: Mapped[int] = mapped_column(sa.Integer, default=0)
error_message: Mapped[str | None] = mapped_column(sa.String(1000))
store: Mapped["Store"] = relationship(back_populates="scrape_runs")
price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="scrape_run")
class PriceSnapshot(Base):
    """A single price observation for a product, produced by one scrape run.

    All monetary values are stored as integer euro cents. The composite
    ``(product_id, timestamp)`` index is declared here under the same name
    the initial Alembic migration uses, so that databases created through
    ``create_all`` get it too and autogenerate does not see it as a stray
    index.
    """

    __tablename__ = "price_snapshots"
    __table_args__ = (
        sa.Index("ix_price_snapshots_product_timestamp", "product_id", "timestamp"),
    )

    id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
    product_id: Mapped[int] = mapped_column(
        sa.Integer, sa.ForeignKey("products.id"), nullable=False
    )
    scrape_run_id: Mapped[int] = mapped_column(
        sa.Integer, sa.ForeignKey("scrape_runs.id"), nullable=False
    )
    price: Mapped[int] = mapped_column(sa.Integer, nullable=False)  # euro cents
    unit_price: Mapped[int | None] = mapped_column(sa.Integer)  # euro cents
    unit_description: Mapped[str | None] = mapped_column(sa.String(50))
    currency: Mapped[str] = mapped_column(sa.String(3), default="EUR")
    discount_label: Mapped[str | None] = mapped_column(sa.String(100))
    discount_description: Mapped[str | None] = mapped_column(sa.String(255))
    was_price: Mapped[int | None] = mapped_column(sa.Integer)  # euro cents, original price
    is_on_sale: Mapped[bool] = mapped_column(sa.Boolean, default=False)
    # Naive UTC timestamp of the observation; indexed for per-day queries.
    timestamp: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow, index=True)

    product: Mapped["Product"] = relationship(back_populates="price_snapshots")
    scrape_run: Mapped["ScrapeRun"] = relationship(back_populates="price_snapshots")
View File
+57
View File
@@ -0,0 +1,57 @@
from datetime import date, datetime
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.orm import Session, selectinload
from ..database import get_db
from ..models import PriceSnapshot, Product
from ..schemas import CheapestProduct, Product as ProductSchema
# Router for price endpoints, served under /api/prices.
router = APIRouter(prefix="/api/prices", tags=["prices"])
@router.get("/cheapest", response_model=list[CheapestProduct])
def get_cheapest(
    date_filter: date | None = Query(default=None, alias="date"),
    limit: int = Query(default=20, ge=1, le=100),
    db: Session = Depends(get_db),
):
    """Return the cheapest observed price per product for one day.

    Parameters:
        date_filter: Day to inspect (query parameter ``date``). Defaults to
            the current UTC date, matching the naive-UTC snapshot timestamps.
        limit: Maximum number of products returned (1-100).
        db: Database session.

    Returns:
        At most ``limit`` entries, one per product, ordered by ascending
        price. When a product hit its minimum price several times that day,
        the most recent observation is used.
    """
    # Snapshot timestamps are written with datetime.utcnow(), so "today"
    # must be the UTC date rather than the server's local date.
    target = date_filter or datetime.utcnow().date()
    day_start = datetime(target.year, target.month, target.day, 0, 0, 0)
    # Include the final microsecond so late-evening snapshots are not lost.
    day_end = datetime(target.year, target.month, target.day, 23, 59, 59, 999999)

    # Rank each product's snapshots of that day: cheapest first, newest
    # first among equal prices. Rank 1 is the single row we want, which
    # avoids duplicate rows per product and never matches another day.
    ranked = (
        select(
            PriceSnapshot.id.label("snapshot_id"),
            func.row_number()
            .over(
                partition_by=PriceSnapshot.product_id,
                order_by=(
                    PriceSnapshot.price.asc(),
                    PriceSnapshot.timestamp.desc(),
                    PriceSnapshot.id.desc(),
                ),
            )
            .label("price_rank"),
        )
        .where(PriceSnapshot.timestamp.between(day_start, day_end))
        .subquery()
    )

    rows = db.execute(
        select(PriceSnapshot, Product)
        .select_from(PriceSnapshot)
        .join(ranked, PriceSnapshot.id == ranked.c.snapshot_id)
        .join(Product, PriceSnapshot.product_id == Product.id)
        .where(ranked.c.price_rank == 1)
        .options(selectinload(Product.store))
        # Secondary key keeps the order stable between equal prices.
        .order_by(PriceSnapshot.price.asc(), PriceSnapshot.id.asc())
        .limit(limit)
    ).all()

    return [
        CheapestProduct(
            product=ProductSchema.model_validate(product),
            price=snapshot.price,
            unit_price=snapshot.unit_price,
            unit_description=snapshot.unit_description,
            is_on_sale=snapshot.is_on_sale,
            timestamp=snapshot.timestamp,
        )
        for snapshot, product in rows
    ]
+64
View File
@@ -0,0 +1,64 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from ..database import get_db
from ..models import PriceSnapshot, Product
from ..schemas import PriceSnapshot as PriceSnapshotSchema, ProductWithLatestPrice
# Router for product endpoints, served under /api/products.
router = APIRouter(prefix="/api/products", tags=["products"])
def _attach_latest_price(product: Product, db: Session) -> ProductWithLatestPrice:
    """Convert an ORM product to its API schema, enriched with its newest price.

    Runs one extra query for the most recent snapshot of ``product``. When
    the product has no snapshots, the price fields keep their schema defaults.
    """
    newest_snapshot_query = (
        select(PriceSnapshot)
        .where(PriceSnapshot.product_id == product.id)
        .order_by(PriceSnapshot.timestamp.desc())
        .limit(1)
    )

    enriched = ProductWithLatestPrice.model_validate(product)
    newest = db.scalar(newest_snapshot_query)
    if not newest:
        return enriched

    enriched.latest_price = newest.price
    enriched.latest_price_timestamp = newest.timestamp
    enriched.is_on_sale = newest.is_on_sale
    return enriched
@router.get("", response_model=list[ProductWithLatestPrice])
def search_products(
    search: str = Query(default=""),
    limit: int = Query(default=20, ge=1, le=100),
    db: Session = Depends(get_db),
):
    """List products, optionally filtered by a case-insensitive name match.

    Parameters:
        search: Substring to look for in the product name. It is matched
            literally: ``%`` and ``_`` typed by the user are escaped rather
            than treated as LIKE wildcards. Empty means "no filter".
        limit: Maximum number of products returned (1-100).
        db: Database session.

    Returns:
        Products ordered by name, each enriched with its latest price.
    """
    q = select(Product).options(selectinload(Product.store))
    if search:
        # autoescape=True escapes LIKE wildcards contained in the user input.
        q = q.where(Product.name.icontains(search, autoescape=True))
    q = q.order_by(Product.name).limit(limit)
    products = db.scalars(q).all()
    return [_attach_latest_price(p, db) for p in products]
@router.get("/{product_id}", response_model=ProductWithLatestPrice)
def get_product(product_id: int, db: Session = Depends(get_db)):
    # Fetch one product (with its store) by primary key and attach its
    # latest price; respond with 404 when no such product exists.
    lookup = (
        select(Product)
        .where(Product.id == product_id)
        .options(selectinload(Product.store))
    )
    found = db.scalar(lookup)
    if found:
        return _attach_latest_price(found, db)
    raise HTTPException(status_code=404, detail="Product not found")
@router.get("/{product_id}/prices", response_model=list[PriceSnapshotSchema])
def get_product_prices(
    product_id: int,
    limit: int = Query(default=200, ge=1, le=1000),
    db: Session = Depends(get_db),
):
    """Return the price history of one product, oldest first.

    Parameters:
        product_id: Primary key of the product.
        limit: Maximum number of snapshots returned (1-1000). When the
            history is longer, the most recent ``limit`` snapshots are kept
            so the last element is always the current price.
        db: Database session.

    Returns:
        Snapshots in ascending timestamp order; empty when the product has
        no snapshots (or does not exist).
    """
    # Select newest-first so LIMIT trims the oldest rows, not the newest.
    newest_first = db.scalars(
        select(PriceSnapshot)
        .where(PriceSnapshot.product_id == product_id)
        .order_by(PriceSnapshot.timestamp.desc(), PriceSnapshot.id.desc())
        .limit(limit)
    ).all()
    # Callers expect chronological order.
    return list(reversed(newest_first))
+19
View File
@@ -0,0 +1,19 @@
from fastapi import APIRouter, Depends, Query
from sqlalchemy import select
from sqlalchemy.orm import Session
from ..database import get_db
from ..models import ScrapeRun
from ..schemas import ScrapeRun as ScrapeRunSchema
# Router for scrape-run endpoints, served under /api/scrape-runs.
router = APIRouter(prefix="/api/scrape-runs", tags=["scrape-runs"])
@router.get("", response_model=list[ScrapeRunSchema])
def list_scrape_runs(
    limit: int = Query(default=20, le=100),
    db: Session = Depends(get_db),
):
    # Most recently started runs first, capped at `limit` rows.
    newest_runs = select(ScrapeRun).order_by(ScrapeRun.started_at.desc()).limit(limit)
    result = db.scalars(newest_runs)
    return result.all()
+14
View File
@@ -0,0 +1,14 @@
from fastapi import APIRouter, Depends
from sqlalchemy import select
from sqlalchemy.orm import Session
from ..database import get_db
from ..models import Store
from ..schemas import Store as StoreSchema
# Router for store endpoints, served under /api/stores.
router = APIRouter(prefix="/api/stores", tags=["stores"])
@router.get("", response_model=list[StoreSchema])
def list_stores(db: Session = Depends(get_db)):
    # All stores, alphabetically by name.
    by_name = select(Store).order_by(Store.name)
    stores = db.scalars(by_name).all()
    return stores
+75
View File
@@ -0,0 +1,75 @@
from datetime import datetime
from pydantic import BaseModel
# API representation of a store.
class Store(BaseModel):
id: int
name: str
slug: str
country: str
website: str | None = None
# Allows building this schema directly from an ORM object's attributes.
model_config = {"from_attributes": True}
# API representation of a product, optionally embedding its store.
class Product(BaseModel):
id: int
store_id: int
external_id: str
ean: str | None = None
name: str
brand: str | None = None
category: str | None = None
unit_size: str | None = None
url: str | None = None
created_at: datetime
updated_at: datetime
store: Store | None = None
# Allows building this schema directly from an ORM object's attributes.
model_config = {"from_attributes": True}
# Product plus a summary of its most recent price snapshot. The extra fields
# keep their defaults when no snapshot is available.
class ProductWithLatestPrice(Product):
latest_price: int | None = None
latest_price_timestamp: datetime | None = None
is_on_sale: bool = False
# API representation of one price observation. Monetary fields are integers
# (the ORM model stores them as euro cents).
class PriceSnapshot(BaseModel):
id: int
product_id: int
scrape_run_id: int
price: int
unit_price: int | None = None
unit_description: str | None = None
currency: str
discount_label: str | None = None
discount_description: str | None = None
was_price: int | None = None
is_on_sale: bool
timestamp: datetime
# Allows building this schema directly from an ORM object's attributes.
model_config = {"from_attributes": True}
# API representation of one scraper execution.
class ScrapeRun(BaseModel):
id: int
store_id: int
query: str
started_at: datetime
finished_at: datetime | None = None
status: str
products_found: int
error_message: str | None = None
# Allows building this schema directly from an ORM object's attributes.
model_config = {"from_attributes": True}
# One entry of the "cheapest" listing: a product together with the price
# fields of the snapshot selected for it.
class CheapestProduct(BaseModel):
product: Product
price: int
unit_price: int | None = None
unit_description: str | None = None
is_on_sale: bool
timestamp: datetime
View File
+138
View File
@@ -0,0 +1,138 @@
from datetime import datetime
import httpx
from sqlalchemy import and_, select
from sqlalchemy.orm import Session
from ..models import PriceSnapshot, Product, ScrapeRun, Store
# Endpoint that issues anonymous access tokens.
AH_AUTH_URL = "https://api.ah.nl/mobile-auth/v1/auth/token/anonymous"
# Product search endpoint; called with a bearer token.
AH_SEARCH_URL = "https://api.ah.nl/mobile-services/product/search/v2"
# Prefix used to turn a product's relative link into an absolute URL.
AH_BASE_URL = "https://www.ah.nl"
# Client id sent in the token request body.
AH_CLIENT_ID = "appie"
def _get_token(client: httpx.Client) -> str:
    """Request an anonymous access token from the AH auth endpoint.

    Raises ``httpx.HTTPStatusError`` on a non-success response and
    ``KeyError`` when the response carries no ``access_token``.
    """
    auth_response = client.post(AH_AUTH_URL, json={"clientId": AH_CLIENT_ID})
    auth_response.raise_for_status()
    token_payload = auth_response.json()
    return token_payload["access_token"]
def _search(client: httpx.Client, token: str, query: str, page: int = 0, size: int = 30) -> dict:
    """Fetch one page of AH product search results as parsed JSON.

    ``token`` is sent as a bearer token; ``page`` and ``size`` are passed
    through as query parameters. Raises ``httpx.HTTPStatusError`` on a
    non-success response.
    """
    search_params = {"query": query, "page": page, "size": size}
    auth_headers = {"Authorization": f"Bearer {token}"}
    search_response = client.get(AH_SEARCH_URL, params=search_params, headers=auth_headers)
    search_response.raise_for_status()
    return search_response.json()
def _to_cents(value: float | int | None) -> int | None:
if value is None:
return None
return round(float(value) * 100)
def _upsert_store(db: Session) -> Store:
    """Return the Albert Heijn store row, creating and committing it if absent."""
    existing = db.scalar(select(Store).where(Store.slug == "albert-heijn"))
    if existing:
        return existing

    created = Store(
        name="Albert Heijn",
        slug="albert-heijn",
        country="NL",
        website="https://www.ah.nl",
    )
    db.add(created)
    db.commit()
    # Reload so the generated primary key is available to the caller.
    db.refresh(created)
    return created
def _upsert_product(db: Session, store: Store, raw: dict, now: datetime) -> Product | None:
    """Create or update the product described by one raw search result.

    Returns ``None`` when the result carries no usable ``webshopId``. A
    missing or null id is skipped instead of being stored as the literal
    string ``"None"``.
    """
    raw_id = raw.get("webshopId")
    external_id = "" if raw_id is None else str(raw_id).strip()
    if not external_id:
        return None

    product = db.scalar(
        select(Product).where(
            and_(
                Product.store_id == store.id,
                Product.external_id == external_id,
            )
        )
    )
    if product is None:
        product = Product(store_id=store.id, external_id=external_id)
        db.add(product)

    link = raw.get("link", "") or ""
    product.name = raw.get("title", "") or ""
    product.brand = raw.get("brand") or None
    product.category = raw.get("category") or None
    product.ean = raw.get("eanCode") or None
    product.url = f"{AH_BASE_URL}{link}" if link else None
    product.updated_at = now
    return product


def _build_snapshot(
    raw: dict, product_id: int, run_id: int, now: datetime
) -> PriceSnapshot | None:
    """Build a price snapshot from one raw search result, or ``None`` if it has no price."""
    price_info = raw.get("price") or {}
    price_cents = _to_cents(price_info.get("now"))
    if price_cents is None:
        return None

    was_cents = _to_cents(price_info.get("was"))
    unit_info = price_info.get("unitInfo") or {}
    discount = raw.get("discount") or {}
    discount_label = discount.get("label") or None

    return PriceSnapshot(
        product_id=product_id,
        scrape_run_id=run_id,
        price=price_cents,
        unit_price=_to_cents(unit_info.get("price")),
        unit_description=unit_info.get("description") or None,
        was_price=was_cents,
        # A previous price or a discount label both mark the product as on sale.
        is_on_sale=was_cents is not None or discount_label is not None,
        discount_label=discount_label,
        discount_description=discount.get("description") or None,
        timestamp=now,
    )


def scrape_query(db: Session, query: str) -> ScrapeRun:
    """Scrape one Albert Heijn search query and store products and prices.

    A ``ScrapeRun`` row is committed up front with its default status, then
    the first page of search results is fetched. Every result with a usable
    id is upserted as a ``Product``; results that also carry a current price
    get a ``PriceSnapshot``. All snapshots of a run share one timestamp.

    Parameters:
        db: Session used for all reads and writes; it is committed here.
        query: Search term sent to the AH search endpoint.

    Returns:
        The finished run with status ``"success"``.

    Raises:
        Exception: Any error is re-raised after the pending changes are
            rolled back and the run is recorded as ``"failed"``.
    """
    store = _upsert_store(db)
    run = ScrapeRun(store_id=store.id, query=query, started_at=datetime.utcnow())
    db.add(run)
    db.commit()
    db.refresh(run)

    try:
        # Keep the HTTP client open only for the two network calls.
        with httpx.Client(timeout=30.0) as client:
            token = _get_token(client)
            data = _search(client, token, query)

        count = 0
        now = datetime.utcnow()
        for card in data.get("cards", []):
            for raw in card.get("products", []):
                product = _upsert_product(db, store, raw, now)
                if product is None:
                    continue

                db.flush()  # get product.id if newly created
                snapshot = _build_snapshot(raw, product.id, run.id, now)
                if snapshot is not None:
                    db.add(snapshot)
                # Counts products seen, with or without a price.
                count += 1

        db.commit()
        run.status = "success"
        run.products_found = count
        run.finished_at = datetime.utcnow()
        db.commit()
    except Exception as exc:
        # Discard partial product/snapshot writes, then persist the failure.
        db.rollback()
        run.status = "failed"
        # Truncated to stay within the column's length limit.
        run.error_message = str(exc)[:900]
        run.finished_at = datetime.utcnow()
        db.commit()
        raise

    return run
+36
View File
@@ -0,0 +1,36 @@
import click
from app.database import SessionLocal
from app.scrapers.albert_heijn import scrape_query
# Root command group; subcommands are registered on it with @cli.command.
@click.group()
def cli():
pass
@cli.command("scrape-ah")
@click.option(
    "--query",
    "queries",
    multiple=True,
    required=True,
    help="Search term to scrape (repeatable)",
)
def scrape_ah(queries: tuple[str, ...]):
    """Scrape Albert Heijn product prices for one or more search queries."""
    # Each query is scraped independently: scrape_query() re-raises after
    # recording a failed run, so the exception is caught here to report the
    # failure and continue with the remaining queries. The process exits
    # with status 1 if any query failed.
    db = SessionLocal()
    failures = 0
    try:
        for query in queries:
            click.echo(f"Scraping Albert Heijn: {query!r}")
            try:
                run = scrape_query(db, query)
            except Exception as exc:
                failures += 1
                click.echo(f" Failed: {exc}", err=True)
                continue
            if run.status == "success":
                click.echo(f" {run.products_found} products stored (run id={run.id})")
            else:
                failures += 1
                click.echo(f" Failed: {run.error_message}", err=True)
    finally:
        db.close()
    if failures:
        raise SystemExit(1)
# Run the command group when this file is executed as a script.
if __name__ == "__main__":
cli()
+10
View File
@@ -0,0 +1,10 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
sqlalchemy==2.0.35
alembic==1.13.3
psycopg2-binary==2.9.9
pydantic==2.9.2
pydantic-settings==2.5.2
httpx==0.27.2
click==8.1.7
python-dotenv==1.0.1
+50
View File
@@ -0,0 +1,50 @@
services:
db:
image: postgres:16
restart: unless-stopped
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: food_prices
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 10s
timeout: 5s
retries: 5
backend:
build: ./backend
restart: unless-stopped
depends_on:
db:
condition: service_healthy
environment:
DATABASE_URL: postgresql://postgres:postgres@db:5432/food_prices
ports:
- "8000:8000"
volumes:
- ./backend:/app
frontend:
build: ./frontend
restart: unless-stopped
depends_on:
- backend
environment:
NEXT_PUBLIC_API_URL: http://localhost:8000
ports:
- "3000:3000"
volumes:
- ./frontend/src:/app/src
- ./frontend/public:/app/public
- frontend_node_modules:/app/node_modules
- frontend_next:/app/.next
volumes:
postgres_data:
frontend_node_modules:
frontend_next:
+12
View File
@@ -0,0 +1,12 @@
FROM node:20-alpine
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
EXPOSE 3000
ENV NODE_ENV=development
CMD ["npm", "run", "dev"]
+5
View File
@@ -0,0 +1,5 @@
// Next.js configuration; currently empty, so framework defaults apply.
// NOTE(review): if this file is named next.config.ts, TypeScript config files
// need Next.js 15+, while package.json in this commit pins next 14.2.15 — confirm.
import type { NextConfig } from "next";
const nextConfig: NextConfig = {};
export default nextConfig;
+26
View File
@@ -0,0 +1,26 @@
{
"name": "dutch-food-price-tracker-frontend",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"next": "14.2.15",
"react": "^18",
"react-dom": "^18",
"recharts": "^2.12.7"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"autoprefixer": "^10.0.1",
"postcss": "^8",
"tailwindcss": "^3.4.1",
"typescript": "^5"
}
}
+8
View File
@@ -0,0 +1,8 @@
// PostCSS configuration: enables the Tailwind CSS and Autoprefixer plugins
// with their default options.
const config = {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
};
export default config;
+78
View File
@@ -0,0 +1,78 @@
"use client";
import Link from "next/link";
import { useEffect, useState } from "react";
import { CheapestProduct, formatPrice, getCheapestProducts } from "@/lib/api";
/**
 * Overview of the cheapest products for a selectable day.
 *
 * Products are re-fetched whenever the selected date changes. Responses that
 * belong to a superseded date are ignored, so quickly switching dates can
 * never leave an older result on screen, and a failed load clears the list
 * instead of showing rows from a previous date under the error message.
 */
export default function CheapestPage() {
  // ISO date (YYYY-MM-DD) derived from the current UTC time.
  const today = new Date().toISOString().split("T")[0];
  const [date, setDate] = useState(today);
  const [products, setProducts] = useState<CheapestProduct[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Flipped by the cleanup below once this request is no longer current.
    let stale = false;

    setLoading(true);
    setError(null);
    getCheapestProducts(date)
      .then((data) => {
        if (!stale) setProducts(data);
      })
      .catch(() => {
        if (stale) return;
        setProducts([]);
        setError("Kon data niet laden.");
      })
      .finally(() => {
        if (!stale) setLoading(false);
      });

    return () => {
      stale = true;
    };
  }, [date]);

  return (
    <div>
      <div className="flex flex-wrap items-center gap-4 mb-6">
        <h1 className="text-2xl font-bold">Goedkoopste producten</h1>
        <input
          type="date"
          value={date}
          max={today}
          onChange={(e) => setDate(e.target.value)}
          className="border rounded-lg px-2 py-1 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
        />
      </div>
      {loading && <p className="text-gray-400 text-sm">Laden</p>}
      {error && <p className="text-red-500 text-sm">{error}</p>}
      {!loading && !error && products.length === 0 && (
        <p className="text-gray-400 text-sm">
          Geen data voor {date}. Voer een scrape uit voor deze datum.
        </p>
      )}
      <div className="space-y-2">
        {products.map(({ product, price, unit_price, unit_description, is_on_sale }) => (
          <Link
            key={product.id}
            href={`/products/${product.id}`}
            className="flex items-center justify-between bg-white border rounded-lg px-4 py-3 hover:shadow-sm transition-shadow"
          >
            <div className="min-w-0">
              <p className="text-xs text-gray-400">{product.store?.name}</p>
              <p className="font-medium text-sm truncate">{product.name}</p>
              {product.brand && <p className="text-xs text-gray-400">{product.brand}</p>}
            </div>
            <div className="text-right ml-4 shrink-0">
              <p className={`font-bold text-base ${is_on_sale ? "text-red-600" : ""}`}>
                {formatPrice(price)}
              </p>
              {unit_price != null && unit_description && (
                <p className="text-xs text-gray-400">
                  {formatPrice(unit_price)} {unit_description}
                </p>
              )}
              {is_on_sale && (
                <span className="text-xs bg-red-50 text-red-600 px-1.5 py-0.5 rounded">
                  aanbieding
                </span>
              )}
            </div>
          </Link>
        ))}
      </div>
    </div>
  );
}
+3
View File
@@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
+22
View File
@@ -0,0 +1,22 @@
import type { Metadata } from "next";
import { Inter } from "next/font/google";
import "./globals.css";
import Nav from "@/components/Nav";
// Inter font (latin subset) loaded through next/font; its class name is
// applied to <body> by the root layout.
const inter = Inter({ subsets: ["latin"] });
// Static document metadata (title and description) exported for Next.js.
export const metadata: Metadata = {
title: "Dutch Food Price Tracker",
description: "Track supermarket food prices in the Netherlands",
};
/**
 * Root layout shared by every page: sets the document language, applies the
 * Inter font and page background, and renders the navigation bar above the
 * centred page content.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const bodyClassName = `${inter.className} bg-gray-50 min-h-screen`;

  return (
    <html lang="nl">
      <body className={bodyClassName}>
        <Nav />
        <main className="container mx-auto px-4 py-8 max-w-5xl">{props.children}</main>
      </body>
    </html>
  );
}
+70
View File
@@ -0,0 +1,70 @@
"use client";
import { useState } from "react";
import { searchProducts, Product } from "@/lib/api";
import ProductCard from "@/components/ProductCard";
/**
 * Product search page.
 *
 * Submitting the form queries the backend for products whose name matches
 * the trimmed input. The term of the last successful search is stored
 * separately from the live input value, so the "no results" message keeps
 * naming what was actually searched while the user types something new.
 */
export default function SearchPage() {
  const [query, setQuery] = useState("");
  const [results, setResults] = useState<Product[]>([]);
  const [loading, setLoading] = useState(false);
  // Term of the last completed search; null until a search has succeeded.
  const [searchedTerm, setSearchedTerm] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  async function handleSearch(e: React.FormEvent) {
    e.preventDefault();
    const term = query.trim();
    if (!term) return;

    setLoading(true);
    setError(null);
    try {
      const data = await searchProducts(term);
      setResults(data);
      setSearchedTerm(term);
    } catch {
      setError("Kon producten niet laden. Is de backend bereikbaar?");
    } finally {
      setLoading(false);
    }
  }

  return (
    <div>
      <h1 className="text-2xl font-bold mb-2">Zoek producten</h1>
      <p className="text-gray-500 mb-6 text-sm">
        Zoek in de database van bijgehouden supermarktprijzen.
      </p>
      <form onSubmit={handleSearch} className="flex gap-2 mb-8">
        <input
          type="text"
          value={query}
          onChange={(e) => setQuery(e.target.value)}
          placeholder="bijv. melk, brood, kaas..."
          className="flex-1 border rounded-lg px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
        />
        <button
          type="submit"
          disabled={loading}
          className="bg-blue-700 text-white px-5 py-2 rounded-lg text-sm font-medium disabled:opacity-50 hover:bg-blue-800 transition-colors"
        >
          {loading ? "Zoeken…" : "Zoeken"}
        </button>
      </form>
      {error && <p className="text-red-500 text-sm mb-4">{error}</p>}
      {searchedTerm !== null && results.length === 0 && !error && (
        <p className="text-gray-400 text-sm">
          Geen producten gevonden voor <strong>{searchedTerm}</strong>. Voer eerst een scrape uit via de
          CLI.
        </p>
      )}
      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-3">
        {results.map((product) => (
          <ProductCard key={product.id} product={product} />
        ))}
      </div>
    </div>
  );
}
+111
View File
@@ -0,0 +1,111 @@
"use client";
import Link from "next/link";
import { useParams } from "next/navigation";
import { useEffect, useState } from "react";
import PriceChart from "@/components/PriceChart";
import { PriceSnapshot, Product, formatPrice, getProduct, getProductPrices } from "@/lib/api";
/**
 * Product detail page: loads one product and its price snapshots by the `id`
 * route parameter, then shows the product details, the latest price and a
 * price-history chart.
 */
export default function ProductPage() {
  const { id } = useParams<{ id: string }>();
  const productId = Number(id);
  const [product, setProduct] = useState<Product | null>(null);
  const [prices, setPrices] = useState<PriceSnapshot[]>([]);
  const [loading, setLoading] = useState(true);
  const [notFound, setNotFound] = useState(false);

  useEffect(() => {
    // Set by the cleanup function so a response for a previous productId (or
    // one arriving after unmount) can no longer overwrite the current state.
    let cancelled = false;

    // Start every load from a clean slate; otherwise a previous product's
    // data or "not found" flag would leak into the next one.
    setProduct(null);
    setPrices([]);
    setNotFound(false);

    // A non-numeric route parameter can never match a product, so skip the
    // request instead of asking the API for "/api/products/NaN".
    if (!Number.isInteger(productId)) {
      setNotFound(true);
      setLoading(false);
      return;
    }

    setLoading(true);
    Promise.all([getProduct(productId), getProductPrices(productId)])
      .then(([p, ps]) => {
        if (cancelled) return;
        setProduct(p);
        setPrices(ps);
      })
      // Any failure (including network errors) is presented as "not found".
      .catch(() => {
        if (!cancelled) setNotFound(true);
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
    };
  }, [productId]);

  if (loading) return <p className="text-gray-400 text-sm">Laden</p>;
  if (notFound || !product) return <p className="text-red-500">Product niet gevonden.</p>;

  // NOTE(review): treats the last array element as the most recent snapshot,
  // i.e. assumes the API returns snapshots oldest-first — confirm against the backend.
  // May be undefined when there are no snapshots; the JSX below guards for that.
  const latest = prices[prices.length - 1];

  return (
    <div className="max-w-2xl mx-auto space-y-6">
      <Link href="/" className="text-blue-600 hover:underline text-sm">
        Terug naar zoeken
      </Link>
      <div className="bg-white border rounded-xl p-6 space-y-4">
        <div>
          <p className="text-xs text-gray-400 uppercase tracking-wide mb-1">
            {product.store?.name}
          </p>
          <h1 className="text-xl font-bold">{product.name}</h1>
          {product.brand && <p className="text-gray-500 text-sm">{product.brand}</p>}
          {product.category && <p className="text-gray-400 text-xs">{product.category}</p>}
          {product.ean && (
            <p className="text-gray-300 text-xs mt-1 font-mono">EAN {product.ean}</p>
          )}
        </div>
        {latest && (
          <div className="flex flex-wrap gap-6 items-end border-t pt-4">
            <div>
              <p className="text-xs text-gray-400 mb-0.5">Huidige prijs</p>
              <p className={`text-3xl font-bold ${latest.is_on_sale ? "text-red-600" : ""}`}>
                {formatPrice(latest.price)}
              </p>
            </div>
            {latest.unit_price != null && latest.unit_description && (
              <div>
                <p className="text-xs text-gray-400 mb-0.5">Per eenheid</p>
                <p className="text-lg text-gray-700">
                  {formatPrice(latest.unit_price)}{" "}
                  <span className="text-sm text-gray-400">{latest.unit_description}</span>
                </p>
              </div>
            )}
            {latest.is_on_sale && latest.was_price != null && (
              <div>
                <p className="text-xs text-gray-400 mb-0.5">Was</p>
                <p className="text-lg text-gray-400 line-through">{formatPrice(latest.was_price)}</p>
              </div>
            )}
          </div>
        )}
        {latest?.discount_description && (
          <p className="inline-block bg-red-50 text-red-700 text-sm px-3 py-1 rounded-full">
            {latest.discount_description}
          </p>
        )}
        {product.url && (
          <a
            href={product.url}
            target="_blank"
            rel="noopener noreferrer"
            className="inline-block text-sm text-blue-600 hover:underline"
          >
            Bekijk op {product.store?.name ?? "winkel"}
          </a>
        )}
      </div>
      <div className="bg-white border rounded-xl p-6">
        <h2 className="font-semibold mb-4">Prijsgeschiedenis</h2>
        {/* A line chart needs at least two points to be meaningful. */}
        {prices.length < 2 ? (
          <p className="text-gray-400 text-sm">
            Nog niet genoeg datapunten voor een grafiek ({prices.length}/2).
          </p>
        ) : (
          <PriceChart snapshots={prices} />
        )}
        <p className="text-xs text-gray-300 mt-3">{prices.length} meetpunt(en) opgeslagen</p>
      </div>
    </div>
  );
}
+19
View File
@@ -0,0 +1,19 @@
import Link from "next/link";
/** Top navigation bar: the site title followed by links to the main pages. */
export default function Nav() {
  // Secondary links share one style; the title link is rendered separately.
  const linkClass = "text-blue-100 hover:text-white transition-colors text-sm";
  const links = [
    { href: "/", label: "Zoeken" },
    { href: "/cheapest", label: "Goedkoopste vandaag" },
  ];

  return (
    <nav className="bg-blue-700 text-white shadow">
      <div className="container mx-auto px-4 max-w-5xl flex items-center gap-6 h-14">
        <Link href="/" className="font-bold text-lg tracking-tight">
          Prijstracker NL
        </Link>
        {links.map(({ href, label }) => (
          // Keyed by label because two entries may point at the same href.
          <Link key={label} href={href} className={linkClass}>
            {label}
          </Link>
        ))}
      </div>
    </nav>
  );
}
+67
View File
@@ -0,0 +1,67 @@
"use client";
import {
CartesianGrid,
Line,
LineChart,
ResponsiveContainer,
Tooltip,
XAxis,
YAxis,
} from "recharts";
import { PriceSnapshot } from "@/lib/api";
interface Props {
  snapshots: PriceSnapshot[];
}

/**
 * Step-line chart of a product's price history.
 *
 * Plots the price of every snapshot as a solid line and, where a snapshot
 * carries one, the former ("was") price as a dashed grey line.
 */
export default function PriceChart({ snapshots }: Props) {
  // Snapshot amounts are divided by 100 and rounded to two decimals for plotting.
  const toChartValue = (amount: number) => Number((amount / 100).toFixed(2));

  const points = snapshots.map((snapshot) => {
    const label = new Date(snapshot.timestamp).toLocaleDateString("nl-NL", {
      day: "numeric",
      month: "short",
    });
    const was = snapshot.was_price != null ? toChartValue(snapshot.was_price) : null;
    return { date: label, prijs: toChartValue(snapshot.price), was };
  });

  return (
    <ResponsiveContainer width="100%" height={280}>
      <LineChart data={points} margin={{ top: 5, right: 16, left: 8, bottom: 5 }}>
        <CartesianGrid strokeDasharray="3 3" stroke="#f0f0f0" />
        <XAxis dataKey="date" tick={{ fontSize: 11 }} />
        <YAxis
          tickFormatter={(v) => v.toFixed(2)}
          width={68}
          tick={{ fontSize: 11 }}
          domain={["auto", "auto"]}
        />
        <Tooltip
          formatter={(value: number, name: string) => {
            // Decimal comma, matching the price formatting used elsewhere in the UI.
            const text = value.toFixed(2).replace(".", ",");
            const seriesLabel = name === "prijs" ? "Prijs" : "Was";
            return [text, seriesLabel];
          }}
          labelStyle={{ fontWeight: 600 }}
        />
        <Line
          type="stepAfter"
          dataKey="prijs"
          stroke="#1d4ed8"
          strokeWidth={2}
          dot={{ r: 3 }}
          activeDot={{ r: 5 }}
          connectNulls
        />
        <Line
          type="stepAfter"
          dataKey="was"
          stroke="#9ca3af"
          strokeWidth={1.5}
          strokeDasharray="4 2"
          dot={false}
          connectNulls
        />
      </LineChart>
    </ResponsiveContainer>
  );
}
+39
View File
@@ -0,0 +1,39 @@
import Link from "next/link";
import { Product, formatPrice } from "@/lib/api";
interface Props {
  product: Product;
}

/**
 * Clickable summary card for one product, linking to its detail page.
 *
 * Shows store, name, optional brand and category on the left and the latest
 * known price (with a sale badge when applicable) on the right.
 */
export default function ProductCard({ product }: Props) {
  const {
    id,
    name,
    brand,
    category,
    store,
    latest_price: latestPrice,
    is_on_sale: onSale,
  } = product;

  // Sale prices are highlighted in red.
  const priceColor = onSale ? "text-red-600" : "text-gray-900";

  const priceBlock =
    latestPrice == null ? (
      // No price recorded yet: render an empty placeholder paragraph.
      <p className="text-gray-300 text-sm"></p>
    ) : (
      <>
        <p className={`font-bold ${priceColor}`}>{formatPrice(latestPrice)}</p>
        {onSale && (
          <span className="text-xs bg-red-50 text-red-600 px-1.5 py-0.5 rounded">
            aanbieding
          </span>
        )}
      </>
    );

  return (
    <Link
      href={`/products/${id}`}
      className="bg-white border rounded-lg p-4 hover:shadow-md transition-shadow flex justify-between items-start gap-3"
    >
      <div className="min-w-0">
        <p className="text-xs text-gray-400 mb-0.5">{store?.name ?? "Onbekend"}</p>
        <h3 className="font-medium text-sm leading-snug">{name}</h3>
        {brand && <p className="text-xs text-gray-400 mt-0.5">{brand}</p>}
        {category && <p className="text-xs text-gray-300">{category}</p>}
      </div>
      <div className="text-right shrink-0">{priceBlock}</div>
    </Link>
  );
}
+75
View File
@@ -0,0 +1,75 @@
// Base URL prepended to every API path. Falls back to the local development
// backend when NEXT_PUBLIC_API_URL is unset or empty (`||` treats "" as missing).
const API_BASE = process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000";
/** A store record, embedded in `Product` as its `store` field. */
export interface Store {
id: number;
name: string;
// NOTE(review): presumably a URL-safe identifier such as "albert-heijn" (cf. `store_slug` in the seed data docs) — confirm.
slug: string;
country: string;
website: string | null;
}
/**
 * A tracked product: the element type requested from `/api/products` and the
 * response type of `/api/products/{id}` in this module.
 */
export interface Product {
id: number;
store_id: number;
// NOTE(review): presumably the store's own identifier for the product — confirm against the backend.
external_id: string;
ean: string | null;
name: string;
brand: string | null;
category: string | null;
unit_size: string | null;
// Link target for the "view at store" anchor on the product page, when present.
url: string | null;
created_at: string;
updated_at: string;
// Nullable; the UI falls back to a placeholder name when it is missing.
store: Store | null;
// Null when no price is known. The UI passes this to formatPrice(), which
// treats its argument as an amount in cents.
latest_price: number | null;
latest_price_timestamp: string | null;
is_on_sale: boolean;
}
/**
 * One recorded price observation for a product: the element type requested
 * from `/api/products/{id}/prices`.
 */
export interface PriceSnapshot {
id: number;
product_id: number;
scrape_run_id: number;
// Amount in cents: consumers divide by 100 or pass it to formatPrice().
price: number;
// Same unit as `price`; shown together with `unit_description` when both are present.
unit_price: number | null;
unit_description: string | null;
currency: string;
discount_label: string | null;
discount_description: string | null;
// Former price in cents; the UI shows it struck through when `is_on_sale` is set.
was_price: number | null;
is_on_sale: boolean;
// Date string parsed with `new Date(...)` by the price chart.
// NOTE(review): time-zone handling is not visible here — confirm the backend's format.
timestamp: string;
}
/**
 * A product together with one of its prices: the element type requested from
 * `/api/prices/cheapest`.
 */
export interface CheapestProduct {
product: Product;
// NOTE(review): presumably in cents like PriceSnapshot.price — confirm against the backend.
price: number;
unit_price: number | null;
unit_description: string | null;
is_on_sale: boolean;
timestamp: string;
}
/**
 * Formats an amount given in cents as a decimal string with two fraction
 * digits and a decimal comma (e.g. 129 -> "1,29"). No currency symbol or
 * thousands separator is added.
 *
 * @param cents - Amount in cents.
 * @returns The amount divided by 100, fixed to two decimals, with "," as the decimal mark.
 */
export function formatPrice(cents: number): string {
  const wholeUnits = cents / 100;
  const fixed = wholeUnits.toFixed(2);
  // toFixed() always uses "." as the decimal mark; swap in the comma.
  return fixed.replace(".", ",");
}
/**
 * Performs a GET request against the backend and returns the parsed JSON body.
 *
 * @param path - Path (including any query string) appended to `API_BASE`.
 * @returns The response body parsed as JSON; it is not validated against `T`.
 * @throws Error when the response status is not in the 2xx range.
 */
async function apiFetch<T>(path: string): Promise<T> {
  const url = `${API_BASE}${path}`;
  const response = await fetch(url);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`API error ${response.status}: ${path}`);
}
/**
 * Requests the products matching a search term.
 *
 * @param query - Search term; URL-encoded before being sent.
 */
export const searchProducts = (query: string): Promise<Product[]> => {
  const encodedTerm = encodeURIComponent(query);
  return apiFetch<Product[]>(`/api/products?search=${encodedTerm}`);
};
/**
 * Requests a single product by its numeric id.
 *
 * @param id - Product id, placed in the request path.
 */
export const getProduct = (id: number): Promise<Product> => {
  const path = `/api/products/${id}`;
  return apiFetch<Product>(path);
};
/**
 * Requests the recorded price snapshots of one product.
 *
 * @param id - Product id, placed in the request path.
 */
export const getProductPrices = (id: number): Promise<PriceSnapshot[]> => {
  const path = `/api/products/${id}/prices`;
  return apiFetch<PriceSnapshot[]>(path);
};
/**
 * Requests the cheapest products, optionally for a specific date.
 *
 * @param date - Optional date filter, sent as the `date` query parameter.
 *   When omitted (or empty) the endpoint is called without a filter.
 */
export const getCheapestProducts = (date?: string) =>
  apiFetch<CheapestProduct[]>(
    // Encode the value like searchProducts does: characters such as "+", "&"
    // or "#" (e.g. in a timestamp with a UTC offset) would otherwise corrupt
    // the query string.
    date ? `/api/prices/cheapest?date=${encodeURIComponent(date)}` : `/api/prices/cheapest`
  );
+15
View File
@@ -0,0 +1,15 @@
import type { Config } from "tailwindcss";
// Source locations Tailwind scans for class names.
const contentGlobs = [
  "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
  "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
  "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
];

// Tailwind CSS configuration: no theme extensions and no plugins.
const config: Config = {
  content: contentGlobs,
  theme: { extend: {} },
  plugins: [],
};

export default config;
+22
View File
@@ -0,0 +1,22 @@
{
"compilerOptions": {
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [{ "name": "next" }],
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}
+43
View File
@@ -0,0 +1,43 @@
# Seed / Import Data
Place historical CSV or JSON price datasets here for bulk import.
## CSV format
```csv
store_slug,external_id,ean,name,brand,category,url,price,unit_price,unit_description,was_price,is_on_sale,timestamp
albert-heijn,12345,8710400123456,AH Halfvolle melk,AH,Melk,https://www.ah.nl/...,129,108,per liter,,false,2024-01-15T10:00:00
```
- `price`, `unit_price`, `was_price` — euro cents (integer)
- `timestamp` — ISO 8601, UTC
## JSON format
```json
[
{
"store_slug": "albert-heijn",
"external_id": "12345",
"ean": "8710400123456",
"name": "AH Halfvolle melk",
"brand": "AH",
"category": "Melk",
"url": "https://www.ah.nl/producten/product/wi12345/ah-halfvolle-melk",
"price": 129,
"unit_price": 108,
"unit_description": "per liter",
"was_price": null,
"is_on_sale": false,
"timestamp": "2024-01-15T10:00:00"
}
]
```
## Import
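An `import-seed` CLI command is planned. For now, import via psql. Note that `\copy` loads columns positionally, so make sure the file's columns match the `price_snapshots` table: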
```bash
psql $DATABASE_URL -c "\copy price_snapshots FROM 'seed/prices.csv' CSV HEADER"
```