Initial project scaffold
Full-stack Dutch supermarket price tracker with FastAPI backend, PostgreSQL/SQLAlchemy, Albert Heijn scraper, and Next.js frontend. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
EXPOSE 8000
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
@@ -0,0 +1,39 @@
|
||||
[alembic]
|
||||
script_location = alembic
|
||||
prepend_sys_path = .
|
||||
version_path_separator = os
|
||||
sqlalchemy.url = postgresql://postgres:postgres@db:5432/food_prices
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
config = context.config
|
||||
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
db_url = os.getenv("DATABASE_URL") or config.get_main_option("sqlalchemy.url")
|
||||
config.set_main_option("sqlalchemy.url", db_url)
|
||||
|
||||
from app.models import Base # noqa: E402
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
context.configure(
|
||||
url=db_url,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
connectable = engine_from_config(
|
||||
config.get_section(config.config_ini_section, {}),
|
||||
prefix="sqlalchemy.",
|
||||
poolclass=pool.NullPool,
|
||||
)
|
||||
with connectable.connect() as connection:
|
||||
context.configure(connection=connection, target_metadata=target_metadata)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
@@ -0,0 +1,25 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,85 @@
|
||||
"""initial
|
||||
|
||||
Revision ID: 001
|
||||
Revises:
|
||||
Create Date: 2024-01-01 00:00:00.000000
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"stores",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("name", sa.String(100), nullable=False),
|
||||
sa.Column("slug", sa.String(50), nullable=False),
|
||||
sa.Column("country", sa.String(2), server_default="NL"),
|
||||
sa.Column("website", sa.String(255)),
|
||||
sa.UniqueConstraint("slug", name="uq_stores_slug"),
|
||||
)
|
||||
|
||||
op.create_table(
|
||||
"products",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False),
|
||||
sa.Column("external_id", sa.String(50), nullable=False),
|
||||
sa.Column("ean", sa.String(20)),
|
||||
sa.Column("name", sa.String(255), nullable=False),
|
||||
sa.Column("brand", sa.String(100)),
|
||||
sa.Column("category", sa.String(100)),
|
||||
sa.Column("unit_size", sa.String(50)),
|
||||
sa.Column("url", sa.String(500)),
|
||||
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
|
||||
sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()),
|
||||
sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"),
|
||||
)
|
||||
op.create_index("ix_products_ean", "products", ["ean"])
|
||||
|
||||
op.create_table(
|
||||
"scrape_runs",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("store_id", sa.Integer(), sa.ForeignKey("stores.id"), nullable=False),
|
||||
sa.Column("query", sa.String(255), nullable=False),
|
||||
sa.Column("started_at", sa.DateTime(), server_default=sa.func.now()),
|
||||
sa.Column("finished_at", sa.DateTime()),
|
||||
sa.Column("status", sa.String(20), server_default="running"),
|
||||
sa.Column("products_found", sa.Integer(), server_default="0"),
|
||||
sa.Column("error_message", sa.String(1000)),
|
||||
)
|
||||
|
||||
op.create_table(
|
||||
"price_snapshots",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("product_id", sa.Integer(), sa.ForeignKey("products.id"), nullable=False),
|
||||
sa.Column("scrape_run_id", sa.Integer(), sa.ForeignKey("scrape_runs.id"), nullable=False),
|
||||
sa.Column("price", sa.Integer(), nullable=False),
|
||||
sa.Column("unit_price", sa.Integer()),
|
||||
sa.Column("unit_description", sa.String(50)),
|
||||
sa.Column("currency", sa.String(3), server_default="EUR"),
|
||||
sa.Column("discount_label", sa.String(100)),
|
||||
sa.Column("discount_description", sa.String(255)),
|
||||
sa.Column("was_price", sa.Integer()),
|
||||
sa.Column("is_on_sale", sa.Boolean(), server_default="false"),
|
||||
sa.Column("timestamp", sa.DateTime(), server_default=sa.func.now()),
|
||||
)
|
||||
op.create_index("ix_price_snapshots_timestamp", "price_snapshots", ["timestamp"])
|
||||
op.create_index(
|
||||
"ix_price_snapshots_product_timestamp",
|
||||
"price_snapshots",
|
||||
["product_id", "timestamp"],
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_table("price_snapshots")
|
||||
op.drop_table("scrape_runs")
|
||||
op.drop_table("products")
|
||||
op.drop_table("stores")
|
||||
@@ -0,0 +1,11 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
database_url: str = "postgresql://postgres:postgres@db:5432/food_prices"
|
||||
ah_client_id: str = "appie"
|
||||
|
||||
model_config = {"env_file": ".env"}
|
||||
|
||||
|
||||
settings = Settings()
|
||||
@@ -0,0 +1,19 @@
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
||||
|
||||
from .config import settings
|
||||
|
||||
engine = create_engine(settings.database_url)
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
||||
def get_db():
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
@@ -0,0 +1,26 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from .database import Base, engine
|
||||
from .routers import prices, products, scrape_runs, stores
|
||||
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
app = FastAPI(title="Dutch Food Price Tracker", version="0.1.0")
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.include_router(products.router)
|
||||
app.include_router(stores.router)
|
||||
app.include_router(prices.router)
|
||||
app.include_router(scrape_runs.router)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def root():
|
||||
return {"status": "ok", "service": "dutch-food-price-tracker"}
|
||||
@@ -0,0 +1,83 @@
|
||||
from datetime import datetime
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .database import Base
|
||||
|
||||
|
||||
class Store(Base):
|
||||
__tablename__ = "stores"
|
||||
|
||||
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
|
||||
name: Mapped[str] = mapped_column(sa.String(100), nullable=False)
|
||||
slug: Mapped[str] = mapped_column(sa.String(50), unique=True, nullable=False)
|
||||
country: Mapped[str] = mapped_column(sa.String(2), default="NL")
|
||||
website: Mapped[str | None] = mapped_column(sa.String(255))
|
||||
|
||||
products: Mapped[list["Product"]] = relationship(back_populates="store")
|
||||
scrape_runs: Mapped[list["ScrapeRun"]] = relationship(back_populates="store")
|
||||
|
||||
|
||||
class Product(Base):
|
||||
__tablename__ = "products"
|
||||
__table_args__ = (
|
||||
sa.UniqueConstraint("store_id", "external_id", name="uq_products_store_external"),
|
||||
)
|
||||
|
||||
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
|
||||
store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False)
|
||||
external_id: Mapped[str] = mapped_column(sa.String(50), nullable=False)
|
||||
ean: Mapped[str | None] = mapped_column(sa.String(20), index=True)
|
||||
name: Mapped[str] = mapped_column(sa.String(255), nullable=False)
|
||||
brand: Mapped[str | None] = mapped_column(sa.String(100))
|
||||
category: Mapped[str | None] = mapped_column(sa.String(100))
|
||||
unit_size: Mapped[str | None] = mapped_column(sa.String(50))
|
||||
url: Mapped[str | None] = mapped_column(sa.String(500))
|
||||
created_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
sa.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
|
||||
)
|
||||
|
||||
store: Mapped["Store"] = relationship(back_populates="products")
|
||||
price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="product")
|
||||
|
||||
|
||||
class ScrapeRun(Base):
|
||||
__tablename__ = "scrape_runs"
|
||||
|
||||
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
|
||||
store_id: Mapped[int] = mapped_column(sa.Integer, sa.ForeignKey("stores.id"), nullable=False)
|
||||
query: Mapped[str] = mapped_column(sa.String(255), nullable=False)
|
||||
started_at: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow)
|
||||
finished_at: Mapped[datetime | None] = mapped_column(sa.DateTime)
|
||||
status: Mapped[str] = mapped_column(sa.String(20), default="running")
|
||||
products_found: Mapped[int] = mapped_column(sa.Integer, default=0)
|
||||
error_message: Mapped[str | None] = mapped_column(sa.String(1000))
|
||||
|
||||
store: Mapped["Store"] = relationship(back_populates="scrape_runs")
|
||||
price_snapshots: Mapped[list["PriceSnapshot"]] = relationship(back_populates="scrape_run")
|
||||
|
||||
|
||||
class PriceSnapshot(Base):
|
||||
__tablename__ = "price_snapshots"
|
||||
|
||||
id: Mapped[int] = mapped_column(sa.Integer, primary_key=True)
|
||||
product_id: Mapped[int] = mapped_column(
|
||||
sa.Integer, sa.ForeignKey("products.id"), nullable=False
|
||||
)
|
||||
scrape_run_id: Mapped[int] = mapped_column(
|
||||
sa.Integer, sa.ForeignKey("scrape_runs.id"), nullable=False
|
||||
)
|
||||
price: Mapped[int] = mapped_column(sa.Integer, nullable=False) # euro cents
|
||||
unit_price: Mapped[int | None] = mapped_column(sa.Integer) # euro cents
|
||||
unit_description: Mapped[str | None] = mapped_column(sa.String(50))
|
||||
currency: Mapped[str] = mapped_column(sa.String(3), default="EUR")
|
||||
discount_label: Mapped[str | None] = mapped_column(sa.String(100))
|
||||
discount_description: Mapped[str | None] = mapped_column(sa.String(255))
|
||||
was_price: Mapped[int | None] = mapped_column(sa.Integer) # euro cents, original price
|
||||
is_on_sale: Mapped[bool] = mapped_column(sa.Boolean, default=False)
|
||||
timestamp: Mapped[datetime] = mapped_column(sa.DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
product: Mapped["Product"] = relationship(back_populates="price_snapshots")
|
||||
scrape_run: Mapped["ScrapeRun"] = relationship(back_populates="price_snapshots")
|
||||
@@ -0,0 +1,57 @@
|
||||
from datetime import date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from ..database import get_db
|
||||
from ..models import PriceSnapshot, Product
|
||||
from ..schemas import CheapestProduct, Product as ProductSchema
|
||||
|
||||
router = APIRouter(prefix="/api/prices", tags=["prices"])
|
||||
|
||||
|
||||
@router.get("/cheapest", response_model=list[CheapestProduct])
|
||||
def get_cheapest(
|
||||
date_filter: date = Query(default=None, alias="date"),
|
||||
limit: int = Query(default=20, le=100),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
target = date_filter or date.today()
|
||||
day_start = datetime(target.year, target.month, target.day, 0, 0, 0)
|
||||
day_end = datetime(target.year, target.month, target.day, 23, 59, 59)
|
||||
|
||||
min_per_product = (
|
||||
select(
|
||||
PriceSnapshot.product_id,
|
||||
func.min(PriceSnapshot.price).label("min_price"),
|
||||
)
|
||||
.where(PriceSnapshot.timestamp.between(day_start, day_end))
|
||||
.group_by(PriceSnapshot.product_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
rows = db.execute(
|
||||
select(PriceSnapshot, Product)
|
||||
.join(
|
||||
min_per_product,
|
||||
(PriceSnapshot.product_id == min_per_product.c.product_id)
|
||||
& (PriceSnapshot.price == min_per_product.c.min_price),
|
||||
)
|
||||
.join(Product, PriceSnapshot.product_id == Product.id)
|
||||
.options(selectinload(Product.store))
|
||||
.order_by(PriceSnapshot.price.asc())
|
||||
.limit(limit)
|
||||
).all()
|
||||
|
||||
return [
|
||||
CheapestProduct(
|
||||
product=ProductSchema.model_validate(product),
|
||||
price=snapshot.price,
|
||||
unit_price=snapshot.unit_price,
|
||||
unit_description=snapshot.unit_description,
|
||||
is_on_sale=snapshot.is_on_sale,
|
||||
timestamp=snapshot.timestamp,
|
||||
)
|
||||
for snapshot, product in rows
|
||||
]
|
||||
@@ -0,0 +1,64 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from ..database import get_db
|
||||
from ..models import PriceSnapshot, Product
|
||||
from ..schemas import PriceSnapshot as PriceSnapshotSchema, ProductWithLatestPrice
|
||||
|
||||
router = APIRouter(prefix="/api/products", tags=["products"])
|
||||
|
||||
|
||||
def _attach_latest_price(product: Product, db: Session) -> ProductWithLatestPrice:
|
||||
p = ProductWithLatestPrice.model_validate(product)
|
||||
latest = db.scalar(
|
||||
select(PriceSnapshot)
|
||||
.where(PriceSnapshot.product_id == product.id)
|
||||
.order_by(PriceSnapshot.timestamp.desc())
|
||||
.limit(1)
|
||||
)
|
||||
if latest:
|
||||
p.latest_price = latest.price
|
||||
p.latest_price_timestamp = latest.timestamp
|
||||
p.is_on_sale = latest.is_on_sale
|
||||
return p
|
||||
|
||||
|
||||
@router.get("", response_model=list[ProductWithLatestPrice])
|
||||
def search_products(
|
||||
search: str = Query(default=""),
|
||||
limit: int = Query(default=20, le=100),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
q = select(Product).options(selectinload(Product.store))
|
||||
if search:
|
||||
q = q.where(Product.name.ilike(f"%{search}%"))
|
||||
q = q.order_by(Product.name).limit(limit)
|
||||
products = db.scalars(q).all()
|
||||
return [_attach_latest_price(p, db) for p in products]
|
||||
|
||||
|
||||
@router.get("/{product_id}", response_model=ProductWithLatestPrice)
|
||||
def get_product(product_id: int, db: Session = Depends(get_db)):
|
||||
product = db.scalar(
|
||||
select(Product)
|
||||
.where(Product.id == product_id)
|
||||
.options(selectinload(Product.store))
|
||||
)
|
||||
if not product:
|
||||
raise HTTPException(status_code=404, detail="Product not found")
|
||||
return _attach_latest_price(product, db)
|
||||
|
||||
|
||||
@router.get("/{product_id}/prices", response_model=list[PriceSnapshotSchema])
|
||||
def get_product_prices(
|
||||
product_id: int,
|
||||
limit: int = Query(default=200, le=1000),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
return db.scalars(
|
||||
select(PriceSnapshot)
|
||||
.where(PriceSnapshot.product_id == product_id)
|
||||
.order_by(PriceSnapshot.timestamp.asc())
|
||||
.limit(limit)
|
||||
).all()
|
||||
@@ -0,0 +1,19 @@
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..database import get_db
|
||||
from ..models import ScrapeRun
|
||||
from ..schemas import ScrapeRun as ScrapeRunSchema
|
||||
|
||||
router = APIRouter(prefix="/api/scrape-runs", tags=["scrape-runs"])
|
||||
|
||||
|
||||
@router.get("", response_model=list[ScrapeRunSchema])
|
||||
def list_scrape_runs(
|
||||
limit: int = Query(default=20, le=100),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
return db.scalars(
|
||||
select(ScrapeRun).order_by(ScrapeRun.started_at.desc()).limit(limit)
|
||||
).all()
|
||||
@@ -0,0 +1,14 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..database import get_db
|
||||
from ..models import Store
|
||||
from ..schemas import Store as StoreSchema
|
||||
|
||||
router = APIRouter(prefix="/api/stores", tags=["stores"])
|
||||
|
||||
|
||||
@router.get("", response_model=list[StoreSchema])
|
||||
def list_stores(db: Session = Depends(get_db)):
|
||||
return db.scalars(select(Store).order_by(Store.name)).all()
|
||||
@@ -0,0 +1,75 @@
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class Store(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
slug: str
|
||||
country: str
|
||||
website: str | None = None
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class Product(BaseModel):
|
||||
id: int
|
||||
store_id: int
|
||||
external_id: str
|
||||
ean: str | None = None
|
||||
name: str
|
||||
brand: str | None = None
|
||||
category: str | None = None
|
||||
unit_size: str | None = None
|
||||
url: str | None = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
store: Store | None = None
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class ProductWithLatestPrice(Product):
|
||||
latest_price: int | None = None
|
||||
latest_price_timestamp: datetime | None = None
|
||||
is_on_sale: bool = False
|
||||
|
||||
|
||||
class PriceSnapshot(BaseModel):
|
||||
id: int
|
||||
product_id: int
|
||||
scrape_run_id: int
|
||||
price: int
|
||||
unit_price: int | None = None
|
||||
unit_description: str | None = None
|
||||
currency: str
|
||||
discount_label: str | None = None
|
||||
discount_description: str | None = None
|
||||
was_price: int | None = None
|
||||
is_on_sale: bool
|
||||
timestamp: datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class ScrapeRun(BaseModel):
|
||||
id: int
|
||||
store_id: int
|
||||
query: str
|
||||
started_at: datetime
|
||||
finished_at: datetime | None = None
|
||||
status: str
|
||||
products_found: int
|
||||
error_message: str | None = None
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class CheapestProduct(BaseModel):
|
||||
product: Product
|
||||
price: int
|
||||
unit_price: int | None = None
|
||||
unit_description: str | None = None
|
||||
is_on_sale: bool
|
||||
timestamp: datetime
|
||||
@@ -0,0 +1,138 @@
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import and_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..models import PriceSnapshot, Product, ScrapeRun, Store
|
||||
|
||||
AH_AUTH_URL = "https://api.ah.nl/mobile-auth/v1/auth/token/anonymous"
|
||||
AH_SEARCH_URL = "https://api.ah.nl/mobile-services/product/search/v2"
|
||||
AH_BASE_URL = "https://www.ah.nl"
|
||||
AH_CLIENT_ID = "appie"
|
||||
|
||||
|
||||
def _get_token(client: httpx.Client) -> str:
|
||||
resp = client.post(AH_AUTH_URL, json={"clientId": AH_CLIENT_ID})
|
||||
resp.raise_for_status()
|
||||
return resp.json()["access_token"]
|
||||
|
||||
|
||||
def _search(client: httpx.Client, token: str, query: str, page: int = 0, size: int = 30) -> dict:
|
||||
resp = client.get(
|
||||
AH_SEARCH_URL,
|
||||
params={"query": query, "page": page, "size": size},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
def _to_cents(value: float | int | None) -> int | None:
|
||||
if value is None:
|
||||
return None
|
||||
return round(float(value) * 100)
|
||||
|
||||
|
||||
def _upsert_store(db: Session) -> Store:
|
||||
store = db.scalar(select(Store).where(Store.slug == "albert-heijn"))
|
||||
if not store:
|
||||
store = Store(
|
||||
name="Albert Heijn",
|
||||
slug="albert-heijn",
|
||||
country="NL",
|
||||
website="https://www.ah.nl",
|
||||
)
|
||||
db.add(store)
|
||||
db.commit()
|
||||
db.refresh(store)
|
||||
return store
|
||||
|
||||
|
||||
def scrape_query(db: Session, query: str) -> ScrapeRun:
|
||||
store = _upsert_store(db)
|
||||
|
||||
run = ScrapeRun(store_id=store.id, query=query, started_at=datetime.utcnow())
|
||||
db.add(run)
|
||||
db.commit()
|
||||
db.refresh(run)
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=30.0) as client:
|
||||
token = _get_token(client)
|
||||
data = _search(client, token, query)
|
||||
|
||||
count = 0
|
||||
now = datetime.utcnow()
|
||||
|
||||
for card in data.get("cards", []):
|
||||
for raw in card.get("products", []):
|
||||
external_id = str(raw.get("webshopId", "")).strip()
|
||||
if not external_id:
|
||||
continue
|
||||
|
||||
product = db.scalar(
|
||||
select(Product).where(
|
||||
and_(
|
||||
Product.store_id == store.id,
|
||||
Product.external_id == external_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
if not product:
|
||||
product = Product(store_id=store.id, external_id=external_id)
|
||||
db.add(product)
|
||||
|
||||
link = raw.get("link", "") or ""
|
||||
product.name = raw.get("title", "") or ""
|
||||
product.brand = raw.get("brand") or None
|
||||
product.category = raw.get("category") or None
|
||||
product.ean = raw.get("eanCode") or None
|
||||
product.url = f"{AH_BASE_URL}{link}" if link else None
|
||||
product.updated_at = now
|
||||
|
||||
price_info = raw.get("price") or {}
|
||||
price_cents = _to_cents(price_info.get("now"))
|
||||
was_cents = _to_cents(price_info.get("was"))
|
||||
|
||||
unit_info = price_info.get("unitInfo") or {}
|
||||
unit_price_cents = _to_cents(unit_info.get("price"))
|
||||
unit_description = unit_info.get("description") or None
|
||||
|
||||
discount = raw.get("discount") or {}
|
||||
discount_label = discount.get("label") or None
|
||||
discount_description = discount.get("description") or None
|
||||
|
||||
db.flush() # get product.id if newly created
|
||||
|
||||
if price_cents is not None:
|
||||
snapshot = PriceSnapshot(
|
||||
product_id=product.id,
|
||||
scrape_run_id=run.id,
|
||||
price=price_cents,
|
||||
unit_price=unit_price_cents,
|
||||
unit_description=unit_description,
|
||||
was_price=was_cents,
|
||||
is_on_sale=was_cents is not None or discount_label is not None,
|
||||
discount_label=discount_label,
|
||||
discount_description=discount_description,
|
||||
timestamp=now,
|
||||
)
|
||||
db.add(snapshot)
|
||||
count += 1
|
||||
|
||||
db.commit()
|
||||
run.status = "success"
|
||||
run.products_found = count
|
||||
run.finished_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
except Exception as exc:
|
||||
db.rollback()
|
||||
run.status = "failed"
|
||||
run.error_message = str(exc)[:900]
|
||||
run.finished_at = datetime.utcnow()
|
||||
db.commit()
|
||||
raise
|
||||
|
||||
return run
|
||||
@@ -0,0 +1,36 @@
|
||||
import click
|
||||
|
||||
from app.database import SessionLocal
|
||||
from app.scrapers.albert_heijn import scrape_query
|
||||
|
||||
|
||||
@click.group()
|
||||
def cli():
|
||||
pass
|
||||
|
||||
|
||||
@cli.command("scrape-ah")
|
||||
@click.option(
|
||||
"--query",
|
||||
"queries",
|
||||
multiple=True,
|
||||
required=True,
|
||||
help="Search term to scrape (repeatable)",
|
||||
)
|
||||
def scrape_ah(queries: tuple[str, ...]):
|
||||
"""Scrape Albert Heijn product prices for one or more search queries."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
for query in queries:
|
||||
click.echo(f"Scraping Albert Heijn: {query!r}")
|
||||
run = scrape_query(db, query)
|
||||
if run.status == "success":
|
||||
click.echo(f" {run.products_found} products stored (run id={run.id})")
|
||||
else:
|
||||
click.echo(f" Failed: {run.error_message}", err=True)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
@@ -0,0 +1,10 @@
|
||||
fastapi==0.115.0
|
||||
uvicorn[standard]==0.30.6
|
||||
sqlalchemy==2.0.35
|
||||
alembic==1.13.3
|
||||
psycopg2-binary==2.9.9
|
||||
pydantic==2.9.2
|
||||
pydantic-settings==2.5.2
|
||||
httpx==0.27.2
|
||||
click==8.1.7
|
||||
python-dotenv==1.0.1
|
||||
Reference in New Issue
Block a user