# erowid-bot/app/models.py
#
# 57 lines
# 2.2 KiB
# Python

from sqlalchemy import Column, Integer, String, Text, Boolean, Float, ForeignKey
from sqlalchemy.dialects.postgresql import ARRAY, TIMESTAMP
from pgvector.sqlalchemy import Vector
from datetime import datetime, timezone
from app.database import Base
class Substance(Base):
    """ORM mapping for one row of the ``substances`` table.

    Besides identity fields (``name``, ``url``, ``category``) the row holds
    several free-text sections plus the raw HTML of the page and the time
    it was scraped.
    """

    __tablename__ = "substances"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    # Required and unique; indexed for lookups by name.
    name = Column(
        String(255),
        unique=True,
        nullable=False,
        index=True,
    )
    url = Column(String(1024))
    # Category label, e.g. "Psychedelics", "Stimulants".
    category = Column(String(255))

    # --- free-text sections (all optional) ----------------------------
    description = Column(Text)
    effects = Column(Text)
    dosage = Column(Text)
    duration = Column(Text)
    chemistry = Column(Text)
    health = Column(Text)
    law = Column(Text)

    # --- provenance ---------------------------------------------------
    raw_html = Column(Text)
    # Timezone-aware; filled with the current UTC time when no value is
    # supplied at insert.
    scraped_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
    )
class Experience(Base):
    """ORM mapping for one row of the ``experiences`` table.

    Only ``body`` is mandatory; every other descriptive field is nullable.
    """

    __tablename__ = "experiences"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    # Unique and indexed; presumably the report id used by the source
    # site -- confirm against the scraper.
    erowid_id = Column(
        Integer,
        unique=True,
        index=True,
    )

    # --- report header ------------------------------------------------
    title = Column(String(512))
    author = Column(String(255))
    # Substance text as a single string; may list multiple substances.
    substance = Column(String(512))
    # Parsed list form of the substances.
    substance_list = Column(ARRAY(String))

    # --- report content -----------------------------------------------
    body = Column(
        Text,
        nullable=False,
    )
    # Category label, e.g. "General", "First Times", "Bad Trips".
    category = Column(String(255))

    # --- author / report metadata -------------------------------------
    gender = Column(String(50))
    # Stored as a string, not an integer.
    age = Column(String(50))
    year = Column(Integer)
    url = Column(String(1024))
    intensity = Column(String(100))

    # --- provenance ---------------------------------------------------
    raw_html = Column(Text)
    # Timezone-aware; filled with the current UTC time when no value is
    # supplied at insert.
    scraped_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
    )
class DocumentChunk(Base):
    """ORM mapping for one row of the ``document_chunks`` table.

    Each row stores a piece of text (``content``), its position within the
    source document (``chunk_index``), and an optional embedding vector.
    """

    __tablename__ = "document_chunks"

    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # --- origin of the chunk ------------------------------------------
    # Kind of source document: "experience" or "substance".
    source_type = Column(
        String(50),
        nullable=False,
        index=True,
    )
    # No ForeignKey is declared here; presumably the id of the row in the
    # table named by source_type -- confirm against the code that writes
    # chunks.
    source_id = Column(
        Integer,
        nullable=False,
        index=True,
    )
    chunk_index = Column(
        Integer,
        nullable=False,
    )

    # --- payload ------------------------------------------------------
    content = Column(
        Text,
        nullable=False,
    )
    # JSON string with extra metadata, stored as plain text.
    metadata_json = Column(Text)
    # 768 is the nomic-embed-text dimension.
    embedding = Column(Vector(768))

    # Timezone-aware; filled with the current UTC time when no value is
    # supplied at insert.
    created_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
    )