"""SQLAlchemy ORM models for substances, experience reports and embedded text chunks."""

from datetime import datetime, timezone

from pgvector.sqlalchemy import Vector
# NOTE(review): Boolean, Float and ForeignKey are not used by the models below;
# they are kept because other code may rely on them being importable from here.
from sqlalchemy import Boolean, Column, Float, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import ARRAY, TIMESTAMP

from app.database import Base

# Width of the vectors stored in DocumentChunk.embedding (nomic-embed-text dimension).
EMBEDDING_DIM = 768


def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def _model_repr(obj: object, *fields: str) -> str:
    """Format *obj* as ``ClassName(field=value, ...)`` from loaded values only.

    Values are read from the instance ``__dict__`` instead of through normal
    attribute access, so building the string never triggers a lazy load and
    never fails on a detached instance. Attributes that are not currently
    present in ``__dict__`` are shown as ``<unloaded>``.
    """
    state = vars(obj)
    parts = [
        f"{field}={state[field]!r}" if field in state else f"{field}=<unloaded>"
        for field in fields
    ]
    return f"{type(obj).__name__}({', '.join(parts)})"


class Substance(Base):
    """One row of the ``substances`` table, keyed by a unique substance name."""

    __tablename__ = "substances"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), unique=True, nullable=False, index=True)
    url = Column(String(1024))
    category = Column(String(255))  # e.g. "Psychedelics", "Stimulants"

    # Free-text sections; all are nullable.
    description = Column(Text)
    effects = Column(Text)
    dosage = Column(Text)
    duration = Column(Text)
    chemistry = Column(Text)
    health = Column(Text)
    law = Column(Text)

    raw_html = Column(Text)  # presumably the unparsed source page; confirm with the scraper
    # Filled in Python (UTC) when a row is inserted without an explicit value.
    scraped_at = Column(TIMESTAMP(timezone=True), default=_utcnow)

    def __repr__(self) -> str:
        return _model_repr(self, "id", "name")


class Experience(Base):
    """One row of the ``experiences`` table; ``body`` is the only required text field."""

    __tablename__ = "experiences"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique but nullable; presumably the report's id on the source site (confirm).
    erowid_id = Column(Integer, unique=True, index=True)
    title = Column(String(512))
    author = Column(String(255))
    substance = Column(String(512))  # may list multiple substances
    substance_list = Column(ARRAY(String))  # parsed list
    body = Column(Text, nullable=False)
    category = Column(String(255))  # e.g. "General", "First Times", "Bad Trips"
    gender = Column(String(50))
    age = Column(String(50))  # stored as text, not as a number
    year = Column(Integer)
    url = Column(String(1024))
    intensity = Column(String(100))
    raw_html = Column(Text)  # presumably the unparsed source page; confirm with the scraper
    # Filled in Python (UTC) when a row is inserted without an explicit value.
    scraped_at = Column(TIMESTAMP(timezone=True), default=_utcnow)

    def __repr__(self) -> str:
        return _model_repr(self, "id", "erowid_id", "title")


class DocumentChunk(Base):
    """One row of the ``document_chunks`` table: a piece of text plus its embedding."""

    __tablename__ = "document_chunks"

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_type = Column(String(50), nullable=False, index=True)  # "experience" or "substance"
    # No ForeignKey is declared here; presumably the id of a row in the table
    # named by source_type (confirm against the code that writes chunks).
    source_id = Column(Integer, nullable=False, index=True)
    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)
    metadata_json = Column(Text)  # JSON string with extra metadata
    embedding = Column(Vector(EMBEDDING_DIM))  # nomic-embed-text dimension
    # Filled in Python (UTC) when a row is inserted without an explicit value.
    created_at = Column(TIMESTAMP(timezone=True), default=_utcnow)

    def __repr__(self) -> str:
        return _model_repr(self, "id", "source_type", "source_id", "chunk_index")