# File info: 57 lines, 2.2 KiB, Python
from sqlalchemy import Column, Integer, String, Text, Boolean, Float, ForeignKey
|
|
from sqlalchemy.dialects.postgresql import ARRAY, TIMESTAMP
|
|
from pgvector.sqlalchemy import Vector
|
|
from datetime import datetime, timezone
|
|
from app.database import Base
|
|
|
|
|
|
class Substance(Base):
    """ORM model mapped to the ``substances`` table.

    Each row stores a substance's name, an optional URL and category, a set
    of free-text sections, the raw HTML, and a timestamp of when the row
    was scraped.
    """

    __tablename__ = "substances"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # The only required field besides the key; unique and indexed.
    name = Column(String(255), unique=True, nullable=False, index=True)
    url = Column(String(1024))
    category = Column(String(255))  # e.g. "Psychedelics", "Stimulants"

    # Free-text sections; every one of them is nullable.
    description = Column(Text)
    effects = Column(Text)
    dosage = Column(Text)
    duration = Column(Text)
    chemistry = Column(Text)
    health = Column(Text)
    law = Column(Text)

    # Presumably the unparsed page the fields above were extracted from
    # -- TODO confirm against the scraper.
    raw_html = Column(Text)

    # Timezone-aware column. The default is a Python-side callable, so the
    # current UTC time is computed when a row is inserted without a value.
    scraped_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
|
|
|
|
|
|
class Experience(Base):
    """ORM model mapped to the ``experiences`` table.

    Each row stores one experience report: its text body (required), the
    substance(s) it mentions, optional author/demographic fields, the raw
    HTML, and a timestamp of when the row was scraped.
    """

    __tablename__ = "experiences"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # Unique and indexed, but nullable. Presumably the report's identifier
    # on the source site -- TODO confirm against the scraper.
    erowid_id = Column(Integer, unique=True, index=True)

    title = Column(String(512))
    author = Column(String(255))

    substance = Column(String(512))  # may list multiple substances
    substance_list = Column(ARRAY(String))  # parsed list

    # The only required content field.
    body = Column(Text, nullable=False)

    category = Column(String(255))  # e.g. "General", "First Times", "Bad Trips"

    # Note that gender, age and intensity are stored as strings, while
    # year is an integer.
    gender = Column(String(50))
    age = Column(String(50))
    year = Column(Integer)

    url = Column(String(1024))
    intensity = Column(String(100))
    raw_html = Column(Text)

    # Timezone-aware column. The default is a Python-side callable, so the
    # current UTC time is computed when a row is inserted without a value.
    scraped_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
|
|
|
|
|
|
class DocumentChunk(Base):
    """ORM model mapped to the ``document_chunks`` table.

    Each row stores one chunk of text, the type and id of the record it
    came from, its position among that record's chunks, optional metadata,
    and an optional 768-dimensional embedding vector.
    """

    __tablename__ = "document_chunks"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # Together these two columns point at the originating record. No
    # ForeignKey is declared, so the database does not enforce the link.
    # NOTE(review): source_id presumably holds the id of a row in the
    # table named by source_type -- confirm against the code that writes it.
    source_type = Column(String(50), nullable=False, index=True)  # "experience" or "substance"
    source_id = Column(Integer, nullable=False, index=True)

    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)

    # Stored as plain text, not as a JSON column type.
    metadata_json = Column(Text)  # JSON string with extra metadata

    # Nullable, so a chunk can exist without an embedding.
    embedding = Column(Vector(768))  # nomic-embed-text dimension

    # Timezone-aware column. The default is a Python-side callable, so the
    # current UTC time is computed when a row is inserted without a value.
    created_at = Column(
        TIMESTAMP(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
|