Skip to content

Schema Definition Guide

This guide covers defining type-safe database schemas using surql's functional composition API.

Table of Contents

Overview

surql provides a code-first approach to schema definition using:

  • Pure functions - All schema builders return immutable data structures
  • Pydantic models - Type-safe schema definitions with validation
  • Functional composition - Compose schemas using pure functions
  • SurrealQL mapping - Schemas map directly to SurrealQL statements

Key Concepts

from surql.schema.fields import string_field, int_field
from surql.schema.table import table_schema, unique_index, TableMode

# Define a schema using pure functions
schema = table_schema(
  'user',
  mode=TableMode.SCHEMAFULL,
  fields=[
    string_field('name'),
    int_field('age'),
  ],
  indexes=[
    unique_index('name_idx', ['name']),
  ],
)

Field Types

Basic Field Types

surql supports all SurrealDB field types:

from surql.schema.fields import (
  FieldType,
  string_field,
  int_field,
  float_field,
  bool_field,
  datetime_field,
  duration_field,
  decimal_field,
  number_field,
  array_field,
  object_field,
  record_field,
  geometry_field,
  computed_field,
)

String Fields

# Basic string
string_field('name')

# With length validation
string_field('username', assertion='string::len($value) >= 3 AND string::len($value) <= 20')

# Email validation
string_field('email', assertion='string::is::email($value)')

# Default value
string_field('status', default='"active"')

# Read-only
string_field('id', readonly=True)

Numeric Fields

# Integer
int_field('age', assertion='$value >= 0 AND $value <= 150')

# Float
float_field('price', assertion='$value > 0')

# Decimal
decimal_field('balance', assertion='$value >= 0')

# Number (int or float)
number_field('quantity')

# With default
int_field('count', default='0')

Boolean Fields

# Basic boolean
bool_field('is_active')

# With default
bool_field('is_verified', default='false')

# Read-only
bool_field('is_deleted', default='false', readonly=True)

Datetime Fields

# Basic datetime
datetime_field('birthday')

# Auto-set on creation
datetime_field('created_at', default='time::now()', readonly=True)

# Auto-update
datetime_field('updated_at', default='time::now()')

# With validation
datetime_field('expires_at', assertion='$value > time::now()')

Record Fields (Foreign Keys)

# Link to any record
record_field('owner')

# Link to specific table
record_field('author', table='user')

# With custom assertion
record_field('category', assertion='$value.table = "category" OR $value.table = "tag"')

# Optional
record_field('parent', default='NONE')

Array Fields

# Basic array
array_field('tags')

# With default
array_field('roles', default='[]')

# With validation
array_field('scores', assertion='array::len($value) <= 10')

Object Fields

# Flexible object
object_field('metadata', flexible=True)

# With default
object_field('settings', default='{}')

# Strict object (define nested fields separately)
object_field('address', flexible=False)
# Then define nested fields:
# field('address.street', FieldType.STRING)
# field('address.city', FieldType.STRING)

Computed Fields

# Computed from other fields
computed_field(
  'full_name',
  'string::concat(name.first, " ", name.last)',
  FieldType.STRING,
)

# Computed with function
computed_field(
  'age_years',
  'math::floor(time::now() - birthday)',
  FieldType.INT,
)

Nested Fields

Use dot notation for nested structure:

from surql.schema.fields import field, FieldType

fields = [
  field('name.first', FieldType.STRING),
  field('name.last', FieldType.STRING),
  field('address.street', FieldType.STRING),
  field('address.city', FieldType.STRING),
  field('address.zip', FieldType.STRING),
]

Table Schemas

Basic Table Definition

from surql.schema.table import table_schema, TableMode

# Schemafull (strict)
user_table = table_schema(
  'user',
  mode=TableMode.SCHEMAFULL,
  fields=[...],
)

# Schemaless (flexible)
log_table = table_schema(
  'log',
  mode=TableMode.SCHEMALESS,
)

# Drop (marks for deletion)
old_table = table_schema(
  'old_table',
  mode=TableMode.DROP,
  drop=True,
)

Complete Table Example

from surql.schema.fields import (
  string_field,
  int_field,
  datetime_field,
  record_field,
  array_field,
  bool_field,
)
from surql.schema.table import (
  table_schema,
  unique_index,
  search_index,
  event,
  TableMode,
)

user_table = table_schema(
  'user',
  mode=TableMode.SCHEMAFULL,
  fields=[
    # Basic fields
    string_field('username', assertion='string::len($value) >= 3'),
    string_field('email', assertion='string::is::email($value)'),
    string_field('password_hash'),

    # Nested fields
    string_field('name.first'),
    string_field('name.last'),

    # Optional fields
    string_field('bio', default='""'),
    string_field('avatar_url', default='NONE'),

    # Numbers
    int_field('age', assertion='$value >= 13'),
    int_field('follower_count', default='0'),

    # Boolean
    bool_field('is_verified', default='false'),
    bool_field('is_active', default='true'),

    # Timestamps
    datetime_field('created_at', default='time::now()', readonly=True),
    datetime_field('updated_at', default='time::now()'),
    datetime_field('last_login', default='NONE'),

    # Collections
    array_field('roles', default='["user"]'),
    array_field('tags', default='[]'),
  ],
  indexes=[
    unique_index('username_idx', ['username']),
    unique_index('email_idx', ['email']),
    search_index('bio_search', ['bio']),
  ],
  events=[
    event(
      'update_timestamp',
      '$event = "UPDATE"',
      'UPDATE $value SET updated_at = time::now()',
    ),
  ],
  permissions={
    'select': '$auth.id = id OR $auth.admin = true',
    'update': '$auth.id = id',
    'delete': '$auth.admin = true',
  },
)

Edge Schemas

Edges represent relationships between records in SurrealDB's graph model.

surql supports two edge table modes:

  1. TYPE RELATION (default): Modern SurrealDB graph edges with automatic in/out fields
  2. SCHEMAFULL: Traditional tables with explicit in/out fields

TYPE RELATION Edges (Default)

The modern approach uses TYPE RELATION syntax where SurrealDB automatically manages in/out fields.

from surql.schema.edge import edge_schema
from surql.schema.fields import datetime_field

follows_edge = edge_schema(
  'follows',
  from_table='user',
  to_table='user',
  fields=[
    datetime_field('followed_at', default='time::now()', readonly=True),
  ],
)

Generates:

DEFINE TABLE follows TYPE RELATION FROM user TO user;
DEFINE FIELD followed_at ON TABLE follows TYPE datetime DEFAULT time::now() READONLY;

Edge with Properties

likes_edge = edge_schema(
  'likes',
  from_table='user',
  to_table='post',
  fields=[
    datetime_field('liked_at', default='time::now()', readonly=True),
    string_field('reaction', default='"like"'),  # like, love, wow, etc.
  ],
)

SCHEMAFULL Edges

For compatibility with traditional schemas, use SCHEMAFULL mode with explicit in/out fields:

from surql.schema.edge import EdgeMode, schemafull_edge
from surql.schema.fields import record_field, string_field, float_field, array_field

# entity_relation edge with explicit in/out fields
entity_relation = schemafull_edge(
  'entity_relation',
  fields=[
    record_field('in', table='entity'),
    record_field('out', table='entity'),
    string_field('relation_type'),
    float_field('confidence'),
    array_field('source_documents', default='[]'),
  ],
)

Generates:

DEFINE TABLE entity_relation SCHEMAFULL;
DEFINE FIELD in ON TABLE entity_relation TYPE record<entity>;
DEFINE FIELD out ON TABLE entity_relation TYPE record<entity>;
DEFINE FIELD relation_type ON TABLE entity_relation TYPE string;
DEFINE FIELD confidence ON TABLE entity_relation TYPE float;
DEFINE FIELD source_documents ON TABLE entity_relation TYPE array DEFAULT [];

Alternative syntax using edge_schema:

entity_relation = edge_schema(
  'entity_relation',
  mode=EdgeMode.SCHEMAFULL,
  fields=[
    record_field('in', table='entity'),
    record_field('out', table='entity'),
    string_field('relation_type'),
    float_field('confidence'),
  ],
)

Weighted Edges

from surql.schema.fields import float_field

similarity_edge = edge_schema(
  'similar_to',
  from_table='post',
  to_table='post',
  fields=[
    float_field('score', assertion='$value >= 0 AND $value <= 1'),
    string_field('algorithm'),
  ],
)

Multi-Type Edges

# Edges can connect different table types
tagged_edge = edge_schema(
  'tagged',
  from_table='post',  # Posts are tagged
  to_table='tag',     # With tags
  fields=[
    datetime_field('tagged_at', default='time::now()'),
  ],
)

Choosing Edge Mode

Use TYPE RELATION (default) when:

  • Building new applications with SurrealDB
  • You want SurrealDB to manage edge structure automatically
  • You need constrained edge endpoints (FROM/TO tables)

Use SCHEMAFULL when:

  • Migrating from traditional graph databases
  • Compatibility with existing schemas
  • You need full control over edge field definitions
  • Working with legacy SurrealDB schemas

Indexes

Index Types

from surql.schema.table import index, unique_index, search_index, IndexType

# Standard index
index('name_idx', ['name'], IndexType.STANDARD)

# Unique index
unique_index('email_idx', ['email'])

# Full-text search index
search_index('content_search', ['title', 'description', 'content'])

Composite Indexes

# Index on multiple columns
unique_index('user_post_idx', ['user_id', 'slug'])

# Order matters for range queries
index('date_user_idx', ['created_at', 'user_id'])

Index Examples

from surql.schema.table import table_schema, unique_index, search_index, index

product_table = table_schema(
  'product',
  fields=[
    string_field('sku'),
    string_field('name'),
    string_field('description'),
    float_field('price'),
    string_field('category'),
    datetime_field('created_at', default='time::now()'),
  ],
  indexes=[
    # Unique constraint
    unique_index('sku_idx', ['sku']),

    # Full-text search
    search_index('product_search', ['name', 'description']),

    # Filtering/sorting
    index('category_price_idx', ['category', 'price']),
    index('created_idx', ['created_at']),
  ],
)

Vector Indexes (HNSW)

HNSW (Hierarchical Navigable Small World) indexes enable approximate nearest-neighbor search for vector embeddings. HNSW is the recommended vector index type for SurrealDB 2.x+.

from surql.schema.table import hnsw_index, HnswDistanceType, MTreeVectorType

# Basic HNSW index (defaults: EUCLIDEAN distance, F64 vector type)
hnsw_index('embedding_idx', 'embedding', 1536)

# OpenAI embeddings with cosine similarity
hnsw_index(
  'embedding_idx',
  'embedding',
  1536,
  distance=HnswDistanceType.COSINE,
  vector_type=MTreeVectorType.F32,
)

# With EFC and M tuning parameters
hnsw_index(
  'embedding_idx',
  'embedding',
  3072,
  distance=HnswDistanceType.COSINE,
  vector_type=MTreeVectorType.F32,
  efc=500,   # Exploration factor during construction (default: 150)
  m=16,      # Max bidirectional links per node (default: 12)
)

Distance metrics (HnswDistanceType):

Metric Use case
COSINE Normalized vectors (OpenAI, Cohere, sentence transformers)
EUCLIDEAN Raw feature vectors, spatial data
MANHATTAN Grid-based or sparse vectors
CHEBYSHEV Maximum-dimension deviation
HAMMING Binary or categorical vectors
JACCARD Set similarity
MINKOWSKI Generalized distance
PEARSON Correlation-based similarity

Vector types (MTreeVectorType): F64, F32, I64, I32, I16

Generated SQL:

DEFINE INDEX embedding_idx ON TABLE documents
  COLUMNS embedding HNSW DIMENSION 1536 DIST COSINE TYPE F32 EFC 500 M 16;

Vector Indexes (MTREE)

MTREE indexes are the legacy vector index type. For new projects, prefer HNSW.

from surql.schema.table import mtree_index, MTreeDistanceType, MTreeVectorType

mtree_index(
  'embedding_idx',
  'embedding',
  1536,
  distance=MTreeDistanceType.COSINE,
  vector_type=MTreeVectorType.F32,
)

MTreeDistanceType supports: COSINE, EUCLIDEAN, MANHATTAN, MINKOWSKI.

Events and Triggers

Events are database triggers that execute when conditions are met.

Basic Event

from surql.schema.table import event

email_change_event = event(
  'email_changed',
  '$before.email != $after.email',
  '''
    CREATE audit_log SET
      table = 'user',
      record = $value.id,
      field = 'email',
      old_value = $before.email,
      new_value = $after.email,
      changed_at = time::now()
  '''
)

Event Types

# On INSERT
event(
  'new_user',
  '$event = "CREATE"',
  'CREATE notification SET type = "new_user", user = $value.id',
)

# On UPDATE
event(
  'user_updated',
  '$event = "UPDATE"',
  'UPDATE $value SET updated_at = time::now()',
)

# On DELETE
event(
  'user_deleted',
  '$event = "DELETE"',
  'CREATE audit_log SET action = "delete", user = $before.id',
)

# Conditional
event(
  'verify_email',
  '$before.is_verified = false AND $after.is_verified = true',
  'CREATE email_queue SET type = "welcome", user = $value.id',
)

Complex Event Logic

event(
  'auto_publish',
  '''
    $event = "UPDATE" AND
    $before.status = "draft" AND
    $after.status = "published" AND
    $after.published_at = NONE
  ''',
  '''
    UPDATE $value SET
      published_at = time::now(),
      updated_at = time::now()
  '''
)

Permissions

Define row-level security with permissions.

Basic Permissions

permissions = {
  'select': 'true',  # Anyone can read
  'create': '$auth != NONE',  # Must be authenticated
  'update': '$auth.id = id',  # Can only update own records
  'delete': '$auth.admin = true',  # Only admins can delete
}

user_table = table_schema(
  'user',
  fields=[...],
  permissions=permissions,
)

Complex Permission Rules

post_permissions = {
  # Anyone can read published posts, author can read drafts
  'select': 'published = true OR author = $auth.id',

  # Only authenticated users can create
  'create': '$auth != NONE',

  # Only author can update
  'update': 'author = $auth.id',

  # Author or admin can delete
  'delete': 'author = $auth.id OR $auth.admin = true',
}

Field-Level Permissions

from surql.schema.fields import string_field

# Field with custom permissions
email_field = string_field(
  'email',
  permissions={
    'select': '$auth.id = $parent.id OR $auth.admin = true',
    'update': '$auth.id = $parent.id',
  }
)

Functional Composition

surql emphasizes functional composition for building schemas.

Composing Tables

from surql.schema.table import (
  table_schema,
  with_fields,
  with_indexes,
  with_events,
  with_permissions,
)
from surql.schema.fields import string_field, datetime_field

# Start with base table
base_table = table_schema('user', mode=TableMode.SCHEMAFULL)

# Add fields
table_with_fields = with_fields(
  base_table,
  string_field('username'),
  string_field('email'),
)

# Add indexes
table_with_indexes = with_indexes(
  table_with_fields,
  unique_index('username_idx', ['username']),
  unique_index('email_idx', ['email']),
)

# Add timestamps
final_table = with_fields(
  table_with_indexes,
  datetime_field('created_at', default='time::now()'),
  datetime_field('updated_at', default='time::now()'),
)

Reusable Components

# Define reusable field sets
def timestamp_fields():
  return [
    datetime_field('created_at', default='time::now()', readonly=True),
    datetime_field('updated_at', default='time::now()'),
  ]

def soft_delete_fields():
  return [
    bool_field('is_deleted', default='false'),
    datetime_field('deleted_at', default='NONE'),
  ]

# Compose tables
user_table = table_schema(
  'user',
  fields=[
    string_field('username'),
    string_field('email'),
    *timestamp_fields(),
    *soft_delete_fields(),
  ],
)

Schema Builders

def auditable_table(name: str, fields: list):
  """Create a table with audit fields."""
  return table_schema(
    name,
    mode=TableMode.SCHEMAFULL,
    fields=[
      *fields,
      *timestamp_fields(),
      record_field('created_by', table='user'),
      record_field('updated_by', table='user'),
    ],
  )

# Use the builder
product_table = auditable_table(
  'product',
  [
    string_field('name'),
    float_field('price'),
  ],
)

Best Practices

1. Use Type-Safe Helpers

# Good - Type-safe helpers
string_field('email', assertion='string::is::email($value)')

# Avoid - Raw field definition
field('email', FieldType.STRING, assertion='string::is::email($value)')

2. Validate Data at Schema Level

# Add assertions for data integrity
int_field('age', assertion='$value >= 0 AND $value <= 150')
string_field('status', assertion='$value INSIDE ["active", "inactive", "pending"]')
float_field('price', assertion='$value > 0')

3. Use Readonly for Immutable Fields

# Prevent modification of critical fields
datetime_field('created_at', default='time::now()', readonly=True)
string_field('id', readonly=True)

4. Provide Sensible Defaults

# Make optional fields clear with defaults
array_field('tags', default='[]')
bool_field('is_active', default='true')
string_field('status', default='"pending"')
# Group related fields together
fields = [
  # Identity
  string_field('username'),
  string_field('email'),

  # Profile
  string_field('name.first'),
  string_field('name.last'),
  string_field('bio'),

  # Metadata
  datetime_field('created_at', default='time::now()'),
  datetime_field('updated_at', default='time::now()'),
]

6. Use Meaningful Index Names

# Good - Clear purpose
unique_index('email_unique', ['email'])
index('user_created_idx', ['created_at'])

# Avoid - Unclear names
index('idx1', ['email'])

7. Document Complex Assertions

# Document complex validation
string_field(
  'phone',
  assertion='''
    # US phone number format: (XXX) XXX-XXXX
    string::len($value) = 14 AND
    string::slice($value, 0, 1) = "(" AND
    string::slice($value, 4, 5) = ")"
  '''
)

8. Use Events for Automation

# Auto-update timestamps
event(
  'auto_update_timestamp',
  '$event = "UPDATE"',
  'UPDATE $value SET updated_at = time::now()',
)

# Auto-compute fields
event(
  'compute_full_name',
  '$event = "CREATE" OR $event = "UPDATE"',
  'UPDATE $value SET full_name = string::concat(name.first, " ", name.last)',
)

9. Implement Soft Deletes

# Use soft deletes instead of hard deletes
table_schema(
  'user',
  fields=[
    # ... other fields
    bool_field('is_deleted', default='false'),
    datetime_field('deleted_at', default='NONE'),
  ],
  permissions={
    'select': 'is_deleted = false OR $auth.admin = true',
    'delete': '$auth.admin = true',
  },
  events=[
    event(
      'soft_delete',
      '$event = "DELETE"',
      'UPDATE $value SET is_deleted = true, deleted_at = time::now()',
    ),
  ],
)

10. Version Your Schemas

# Include version in schema
table_schema(
  'user',
  fields=[
    # ... other fields
    int_field('schema_version', default='1', readonly=True),
  ],
)

Complete Examples

E-commerce Product Schema

from surql.schema.fields import *
from surql.schema.table import *

product_schema = table_schema(
  'product',
  mode=TableMode.SCHEMAFULL,
  fields=[
    # Identity
    string_field('sku', assertion='string::len($value) > 0'),
    string_field('name', assertion='string::len($value) > 0'),
    string_field('slug', assertion='string::len($value) > 0'),

    # Details
    string_field('description'),
    array_field('images', default='[]'),
    array_field('tags', default='[]'),

    # Pricing
    decimal_field('price', assertion='$value > 0'),
    decimal_field('cost', assertion='$value >= 0'),
    string_field('currency', default='"USD"'),

    # Inventory
    int_field('stock', default='0', assertion='$value >= 0'),
    bool_field('in_stock', default='true'),
    int_field('low_stock_threshold', default='10'),

    # Organization
    record_field('category', table='category'),
    record_field('brand', table='brand'),

    # Status
    bool_field('is_active', default='true'),
    bool_field('is_featured', default='false'),

    # Timestamps
    datetime_field('created_at', default='time::now()', readonly=True),
    datetime_field('updated_at', default='time::now()'),
  ],
  indexes=[
    unique_index('sku_idx', ['sku']),
    unique_index('slug_idx', ['slug']),
    search_index('product_search', ['name', 'description']),
    index('category_idx', ['category']),
    index('price_idx', ['price']),
  ],
  events=[
    event(
      'update_in_stock',
      '$event = "UPDATE"',
      'UPDATE $value SET in_stock = (stock > 0)',
    ),
  ],
)

Social Media Schema

# User schema
user_schema = table_schema(
  'user',
  mode=TableMode.SCHEMAFULL,
  fields=[
    string_field('username', assertion='string::len($value) >= 3'),
    string_field('email', assertion='string::is::email($value)'),
    string_field('bio', default='""'),
    array_field('interests', default='[]'),
    datetime_field('created_at', default='time::now()'),
  ],
  indexes=[
    unique_index('username_idx', ['username']),
    unique_index('email_idx', ['email']),
  ],
)

# Post schema
post_schema = table_schema(
  'post',
  mode=TableMode.SCHEMAFULL,
  fields=[
    string_field('content'),
    record_field('author', table='user'),
    array_field('media', default='[]'),
    int_field('like_count', default='0'),
    datetime_field('created_at', default='time::now()'),
  ],
  indexes=[
    index('author_created_idx', ['author', 'created_at']),
  ],
)

# Relationships
follows_edge = edge_schema(
  'follows',
  from_table='user',
  to_table='user',
  fields=[
    datetime_field('followed_at', default='time::now()'),
  ],
)

likes_edge = edge_schema(
  'likes',
  from_table='user',
  to_table='post',
  fields=[
    datetime_field('liked_at', default='time::now()'),
  ],
)

Next Steps

  • Learn about Migrations to apply your schemas to the database
  • Explore Query Building to work with your schema data
  • Check out Examples for more complex schema patterns