https://github.com/tinybirdco/tinybird-sdk-python
https://github.com/tinybirdco/tinybird-sdk-python
Last synced: 3 months ago
JSON representation
- Host: GitHub
- URL: https://github.com/tinybirdco/tinybird-sdk-python
- Owner: tinybirdco
- License: mit
- Created: 2022-01-24T13:15:36.000Z (over 4 years ago)
- Default Branch: main
- Last Pushed: 2026-03-23T16:50:29.000Z (3 months ago)
- Last Synced: 2026-03-24T14:30:10.722Z (3 months ago)
- Language: Python
- Size: 244 KB
- Stars: 7
- Watchers: 3
- Forks: 1
- Open Issues: 2
-
Metadata Files:
- Readme: README.md
- Contributing: CONTRIBUTING.md
- License: LICENSE
Awesome Lists containing this project
README
# tinybird-sdk (Python)
> **Note:** This package is experimental. APIs may change between versions.
A Python SDK for defining Tinybird resources with a TypeScript-SDK-like workflow.
Define your datasources, pipes, and queries in Python and sync them directly to Tinybird.
## Installation
```bash
pip install tinybird-sdk
```
## Requirements
- Python `>=3.11`
- Server-side usage only (do not expose Tinybird credentials in browser code)
## Quick Start
### 1. Initialize your project
```bash
tinybird init
```
This creates:
- `tinybird.config.json` - Configuration file
- `lib/datasources.py` - Define your datasources
- `lib/pipes.py` - Define your pipes/endpoints
- `lib/client.py` - Your Tinybird client module
### 2. Configure your token
Create a `.env.local` file:
```env
TINYBIRD_TOKEN=p.your_token_here
```
### 3. Define your datasources
```python
# lib/datasources.py
from tinybird_sdk import define_datasource, t, engine
page_views = define_datasource(
"page_views",
{
"description": "Page view tracking data",
"schema": {
"timestamp": t.date_time(),
"pathname": t.string(),
"session_id": t.string(),
"country": t.string().low_cardinality().nullable(),
},
"engine": engine.merge_tree(
{
"sorting_key": ["pathname", "timestamp"],
}
),
},
)
```
### 4. Define your endpoints
```python
# lib/pipes.py
from tinybird_sdk import define_endpoint, node, p, t
top_pages = define_endpoint(
"top_pages",
{
"description": "Get the most visited pages",
"params": {
"start_date": p.date_time(),
"end_date": p.date_time(),
"limit": p.int32().optional(10),
},
"nodes": [
node(
{
"name": "aggregated",
"sql": """
SELECT pathname, count() AS views
FROM page_views
WHERE timestamp >= {{DateTime(start_date)}}
AND timestamp <= {{DateTime(end_date)}}
GROUP BY pathname
ORDER BY views DESC
LIMIT {{Int32(limit, 10)}}
""",
}
)
],
"output": {
"pathname": t.string(),
"views": t.uint64(),
},
},
)
```
### 5. Create your client
```python
# lib/client.py
from tinybird_sdk import Tinybird
from .datasources import page_views
from .pipes import top_pages
tinybird = Tinybird(
{
"datasources": {"page_views": page_views},
"pipes": {"top_pages": top_pages},
}
)
__all__ = ["tinybird", "page_views", "top_pages"]
```
### 6. Optional: use a stable local import path
In larger applications, keep a single module (for example `lib/client.py`) and import from there:
```python
from lib.client import tinybird
```
### 7. Start development
```bash
tinybird dev
```
This watches your schema files and syncs changes to Tinybird.
### 8. Use the client
```python
from lib.client import tinybird
# Ingest one row
tinybird.page_views.ingest(
{
"timestamp": "2024-01-15 10:30:00",
"pathname": "/home",
"session_id": "abc123",
"country": "US",
}
)
# Query endpoint
result = tinybird.top_pages.query(
{
"start_date": "2024-01-01 00:00:00",
"end_date": "2024-01-31 23:59:59",
"limit": 5,
}
)
```
### 9. Manage datasource rows
```python
from lib.client import tinybird
# Datasource accessors support: ingest, append, replace, delete, truncate
tinybird.page_views.ingest(
{
"timestamp": "2024-01-15 10:30:00",
"pathname": "/pricing",
"session_id": "session_123",
"country": "US",
}
)
tinybird.page_views.append(
{
"url": "https://example.com/page_views.csv",
}
)
tinybird.page_views.replace(
{
"url": "https://example.com/page_views_full_snapshot.csv",
}
)
tinybird.page_views.delete(
{
"delete_condition": "country = 'XX'",
}
)
tinybird.page_views.delete(
{
"delete_condition": "country = 'XX'",
"dry_run": True,
}
)
tinybird.page_views.truncate()
```
## Public Tinybird API (Optional)
If you want a low-level API wrapper decoupled from the high-level client layer,
use `create_tinybird_api()` directly with `base_url` and `token`:
```python
from tinybird_sdk import create_tinybird_api
api = create_tinybird_api(
{
"base_url": "https://api.tinybird.co",
"token": "p.your_token",
}
)
# Query endpoint pipe
top_pages = api.query(
"top_pages",
{
"start_date": "2024-01-01",
"end_date": "2024-01-31",
"limit": 5,
},
)
# Ingest one row
api.ingest(
"events",
{
"timestamp": "2024-01-15 10:30:00",
"event_name": "page_view",
"pathname": "/home",
},
)
# Ingest retry behavior (disabled by default):
# - 429 retries use Retry-After / X-RateLimit-Reset headers.
# - 503 retries use SDK default exponential backoff.
api.ingest(
"events",
{
"timestamp": "2024-01-15 10:31:00",
"event_name": "button_click",
"pathname": "/pricing",
},
{
"max_retries": 3,
},
)
# Import rows from URL/file
api.append_datasource(
"events",
{
"url": "https://example.com/events.csv",
},
)
# Delete rows matching a SQL condition
api.delete_datasource(
"events",
{
"delete_condition": "event_name = 'test'",
},
)
# Delete dry run
api.delete_datasource(
"events",
{
"delete_condition": "event_name = 'test'",
"dry_run": True,
},
)
# Truncate datasource
api.truncate_datasource("events")
# Execute raw SQL
sql_result = api.sql("SELECT count() AS total FROM events")
# Optional per-request token override
workspace_response = api.request_json(
"/v1/workspace",
token="p.branch_or_jwt_token",
)
```
This Tinybird API is standalone and can be used without `create_client()` or `Tinybird(...)`.
## JWT Token Creation
Create short-lived JWT tokens for secure scoped access to Tinybird resources.
```python
from datetime import datetime, timedelta, timezone
from tinybird_sdk import create_client
client = create_client(
{
"base_url": "https://api.tinybird.co",
"token": "p.your_admin_token",
}
)
result = client.tokens.create_jwt(
{
"name": "user_123_session",
"expires_at": datetime.now(tz=timezone.utc) + timedelta(hours=1),
"scopes": [
{
"type": "PIPES:READ",
"resource": "user_dashboard",
"fixed_params": {"user_id": 123},
}
],
"limits": {"rps": 10},
}
)
jwt_token = result["token"]
```
### Scope Types
| Scope | Description |
|-------|-------------|
| `PIPES:READ` | Read access to a specific pipe endpoint |
| `DATASOURCES:READ` | Read access to a datasource (with optional `filter`) |
| `DATASOURCES:APPEND` | Append access to a datasource |
### Scope Options
- **`fixed_params`**: For pipes, embed parameters that cannot be overridden by the caller.
- **`filter`**: For datasources, append a SQL WHERE clause (for example, `"org_id = 'acme'"`).
## CLI Commands
This package installs `tinybird` as a runtime dependency.
`tinybird generate` is handled by this SDK; other commands are delegated to the Tinybird CLI.
### `tinybird init`
Initialize a new Tinybird project:
```bash
tinybird init
tinybird init --force
tinybird init --skip-login
```
### `tinybird migrate`
Migrate local Tinybird datafiles (`.datasource`, `.pipe`, `.connection`) into a Python definitions file.
```bash
tinybird migrate "tinybird/**/*.datasource" "tinybird/**/*.pipe" "tinybird/**/*.connection"
tinybird migrate tinybird/legacy --out ./tinybird.migration.py
tinybird migrate tinybird --dry-run
```
### `tinybird dev`
```bash
tinybird dev
tinybird dev --local
tinybird dev --branch
```
### `tinybird build`
```bash
tinybird build
tinybird build --dry-run
tinybird build --local
tinybird build --branch
```
### `tinybird deploy`
```bash
tinybird deploy
tinybird deploy --check
tinybird deploy --allow-destructive-operations
```
### `tinybird pull`
```bash
tinybird pull
tinybird pull --output-dir ./tinybird-datafiles
tinybird pull --force
```
### `tinybird login`
```bash
tinybird login
```
### `tinybird branch`
```bash
tinybird branch list
tinybird branch status
tinybird branch delete
```
### `tinybird info`
```bash
tinybird info
tinybird info --json
```
## Configuration
Create a `tinybird.config.json` (or `tinybird.config.py` / `tinybird_config.py` for dynamic logic) in your project root:
```json
{
"include": [
"lib/*.py",
"tinybird/**/*.datasource",
"tinybird/**/*.pipe",
"tinybird/**/*.connection"
],
"token": "${TINYBIRD_TOKEN}",
"base_url": "https://api.tinybird.co",
"dev_mode": "branch"
}
```
You can mix Python files with raw `.datasource`, `.pipe`, and `.connection` files for incremental migration.
`include` supports glob patterns.
### Config File Formats
Supported config files (search order):
| File | Description |
|------|-------------|
| `tinybird.config.py` | Python config with dynamic logic |
| `tinybird_config.py` | Python config alias |
| `tinybird.config.json` | JSON config (default for new projects) |
| `tinybird.json` | Legacy JSON config |
For Python configs, export one of:
- `config` dict
- `CONFIG` dict
- `default` dict
- `get_config()` returning a dict
Example:
```python
# tinybird.config.py
config = {
"include": ["lib/*.py"],
"token": "${TINYBIRD_TOKEN}",
"base_url": "https://api.tinybird.co",
"dev_mode": "branch",
}
```
### Configuration Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `include` | `list[str]` | *required* | File paths or glob patterns for Python and raw datafiles |
| `token` | `str` | *required* | API token; supports `${ENV_VAR}` interpolation. If missing, SDK falls back to `TINYBIRD_TOKEN`, then `.tinyb` |
| `base_url` | `str` | `"https://api.tinybird.co"` | Tinybird API URL |
| `dev_mode` | `"branch"` \| `"local"` | `"branch"` | Development mode |
If `base_url` is omitted, SDK resolves it from `TINYBIRD_URL`, then `TINYBIRD_HOST`, then `.tinyb` (`host`), and finally defaults to `https://api.tinybird.co`.
### Local Development Mode
Use a local Tinybird container for development without affecting cloud workspaces:
1. Start the local container:
```bash
docker run -d -p 7181:7181 --name tinybird-local tinybirdco/tinybird-local:latest
```
2. Configure your project:
```json
{
"dev_mode": "local"
}
```
Or use CLI flag:
```bash
tinybird dev --local
```
## Defining Resources
### Connections
```python
from tinybird_sdk import define_gcs_connection, define_kafka_connection, define_s3_connection, secret
events_kafka = define_kafka_connection(
"events_kafka",
{
"bootstrap_servers": "kafka.example.com:9092",
"security_protocol": "SASL_SSL",
"sasl_mechanism": "PLAIN",
"key": secret("KAFKA_KEY"),
"secret": secret("KAFKA_SECRET"),
},
)
landing_s3 = define_s3_connection(
"landing_s3",
{
"region": "us-east-1",
"arn": "arn:aws:iam::123456789012:role/tinybird-s3-access",
},
)
landing_gcs = define_gcs_connection(
"landing_gcs",
{
"service_account_credentials_json": secret("GCS_SERVICE_ACCOUNT_CREDENTIALS_JSON"),
},
)
```
### Datasources
```python
from tinybird_sdk import define_datasource, engine, t
events = define_datasource(
"events",
{
"description": "Event tracking data",
"schema": {
"timestamp": t.date_time(),
"event_name": t.string().low_cardinality(),
"user_id": t.string().nullable(),
"properties": t.string(),
},
"engine": engine.merge_tree(
{
"sorting_key": ["event_name", "timestamp"],
"partition_key": "toYYYYMM(timestamp)",
"ttl": "timestamp + INTERVAL 90 DAY",
}
),
},
)
```
### Endpoints (API pipes)
```python
from tinybird_sdk import define_endpoint, node, p, t
top_events = define_endpoint(
"top_events",
{
"description": "Get the most frequent events",
"params": {
"start_date": p.date_time(),
"end_date": p.date_time(),
"limit": p.int32().optional(10),
},
"nodes": [
node(
{
"name": "aggregated",
"sql": """
SELECT event_name, count() AS event_count
FROM events
WHERE timestamp >= {{DateTime(start_date)}}
AND timestamp <= {{DateTime(end_date)}}
GROUP BY event_name
ORDER BY event_count DESC
LIMIT {{Int32(limit, 10)}}
""",
}
)
],
"output": {
"event_name": t.string(),
"event_count": t.uint64(),
},
},
)
```
### Internal Pipes (not exposed as API)
```python
from tinybird_sdk import define_pipe, node, p
filtered_events = define_pipe(
"filtered_events",
{
"description": "Filter events by date range",
"params": {
"start_date": p.date_time(),
"end_date": p.date_time(),
},
"nodes": [
node(
{
"name": "filtered",
"sql": """
SELECT * FROM events
WHERE timestamp >= {{DateTime(start_date)}}
AND timestamp <= {{DateTime(end_date)}}
""",
}
)
],
},
)
```
### Materialized Views
```python
from tinybird_sdk import define_datasource, define_materialized_view, engine, node, t
daily_stats = define_datasource(
"daily_stats",
{
"schema": {
"date": t.date(),
"pathname": t.string(),
"views": t.simple_aggregate_function("sum", t.uint64()),
"unique_sessions": t.aggregate_function("uniq", t.string()),
},
"engine": engine.aggregating_merge_tree({"sorting_key": ["date", "pathname"]}),
},
)
daily_stats_mv = define_materialized_view(
"daily_stats_mv",
{
"datasource": daily_stats,
"nodes": [
node(
{
"name": "aggregate",
"sql": """
SELECT
toDate(timestamp) AS date,
pathname,
count() AS views,
uniqState(session_id) AS unique_sessions
FROM page_views
GROUP BY date, pathname
""",
}
)
],
},
)
```
### Copy Pipes
```python
from tinybird_sdk import define_copy_pipe, node
# Scheduled copy pipe
daily_snapshot = define_copy_pipe(
"daily_snapshot",
{
"datasource": events,
"copy_schedule": "0 0 * * *",
"copy_mode": "append",
"nodes": [
node(
{
"name": "snapshot",
"sql": """
SELECT today() AS snapshot_date, event_name, count() AS events
FROM events
WHERE toDate(timestamp) = today() - 1
GROUP BY event_name
""",
}
)
],
},
)
# On-demand copy pipe
manual_report = define_copy_pipe(
"manual_report",
{
"datasource": events,
"copy_schedule": "@on-demand",
"copy_mode": "replace",
"nodes": [
node(
{
"name": "report",
"sql": "SELECT * FROM events WHERE timestamp >= now() - interval 7 day",
}
)
],
},
)
```
### Sink Pipes
Use sink pipes to publish query results to external systems.
The SDK supports Kafka and S3 sinks.
```python
from tinybird_sdk import define_sink_pipe, node
# Kafka sink
kafka_events_sink = define_sink_pipe(
"kafka_events_sink",
{
"sink": {
"connection": events_kafka,
"topic": "events_export",
"schedule": "@on-demand",
},
"nodes": [
node(
{
"name": "publish",
"sql": "SELECT timestamp, payload FROM kafka_events",
}
)
],
},
)
# S3 sink
s3_events_sink = define_sink_pipe(
"s3_events_sink",
{
"sink": {
"connection": landing_s3,
"bucket_uri": "s3://my-bucket/exports/",
"file_template": "events_{date}",
"format": "csv",
"schedule": "@once",
"strategy": "create_new",
"compression": "gzip",
},
"nodes": [
node(
{
"name": "export",
"sql": "SELECT timestamp, session_id FROM s3_landing",
}
)
],
},
)
```
### Static Tokens
```python
from tinybird_sdk import define_datasource, define_endpoint, define_token, node, t
app_token = define_token("app_read")
ingest_token = define_token("ingest_token")
events = define_datasource(
"events",
{
"schema": {
"timestamp": t.date_time(),
"event_name": t.string(),
},
"tokens": [
{"token": app_token, "scope": "READ"},
{"token": ingest_token, "scope": "APPEND"},
],
},
)
top_events = define_endpoint(
"top_events",
{
"nodes": [node({"name": "endpoint", "sql": "SELECT * FROM events LIMIT 10"})],
"output": {"timestamp": t.date_time(), "event_name": t.string()},
"tokens": [{"token": app_token, "scope": "READ"}],
},
)
```
## Type Validators
Use `t.*` to define column types:
```python
from tinybird_sdk import t
schema = {
# Strings
"name": t.string(),
"id": t.uuid(),
"code": t.fixed_string(3),
# Numbers
"count": t.int32(),
"amount": t.float64(),
"big_number": t.uint64(),
"price": t.decimal(10, 2),
# Date/Time
"created_at": t.date_time(),
"updated_at": t.date_time64(3),
"birth_date": t.date(),
# Boolean
"is_active": t.bool(),
# Complex types
"tags": t.array(t.string()),
"metadata": t.map(t.string(), t.string()),
# Aggregate functions
"total": t.simple_aggregate_function("sum", t.uint64()),
"unique_users": t.aggregate_function("uniq", t.string()),
# Modifiers
"optional_field": t.string().nullable(),
"category": t.string().low_cardinality(),
"status": t.string().default("pending"),
}
```
## Parameter Validators
Use `p.*` to define query parameters:
```python
from tinybird_sdk import p
params = {
"start_date": p.date_time(),
"user_id": p.string(),
"limit": p.int32().optional(10),
"offset": p.int32().optional(0),
"status": p.string().optional("active").describe("Filter by status"),
}
```
## Engine Configurations
```python
from tinybird_sdk import engine
engine.merge_tree(
{
"sorting_key": ["user_id", "timestamp"],
"partition_key": "toYYYYMM(timestamp)",
"ttl": "timestamp + INTERVAL 90 DAY",
}
)
engine.replacing_merge_tree(
{
"sorting_key": ["id"],
"ver": "updated_at",
}
)
engine.summing_merge_tree(
{
"sorting_key": ["date", "category"],
"columns": ["count", "total"],
}
)
engine.aggregating_merge_tree(
{
"sorting_key": ["date"],
}
)
```
## Python App Integration
For Python web apps (FastAPI, Django, Flask), keep Tinybird definitions and client in a dedicated module and import that module from your app services.
The CLI automatically loads `.env.local` and `.env` files in project root when resolving configuration.
## Schema Inference Helpers
The `tinybird_sdk.infer` module can inspect datasource and pipe definitions:
```python
from tinybird_sdk.infer import infer_output_schema, infer_params_schema, infer_row_schema
row_schema = infer_row_schema(page_views)
params_schema = infer_params_schema(top_pages)
output_schema = infer_output_schema(top_pages)
```
## License
MIT