Example
{
  "clusters": [
    { "name": "ingestion" },
    { "name": "warehouse" },
    { "name": "serving", "parent": "warehouse" }
  ],
  "pipelines": [
    {
      "name": "ingest-events",
      "input_sources": ["raw_clickstream"],
      "output_sources": ["events"],
      "cluster": "ingestion",
      "schedule": "Every 5 min",
      "duration": 2,
      "tags": ["streaming"],
      "links": { "airflow": "https://airflow.co/dags/ingest_events" }
    },
    {
      "name": "build-profiles",
      "input_sources": ["events", "raw_users"],
      "output_sources": ["user_profiles"],
      "cluster": "warehouse",
      "schedule": "Daily 02:00",
      "duration": 45,
      "cost": 12.50,
      "owner": "data-team@co.com",
      "upstream_pipelines": ["ingest-events"],
      "tags": ["daily", "core"]
    },
    {
      "name": "export-crm",
      "input_sources": ["user_profiles"],
      "output_sources": ["salesforce_sync"],
      "cluster": "serving",
      "group": "exports",
      "upstream_pipelines": ["build-profiles"]
    },
    {
      "name": "export-analytics",
      "input_sources": ["user_profiles"],
      "output_sources": ["amplitude_sync"],
      "cluster": "serving",
      "group": "exports",
      "upstream_pipelines": ["build-profiles"]
    }
  ],
  "datasources": [
    {
      "name": "raw_users",
      "type": "postgres",
      "owner": "platform@co.com",
      "tags": ["pii"],
      "metadata": { "schema": "public", "table": "users" },
      "attributes": [
        { "name": "id" },
        { "name": "email" },
        { "name": "created_at" }
      ]
    },
    {
      "name": "raw_clickstream",
      "type": "kafka",
      "metadata": { "topic": "clicks.prod" }
    },
    {
      "name": "events",
      "type": "s3",
      "attributes": [
        { "name": "event_id" },
        { "name": "user_id", "from": "raw_users::id" },
        { "name": "event_type" },
        { "name": "ts" }
      ]
    },
    {
      "name": "user_profiles",
      "type": "snowflake",
      "attributes": [
        { "name": "user_id", "from": "raw_users::id" },
        { "name": "email", "from": "raw_users::email" },
        { "name": "event_count", "from": "events::event_id" },
        { "name": "last_seen", "from": "events::ts" },
        { "name": "tenure_days", "from": ["raw_users::created_at", "events::ts"] }
      ]
    }
  ]
}