diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/README.md b/dbt tutorials/fabric-medallion-dbt-tutorial/README.md new file mode 100644 index 0000000..54a667c --- /dev/null +++ b/dbt tutorials/fabric-medallion-dbt-tutorial/README.md @@ -0,0 +1,40 @@ +# Build a medallion architecture in Microsoft Fabric using dbt (Preview) + +## Overview +This repository supports the Microsoft Fabric article on building a medallion architecture by using dbt. + +It contains the sample data and dbt project used to demonstrate an end-to-end flow from raw data ingestion to curated and reporting-ready outputs in Fabric. + +GitHub is used as a convenient sample source. The same architecture can ingest data from any source supported by Copy Job. + +## Architecture +GitHub → Copy Job → Lakehouse (Bronze) → dbt (Silver, Gold) → Warehouse + +- Copy Job ingests data into Bronze tables +- dbt transforms data into Silver and Gold models +- Warehouse stores curated outputs +- A Fabric pipeline orchestrates the workflow + +## Repository contents +- Sample CSV files (`customers.csv`, `products.csv`, `orders.csv`) +- dbt project for building Silver and Gold models + +## dbt models + +### Silver +- `silver_customers` standardizes country values +- `silver_products` cleans and converts price data +- `silver_orders` removes duplicates and standardizes quantity + +### Gold +- `customer_sales_summary` aggregates spend and quantity by customer +- `product_performance` aggregates units sold by product +- `daily_sales` aggregates quantity by day + +## Pipeline +Execution sequence: **Copy Jobs → Silver models → Gold models** + +## Tutorial +Microsoft Learn article: **[Add tutorial link here]** + +> Replace this link after the article is published. 
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/data/customers.csv b/dbt tutorials/fabric-medallion-dbt-tutorial/data/customers.csv new file mode 100644 index 0000000..a73048a --- /dev/null +++ b/dbt tutorials/fabric-medallion-dbt-tutorial/data/customers.csv @@ -0,0 +1,12 @@ +customer_id,customer_name,country +1,John Doe,USA +2,Jane Smith, +3,Ravi Kumar,india +4,Anna Lee,UK +5,John Doe,USA +6,Maria Garcia,spain +7,Li Wei,china +8,Ahmed Khan,uae +9,Emily Clark,Canada +10,Robert Brown, +11,Sophia Davis,USa diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/data/orders.csv b/dbt tutorials/fabric-medallion-dbt-tutorial/data/orders.csv new file mode 100644 index 0000000..594c336 --- /dev/null +++ b/dbt tutorials/fabric-medallion-dbt-tutorial/data/orders.csv @@ -0,0 +1,12 @@ +order_id,customer_id,product_id,quantity,order_date +1001,1,101,2,2024-01-01 +1002,2,102,1,2024-01-02 +1003,3,103,3,2024-01-03 +1003,3,103,3,2024-01-03 +1004,4,104,NULL,2024-01-04 +1005,5,105,2,2024-01-05 +1006,6,106,1,2024-01-06 +1007,7,107,4,2024-01-07 +1008,8,108,1,2024-01-08 +1009,9,109,2,2024-01-09 +1010,10,110,1,2024-01-10 diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/data/products.csv b/dbt tutorials/fabric-medallion-dbt-tutorial/data/products.csv new file mode 100644 index 0000000..898f040 --- /dev/null +++ b/dbt tutorials/fabric-medallion-dbt-tutorial/data/products.csv @@ -0,0 +1,11 @@ +product_id,product_name,category,price +101,Smart Shirt,Apparel,49.99 +102,Fitness Tracker,Accessories,"129.99" +103,Yoga Pants,Apparel,NaN +104,Smart Shoes,Footwear,199.99 +105,Gym Gloves,Accessories,15.5 +106,Water Bottle,Accessories,NULL +107,Running Shorts,Apparel,39.00 +108,Smart Cap,Apparel,59.99 +109,Fitness Band,Accessories,89.99 +110,Training Mat,Equipment,25.75 diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/dbt_project.yml b/dbt tutorials/fabric-medallion-dbt-tutorial/dbt_project.yml new file mode 100644 index 0000000..4d8bedb --- /dev/null +++ b/dbt 
tutorials/fabric-medallion-dbt-tutorial/dbt_project.yml
@@ -0,0 +1,16 @@
+name: zava_medallion_dbt
+version: 1.0
+config-version: 2
+
+profile: zava_profile
+
+model-paths: ["models"]
+
+models:
+  zava_medallion_dbt:
+    silver:
+      materialized: table  # each silver model also sets this in its own config()
+      tags: ["silver"]
+    gold:
+      materialized: table  # each gold model also sets this in its own config()
+      tags: ["gold"]
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/customer_sales_summary.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/customer_sales_summary.sql
new file mode 100644
index 0000000..278f5d4
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/customer_sales_summary.sql
@@ -0,0 +1,19 @@
+{{ config(materialized='table', tags=['gold']) }}
+
+-- Gold: one row per customer with total items ordered and total amount spent.
+-- Both joins are inner joins, so customers without any order in silver_orders,
+-- and orders whose product is missing from silver_products (silver_products
+-- drops rows with an unparseable price), do not contribute to the totals.
+SELECT
+    c.customer_id,
+    c.customer_name,
+    SUM(o.quantity) AS total_items,
+    SUM(o.quantity * p.price) AS total_spent  -- quantity x unit price, summed per customer
+FROM {{ ref('silver_customers') }} AS c
+INNER JOIN {{ ref('silver_orders') }} AS o
+    ON c.customer_id = o.customer_id
+INNER JOIN {{ ref('silver_products') }} AS p
+    ON o.product_id = p.product_id
+GROUP BY
+    c.customer_id,
+    c.customer_name
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/daily_sales.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/daily_sales.sql
new file mode 100644
index 0000000..1031c4e
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/daily_sales.sql
@@ -0,0 +1,8 @@
+{{ config(materialized='table', tags=['gold']) }}
+
+-- Gold: total quantity ordered per order_date, computed from silver_orders.
+SELECT
+    o.order_date,
+    SUM(o.quantity) AS total_quantity
+FROM {{ ref('silver_orders') }} AS o
+GROUP BY o.order_date
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/product_performance.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/product_performance.sql
new file mode 100644
index 0000000..a973510
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/gold/product_performance.sql
@@ -0,0 +1,16 @@
+{{ config(materialized='table', tags=['gold']) }}
+
+-- Gold: units sold per product.
+-- Grouping on product_id as well as product_name keeps two distinct products
+-- that happen to share a name from being merged into a single row.
+-- Products with no order in silver_orders are excluded by the inner join.
+SELECT
+    p.product_id,
+    p.product_name,
+    SUM(o.quantity) AS total_sold
+FROM {{ ref('silver_products') }} AS p
+INNER JOIN {{ ref('silver_orders') }} AS o
+    ON p.product_id = o.product_id
+GROUP BY
+    p.product_id,
+    p.product_name
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_customers.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_customers.sql
new file mode 100644
index 0000000..6ef3a95
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_customers.sql
@@ -0,0 +1,22 @@
+{{ config(materialized='table', tags=['silver']) }}
+
+-- Silver: customers with a normalised country value.
+-- Country is trimmed and upper-cased; NULL or blank values become 'UNKNOWN'.
+-- Trimming before upper-casing keeps padded values such as ' usa' from
+-- surviving as a separate country next to 'USA'.
+WITH trimmed AS (
+    SELECT
+        customer_id,
+        customer_name,
+        LTRIM(RTRIM(country)) AS country
+    FROM {{ source('bronze', 'customers') }}
+)
+
+SELECT
+    customer_id,
+    customer_name,
+    CASE
+        WHEN country IS NULL OR country = '' THEN 'UNKNOWN'
+        ELSE UPPER(country)
+    END AS country
+FROM trimmed
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_orders.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_orders.sql
new file mode 100644
index 0000000..3e0a255
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_orders.sql
@@ -0,0 +1,47 @@
+{{ config(materialized='table', tags=['silver']) }}
+
+-- Silver: at most one row per order_id, with quantity cast to INT.
+-- An order is dropped when its selected row has a quantity that cannot be cast.
+
+-- Cast quantity once so the ranking and the final filter share one value.
+WITH typed AS (
+    SELECT
+        order_id,
+        customer_id,
+        product_id,
+        TRY_CAST(quantity AS INT) AS quantity,
+        order_date
+    FROM {{ source('bronze', 'orders') }}
+),
+
+-- Number the rows of each order_id, latest order_date first. The remaining
+-- sort keys make the choice deterministic when several rows share that date,
+-- and prefer a row with a usable quantity over one without.
+ranked AS (
+    SELECT
+        order_id,
+        customer_id,
+        product_id,
+        quantity,
+        order_date,
+        ROW_NUMBER() OVER (
+            PARTITION BY order_id
+            ORDER BY
+                order_date DESC,
+                CASE WHEN quantity IS NULL THEN 1 ELSE 0 END,
+                customer_id,
+                product_id,
+                quantity
+        ) AS rn
+    FROM typed
+)
+
+SELECT
+    order_id,
+    customer_id,
+    product_id,
+    quantity,
+    order_date
+FROM ranked
+WHERE rn = 1
+    AND quantity IS NOT NULL
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_products.sql b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_products.sql
new file mode 100644
index 0000000..a44dadd
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/silver/silver_products.sql
@@ -0,0 +1,20 @@
+{{ config(materialized='table', tags=['silver']) }}
+
+-- Silver: products with price converted to DECIMAL(10,2).
+-- Rows whose price cannot be converted are left out.
+WITH priced AS (
+    SELECT
+        product_id,
+        product_name,
+        category,
+        TRY_CAST(price AS DECIMAL(10,2)) AS price
+    FROM {{ source('bronze', 'products') }}
+)
+
+SELECT
+    product_id,
+    product_name,
+    category,
+    price
+FROM priced
+WHERE price IS NOT NULL
diff
--git a/dbt tutorials/fabric-medallion-dbt-tutorial/models/sources.yml b/dbt tutorials/fabric-medallion-dbt-tutorial/models/sources.yml
new file mode 100644
index 0000000..14b395b
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/models/sources.yml
@@ -0,0 +1,13 @@
+version: 2
+
+sources:
+  - name: bronze  # source name the silver models pass as the first argument of source()
+    database: ZavaBronzeLakehouse  # NOTE(review): presumably the Lakehouse the Copy Job loads - confirm
+    schema: dbo
+    tables:
+      - name: customers  # read by silver_customers
+        identifier: bronze_customers  # table name used in place of the source table name
+      - name: products  # read by silver_products
+        identifier: bronze_products
+      - name: orders  # read by silver_orders
+        identifier: bronze_orders
diff --git a/dbt tutorials/fabric-medallion-dbt-tutorial/packages.yml b/dbt tutorials/fabric-medallion-dbt-tutorial/packages.yml
new file mode 100644
index 0000000..1600223
--- /dev/null
+++ b/dbt tutorials/fabric-medallion-dbt-tutorial/packages.yml
@@ -0,0 +1,3 @@
+packages:
+  - package: dbt-labs/dbt_utils  # NOTE(review): no model in this change calls a dbt_utils macro - confirm it is needed
+    version: 1.3.3