Fix/sources yml dbt2 compatibility by Derrick-Ryan-Giggs · Pull Request #816 · DataTalksClub/data-engineering-zoomcamp · GitHub
[go: up one dir, main page]

Skip to content
3 changes: 2 additions & 1 deletion 04-analytics-engineering/taxi_rides_ny/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
target/
dbt_packages/
logs/
profiles.yml
profiles.yml
.user.yml

# Data files for DuckDB
Expand Down Expand Up @@ -90,3 +90,4 @@ google-*.json
.env.local
.env.*.local
*.env
dbt_internal_packages/
41 changes: 26 additions & 15 deletions 04-analytics-engineering/taxi_rides_ny/models/staging/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ sources:
description: Raw taxi trip data from NYC TLC
database: |
{%- if target.type == 'bigquery' -%}
{{ env_var('GCP_PROJECT_ID', 'please-add-your-gcp-project-id-here') }}
{{ env_var('GCP_PROJECT_ID', 'dtc-de-course-484520') }}
{%- else -%}
taxi_rides_ny
{%- endif -%}
Expand All @@ -13,16 +13,12 @@ sources:
{%- else -%}
prod
{%- endif -%}
freshness:
warn_after: {count: 24, period: hour}
error_after: {count: 48, period: hour}
tables:
- name: green_tripdata
description: Raw green taxi trip records
loaded_at_field: lpep_pickup_datetime
columns:
- name: vendorid
description: "Taxi technology provider (1 = Creative Mobile Technologies, 2 = VeriFone Inc.) - Note: Raw data may contain nulls, filtered in staging"
description: "Taxi technology provider"
- name: lpep_pickup_datetime
description: Date and time when the meter was engaged
- name: lpep_dropoff_datetime
Expand All @@ -36,36 +32,35 @@ sources:
- name: dolocationid
description: TLC Taxi Zone where the meter was disengaged
- name: ratecodeid
description: Rate code (1=Standard, 2=JFK, 3=Newark, 4=Nassau/Westchester, 5=Negotiated, 6=Group)
description: Rate code
- name: store_and_fwd_flag
description: Trip record held in vehicle memory (Y/N)
- name: payment_type
description: Payment method (1=Credit card, 2=Cash, 3=No charge, 4=Dispute, 5=Unknown, 6=Voided)
description: Payment method
- name: fare_amount
description: Time and distance fare
- name: extra
description: Miscellaneous extras and surcharges
- name: mta_tax
description: MTA tax
- name: tip_amount
description: Tip amount (credit card only)
description: Tip amount
- name: tolls_amount
description: Total tolls paid
- name: improvement_surcharge
description: Improvement surcharge
- name: total_amount
description: Total amount charged
- name: trip_type
description: Trip type (1=Street-hail, 2=Dispatch)
description: Trip type
- name: ehail_fee
description: E-hail fee

- name: yellow_tripdata
description: Raw yellow taxi trip records
loaded_at_field: tpep_pickup_datetime
columns:
- name: vendorid
description: "Taxi technology provider (1 = Creative Mobile Technologies, 2 = VeriFone Inc.) - Note: Raw data may contain nulls, filtered in staging"
description: "Taxi technology provider"
- name: tpep_pickup_datetime
description: Date and time when the meter was engaged
- name: tpep_dropoff_datetime
Expand All @@ -79,22 +74,38 @@ sources:
- name: dolocationid
description: TLC Taxi Zone where the meter was disengaged
- name: ratecodeid
description: Rate code (1=Standard, 2=JFK, 3=Newark, 4=Nassau/Westchester, 5=Negotiated, 6=Group)
description: Rate code
- name: store_and_fwd_flag
description: Trip record held in vehicle memory (Y/N)
- name: payment_type
description: Payment method (1=Credit card, 2=Cash, 3=No charge, 4=Dispute, 5=Unknown, 6=Voided)
description: Payment method
- name: fare_amount
description: Time and distance fare
- name: extra
description: Miscellaneous extras and surcharges
- name: mta_tax
description: MTA tax
- name: tip_amount
description: Tip amount (credit card only)
description: Tip amount
- name: tolls_amount
description: Total tolls paid
- name: improvement_surcharge
description: Improvement surcharge
- name: total_amount
description: Total amount charged

- name: fhv_tripdata
description: Raw FHV trip records 2019
columns:
- name: dispatching_base_num
description: TLC Base License Number
- name: pickup_datetime
description: Date and time of pickup
- name: dropOff_datetime
description: Date and time of dropoff
- name: PUlocationID
description: TLC Taxi Zone where trip started
- name: DOlocationID
description: TLC Taxi Zone where trip ended
- name: SR_Flag
description: Shared ride flag
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Staging model over the raw FHV trip source.
-- Casts the pickup/dropoff datetimes to timestamps and the pickup/dropoff
-- location ids to integers, renames the columns to snake_case, and keeps only
-- rows that have a dispatching base number and a pickup datetime within 2019.
with source as (
    select * from {{ source('raw', 'fhv_tripdata') }}
),

renamed as (
    select
        dispatching_base_num,
        cast(pickup_datetime as timestamp) as pickup_datetime,
        cast(dropOff_datetime as timestamp) as dropoff_datetime,
        cast(PUlocationID as integer) as pickup_location_id,
        cast(DOlocationID as integer) as dropoff_location_id,
        SR_Flag as sr_flag
    from source
    -- Rows without a dispatching base number are dropped.
    where dispatching_base_num is not null
        -- Half-open range: 2019-01-01 inclusive up to, but excluding, 2020-01-01.
        -- NOTE(review): the bounds are string literals; confirm the raw
        -- pickup_datetime column type makes this compare as a date range on
        -- every target warehouse.
        and pickup_datetime >= '2019-01-01'
        and pickup_datetime < '2020-01-01'
)

select * from renamed
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@ renamed as (
)

select * from renamed
-- Filter to 2019-2020 data only (homework requirement)
where pickup_datetime >= '2019-01-01'
and pickup_datetime < '2021-01-01'

-- Sample records for dev environment using deterministic date filter
-- Additional dev sampling
{% if target.name == 'dev' %}
where pickup_datetime >= '2019-01-01' and pickup_datetime < '2019-02-01'
{% endif %}
and pickup_datetime < '2019-02-01'
{% endif %}
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ renamed as (
)

select * from renamed
-- Filter to 2019-2020 data only (homework requirement)
where pickup_datetime >= '2019-01-01'
and pickup_datetime < '2021-01-01'

-- Sample records for dev environment using deterministic date filter
-- Additional dev sampling
{% if target.name == 'dev' %}
where pickup_datetime >= '2019-01-01' and pickup_datetime < '2019-02-01'
{% endif %}
and pickup_datetime < '2019-02-01'
{% endif %}
14 changes: 7 additions & 7 deletions 04-analytics-engineering/taxi_rides_ny/package-lock.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
packages:
- name: dbt_utils
package: dbt-labs/dbt_utils
version: 1.3.3
- name: codegen
package: dbt-labs/codegen
version: 0.14.0
sha1_hash: 01f31e0d658d76121f50e62b998342ebf138df11
- package: dbt-labs/codegen
name: codegen
version: 0.14.0
- package: dbt-labs/dbt_utils
name: dbt_utils
version: 1.3.3
sha1_hash: 41a9a95a7d1e8d4dff67de764f3b1b8e9094807c
0