Τι πρέπει να γνωρίζει ένας μηχανικός δεδομένων σχετικά με τη διχρονική μοντελοποίηση – Win Vector LLC

Τι πρέπει να γνωρίζει ένας μηχανικός δεδομένων σχετικά με τη διχρονική μοντελοποίηση – Win Vector LLC

März 4, 2023 0 Von admin
-- data_algebra SQL https://github.com/WinVector/data_algebra
--  dialect: SQLiteModel 1.4.3
--       string quote: '
--   identifier quote: "
WITH
 "project_1" AS (
  SELECT  -- .project({ }, group_by=['target_date', 'as_of_date'])
   "target_date" ,
   "as_of_date"
  FROM
   "d_observations"
  GROUP BY
   "target_date" ,
   "as_of_date"
 ) ,
 "extend_2" AS (
  SELECT  -- .extend({ 'reservation_count': '0'})
   "target_date" ,
   "as_of_date" ,
   0 AS "reservation_count"
  FROM
   "project_1"
 ) ,
 "map_columns_3" AS (
  SELECT  -- .map_columns({'as_of_date': 'action_date'})
   "target_date" ,
   "as_of_date" AS "action_date" ,
   "reservation_count"
  FROM
   "extend_2"
 ) ,
 "extend_4" AS (
  SELECT  -- .extend({ 'source_name': "'a'"})
   "target_date" ,
   "action_date" ,
   "reservation_count" ,
   'a' AS "source_name"
  FROM
   "map_columns_3"
 ) ,
 "extend_5" AS (
  SELECT  -- .extend({ 'source_name': "'b'"})
   "target_date" ,
   "action_date" ,
   "reservation_count" ,
   'b' AS "source_name"
  FROM
   "d_actions"
 ) ,
 "concat_rows_6" AS (
  SELECT  -- _0..concat_rows(b= _1, id_column='source_name', a_name='a', b_name='b')
   "target_date" ,
   "reservation_count" ,
   "action_date"
  FROM
  (
   SELECT
    *
   FROM
    "extend_4"
  UNION ALL
   SELECT
    *
   FROM
    "extend_5"
  ) "concat_rows_6"
 ) ,
 "project_7" AS (
  SELECT  -- .project({ 'reservation_count': 'reservation_count.sum()'}, group_by=['target_date', 'action_date'])
   "target_date" ,
   "action_date" ,
   SUM("reservation_count") AS "reservation_count"
  FROM
   "concat_rows_6"
  GROUP BY
   "target_date" ,
   "action_date"
 ) ,
 "extend_8" AS (
  SELECT  -- .extend({ 'reservation_count': 'reservation_count.cumsum()'}, partition_by=['target_date'], order_by=['action_date'])
   "target_date" ,
   "action_date" ,
   SUM("reservation_count") OVER ( PARTITION BY "target_date" ORDER BY "action_date"  )  AS "reservation_count"
  FROM
   "project_7"
 ) ,
 "map_columns_9" AS (
  SELECT  -- .map_columns({'action_date': 'as_of_date'})
   "target_date" ,
   "action_date" AS "as_of_date" ,
   "reservation_count"
  FROM
   "extend_8"
 ) ,
 "natural_join_0" AS (
  SELECT  -- _0..natural_join(b= _1, on=['target_date', 'as_of_date'], jointype='INNER')
   COALESCE("join_source_left_0"."target_date", "join_source_right_0"."target_date") AS "target_date" ,
   COALESCE("join_source_left_0"."as_of_date", "join_source_right_0"."as_of_date") AS "as_of_date" ,
   "reservation_count"
  FROM
  (
   "map_columns_9" "join_source_left_0"
  INNER JOIN
   "d_observations" "join_source_right_0"
  ON
   "join_source_left_0"."target_date" = "join_source_right_0"."target_date" AND
   "join_source_left_0"."as_of_date" = "join_source_right_0"."as_of_date"
  )
 )
SELECT  -- .order_rows(['target_date', 'as_of_date'])
 *
FROM
 "natural_join_0"
ORDER BY
 "target_date" ,
 "as_of_date"