top of page
Search
  • Writer's pictureTim Burns

Data Vault Model

Updated: Feb 24, 2021


Photo by Engin Akyurt on Unsplash


Background on the Data Vault

The Data Vault Model is useful when you have evolving dimensional star schemas.

The following code builds out the data model.


Schema from Pycharm



JSON Objects

Put the JSON objects in your Snowflake stage.

{
  "colors": [
    {
      "color": "white",
      "category": "hue",
      "type": "primary",
      "code": {
        "rgba": [
          255,
          255,
          255,
          1
        ],
        "hex": "#FFFFF"
      }
    },
    {
      "color": "green",
      "category": "hue",
      "type": "secondary",
      "code": {
        "rgba": [
          0,
          255,
          0,
          1
        ],
        "hex": "#0F0"
      }
    }
  ]
}

Create the Schema


------- hub_color_category
create or replace table hub_color_category as
select distinct md5( parse_json(value)['category']::string) hub_color_category_MD5_key,
                parse_json(value)['category']::string category_code,
                current_timestamp load_dts,
                rec_source
from (
    select metadata$filename rec_source,
           t.$1:colors colors
    FROM /*@&database.*/@stage.json_examples
                            (PATTERN => 'data_vault/snowflake_example.json$',
                            FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;

alter table hub_color_category add primary key (hub_color_category_MD5_key);
alter table hub_color_category add unique(category_code);

select * from hub_color_category;


------- hub_color_type

create or replace table hub_color_type as
select distinct md5( parse_json(value)['type']::string) hub_type_MD5_key,
                parse_json(value)['type']::string type_code,
                current_timestamp load_dts,
                rec_source
from (
    select metadata$filename rec_source,
           t.$1:colors colors
    FROM /*@&database.*/@stage.json_examples
                            (PATTERN => 'data_vault/snowflake_example.json$',
                            FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;

alter table hub_color_type add primary key (hub_type_MD5_key);
alter table hub_color_type add unique(type_code);

select * from hub_color_type;


------- hub_color_code
create or replace table hub_color_code as
select distinct md5( parse_json(value)['type']::string) hub_color_code_MD5_key,
       parse_json(value)['code']['hex']::string hex,
              current_timestamp load_dts,
                rec_source
from (
    select metadata$filename rec_source,
        t.$1:colors colors
    FROM /*@&database.*/@stage.json_examples
                            (PATTERN => 'data_vault/snowflake_example.json$',
                            FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;

alter table hub_color_code add primary key (hub_color_code_MD5_key);
alter table hub_color_code add unique(hex);

select * from hub_color_code;


------- sat_color_code
create or replace table sat_color_code as
select distinct md5( parse_json(value)['type']::string) hub_color_code_MD5_key,
                  current_timestamp load_dts,
       parse_json(value)['code']['rgba'][0] red,
       parse_json(value)['code']['rgba'][1] blue,
       parse_json(value)['code']['rgba'][2] green,
       parse_json(value)['code']['rgba'][3] alpha,
       hash(parse_json(value)['code']['rgba'][0],
            parse_json(value)['code']['rgba'][1],
            parse_json(value)['code']['rgba'][2],
            parse_json(value)['code']['rgba'][3]) hash_diff,
       rec_source
from (
    select metadata$filename rec_source,
        t.$1:colors colors
    FROM /*@&database.*/@stage.json_examples
                            (PATTERN => 'data_vault/snowflake_example.json$',
                            FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;

alter table sat_color_code add primary key (hub_color_code_MD5_key, load_dts);

alter table sat_color_code add foreign key  (hub_color_code_MD5_key)
    references hub_color_code(hub_color_code_MD5_key);

select * from sat_color_code;


------------------ link_colors

create or replace table link_colors as
select md5(parse_json(value)['type']::string ||
           parse_json(value)['type']::string ||
           parse_json(value)['code']['hex']::string) link_md5_key,
       md5(parse_json(value)['type']::string)        hub_type_MD5_key,
       md5(parse_json(value)['code']['hex']::string) hub_color_code_MD5_key,
       md5(parse_json(value)['category']::string)    hub_color_category_MD5_key,
       parse_json(value)['type']::string             type_code,
       parse_json(value)['code']['hex']::string      hex,
       parse_json(value)['category']::string         category_code,
       current_timestamp                             load_dts,
       rec_source

from (
         select metadata$filename rec_source,
                t.$1:colors       colors
         FROM /*@&database.*/@stage.json_examples
                                 (PATTERN => 'data_vault/snowflake_example.json$',
                                 FILE_FORMAT => json) as t
     ) colors,
     lateral flatten(input => colors) color;


alter table link_colors add primary key (link_md5_key);
alter table link_colors add unique(hub_type_MD5_key, hub_color_code_MD5_key, hub_color_category_MD5_key);
alter table link_colors add unique(type_code, hex, category_code);

alter table link_colors add foreign key  (hub_type_MD5_key)
    references hub_color_type(hub_type_MD5_key);

alter table link_colors add foreign key  (hub_color_category_MD5_key)
    references hub_color_category(hub_color_category_MD5_key);

alter table link_colors add foreign key  (hub_color_code_MD5_key)
    references hub_color_code(hub_color_code_MD5_key);

select * from link_colors;

------------------- Sat_Colors

create or replace table sat_colors as
select md5(parse_json(value)['type']::string ||
           parse_json(value)['type']::string ||
           parse_json(value)['code']['hex']::string) link_md5_key,
       current_timestamp                             load_dts,
        parse_json(value)['color']::string color_name,
       hash(parse_json(value)['color']::string) hash_def,
        rec_source

from (
    select metadata$filename rec_source,
           t.$1:colors colors
    FROM /*@&database.*/@stage.json_examples
                            (PATTERN => 'data_vault/snowflake_example.json$',
                            FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;


alter table sat_colors add primary key (link_md5_key,load_dts );


alter table sat_colors add foreign key  (link_md5_key) references link_colors(link_md5_key);


select * from sat_colors;

19 views0 comments

Recent Posts

See All

Carto, Snowflake, and Data Management

A basic principle of data management: Don't move data unless you have to. Moving data is expensive and error-prone. Data Egress Cost: How To Take Back Control And Reduce Egress Charges Archiving to S

bottom of page