Background on the Data Vault
The Data Vault Model is useful when you have evolving dimensional star schemas.
The following code builds out the data model.
Schema from Pycharm
JSON Objects
Put the JSON objects in your Snowflake stage.
{
"colors": [
{
"color": "white",
"category": "hue",
"type": "primary",
"code": {
"rgba": [
255,
255,
255,
1
],
"hex": "#FFFFF"
}
},
{
"color": "green",
"category": "hue",
"type": "secondary",
"code": {
"rgba": [
0,
255,
0,
1
],
"hex": "#0F0"
}
}
]
}
Create the Schema
------- hub_color_category
create or replace table hub_color_category as
select distinct md5( parse_json(value)['category']::string) hub_color_category_MD5_key,
parse_json(value)['category']::string category_code,
current_timestamp load_dts,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table hub_color_category add primary key (hub_color_category_MD5_key);
alter table hub_color_category add unique(category_code);
select * from hub_color_category;
------- hub_color_type
create or replace table hub_color_type as
select distinct md5( parse_json(value)['type']::string) hub_type_MD5_key,
parse_json(value)['type']::string type_code,
current_timestamp load_dts,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table hub_color_type add primary key (hub_type_MD5_key);
alter table hub_color_type add unique(type_code);
select * from hub_color_type;
------- hub_color_code
create or replace table hub_color_code as
select distinct md5( parse_json(value)['type']::string) hub_color_code_MD5_key,
parse_json(value)['code']['hex']::string hex,
current_timestamp load_dts,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table hub_color_code add primary key (hub_color_code_MD5_key);
alter table hub_color_code add unique(hex);
select * from hub_color_code;
------- sat_color_code
create or replace table sat_color_code as
select distinct md5( parse_json(value)['type']::string) hub_color_code_MD5_key,
current_timestamp load_dts,
parse_json(value)['code']['rgba'][0] red,
parse_json(value)['code']['rgba'][1] blue,
parse_json(value)['code']['rgba'][2] green,
parse_json(value)['code']['rgba'][3] alpha,
hash(parse_json(value)['code']['rgba'][0],
parse_json(value)['code']['rgba'][1],
parse_json(value)['code']['rgba'][2],
parse_json(value)['code']['rgba'][3]) hash_diff,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table sat_color_code add primary key (hub_color_code_MD5_key, load_dts);
alter table sat_color_code add foreign key (hub_color_code_MD5_key)
references hub_color_code(hub_color_code_MD5_key);
select * from sat_color_code;
------------------ link_colors
create or replace table link_colors as
select md5(parse_json(value)['type']::string ||
parse_json(value)['type']::string ||
parse_json(value)['code']['hex']::string) link_md5_key,
md5(parse_json(value)['type']::string) hub_type_MD5_key,
md5(parse_json(value)['code']['hex']::string) hub_color_code_MD5_key,
md5(parse_json(value)['category']::string) hub_color_category_MD5_key,
parse_json(value)['type']::string type_code,
parse_json(value)['code']['hex']::string hex,
parse_json(value)['category']::string category_code,
current_timestamp load_dts,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table link_colors add primary key (link_md5_key);
alter table link_colors add unique(hub_type_MD5_key, hub_color_code_MD5_key, hub_color_category_MD5_key);
alter table link_colors add unique(type_code, hex, category_code);
alter table link_colors add foreign key (hub_type_MD5_key)
references hub_color_type(hub_type_MD5_key);
alter table link_colors add foreign key (hub_color_category_MD5_key)
references hub_color_category(hub_color_category_MD5_key);
alter table link_colors add foreign key (hub_color_code_MD5_key)
references hub_color_code(hub_color_code_MD5_key);
select * from link_colors;
------------------- Sat_Colors
create or replace table sat_colors as
select md5(parse_json(value)['type']::string ||
parse_json(value)['type']::string ||
parse_json(value)['code']['hex']::string) link_md5_key,
current_timestamp load_dts,
parse_json(value)['color']::string color_name,
hash(parse_json(value)['color']::string) hash_def,
rec_source
from (
select metadata$filename rec_source,
t.$1:colors colors
FROM /*@&database.*/@stage.json_examples
(PATTERN => 'data_vault/snowflake_example.json$',
FILE_FORMAT => json) as t
) colors,
lateral flatten(input => colors) color;
alter table sat_colors add primary key (link_md5_key,load_dts );
alter table sat_colors add foreign key (link_md5_key) references link_colors(link_md5_key);
select * from sat_colors;
Comments