Data Warehouse Guide
- Craig Appl
This document and the child pages provide information on the data warehouse
Tables
The SQL queries for the tables can be found below:
CREATE TABLE IF NOT EXISTS jurisdictions ( id VARCHAR(36) UNIQUE NOT NULL, uid VARCHAR(36) UNIQUE NOT NULL, parent_id VARCHAR(36) NULL, code VARCHAR(36) NOT NULL, type VARCHAR(255) NOT NULL, name VARCHAR(255) NOT NULL, status VARCHAR(45) NOT NULL, geometry GEOMETRY NOT NULL, geographic_level INT NOT NULL, effective_start_date DATE NULL, effective_end_date DATE NULL, version INT NOT NULL DEFAULT 0, server_version BIGINT NOT NULL DEFAULT 0, PRIMARY KEY (id), CONSTRAINT unique_code UNIQUE (code) ); CREATE TABLE IF NOT EXISTS structures ( id VARCHAR(36) UNIQUE NOT NULL, uid VARCHAR(36) UNIQUE NOT NULL, jurisdiction_id VARCHAR(36) NOT NULL, code VARCHAR(36) NOT NULL, type VARCHAR(255) NOT NULL, name VARCHAR(255) NULL, status VARCHAR(45) NOT NULL, geometry GEOMETRY NOT NULL, geographic_level INT NOT NULL, effective_start_date DATE NULL, effective_end_date DATE NULL, version INT NOT NULL DEFAULT 0, server_version BIGINT NOT NULL DEFAULT 0, PRIMARY KEY (id), CONSTRAINT fk_structures_jurisdictions FOREIGN KEY (jurisdiction_id) REFERENCES jurisdictions (id) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS plans ( identifier VARCHAR(36) UNIQUE NOT NULL, version VARCHAR(3) NOT NULL, name VARCHAR NOT NULL, title VARCHAR NOT NULL, status VARCHAR(45) NOT NULL, fi_status VARCHAR NULL, fi_reason VARCHAR NULL, intervention_type VARCHAR NULL, date DATE NULL, effective_period_start DATE NULL, effective_period_end DATE NULL, PRIMARY KEY (identifier) ); CREATE TABLE IF NOT EXISTS tasks ( identifier VARCHAR(36) UNIQUE NOT NULL, plan_identifier VARCHAR(36) NOT NULL, group_identifier VARCHAR(36) NOT NULL, status VARCHAR(45) NOT NULL, business_status VARCHAR(45) NOT NULL, priority VARCHAR(36) NOT NULL, code VARCHAR(36) NOT NULL, description VARCHAR(255) NULL, focus VARCHAR(255) NOT NULL, task_for VARCHAR(36) NOT NULL, execution_start_date DATE NULL, execution_end_date DATE NULL, owner VARCHAR(100) NOT NULL, note JSON NULL, server_version BIGINT NOT NULL DEFAULT 0, PRIMARY KEY (identifier), CONSTRAINT fk_tasks_plan FOREIGN KEY (plan_identifier) REFERENCES plans (identifier) ON DELETE CASCADE ON UPDATE CASCADE, CONSTRAINT fk_tasks_jurisdiction FOREIGN KEY (group_identifier) REFERENCES jurisdictions (id) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS plan_jurisdiction ( id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(), jurisdiction_id VARCHAR(36) NOT NULL, plan_id VARCHAR(36) NOT NULL, PRIMARY KEY (id), UNIQUE (jurisdiction_id, plan_id), CONSTRAINT fk_jurisdiction FOREIGN KEY (jurisdiction_id) REFERENCES jurisdictions (code) ON DELETE RESTRICT, CONSTRAINT fk_plan FOREIGN KEY (plan_id) REFERENCES plans (identifier) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS goals ( id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(), goal_id VARCHAR(255) NOT NULL, plan_id VARCHAR(36) NOT NULL, description VARCHAR(255) NULL, priority VARCHAR(255) NULL, PRIMARY KEY (id), UNIQUE (goal_id, plan_id), CONSTRAINT fk_plan FOREIGN KEY (plan_id) REFERENCES plans (identifier) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS goal_target ( id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(), goal_id VARCHAR(255) NOT NULL, plan_id VARCHAR(36) NOT NULL, measure VARCHAR(255) NOT NULL, due DATE NOT NULL, detail_quantity_value NUMERIC(64,2) NOT NULL, detail_quantity_comparator VARCHAR(2) NOT NULL, detail_quantity_unit VARCHAR(10) NOT NULL, PRIMARY KEY (id), UNIQUE (goal_id, plan_id), CONSTRAINT fk_plan FOREIGN KEY (plan_id) REFERENCES plans (identifier) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS actions ( identifier VARCHAR(36) UNIQUE NOT NULL, plan_id VARCHAR(36) NOT NULL, prefix VARCHAR(3) NOT NULL, title VARCHAR(255) NOT NULL, description VARCHAR(255) NULL, code VARCHAR(255) NOT NULL, timingPeriod_start DATE NULL, timingPeriod_end DATE NULL, reason VARCHAR(255) NOT NULL, goal_id VARCHAR(255) NOT NULL, subject_codable_concept_text VARCHAR(36) NOT NULL, PRIMARY KEY (identifier), CONSTRAINT fk_actions_plans FOREIGN KEY (plan_id) REFERENCES plans (identifier) ON DELETE CASCADE ON UPDATE CASCADE ); CREATE TABLE IF NOT EXISTS clients ( id VARCHAR(36) UNIQUE NOT NULL, baseEntityId VARCHAR(36) UNIQUE NOT NULL, dateCreated BIGINT NULL, dateVoided BIGINT NULL, firstName VARCHAR NOT NULL, middleName VARCHAR NOT NULL, lastName VARCHAR NOT NULL, gender VARCHAR NOT NULL, birthdate BIGINT NULL, identifiers JSONB NOT NULL DEFAULT '{}'::jsonb, attributes JSONB NOT NULL DEFAULT '{}'::jsonb, relationships JSONB NOT NULL DEFAULT '{}'::jsonb, addresses JSONB NOT NULL DEFAULT '{}'::jsonb, residence VARCHAR(36) NOT NULL, birthdateApprox BOOLEAN NOT NULL DEFAULT 'False', deathdateApprox BOOLEAN NOT NULL DEFAULT 'False', clientApplicationVersion INT NOT NULL, clientDatabaseVersion INT NOT NULL, server_version BIGINT NOT NULL DEFAULT 0, PRIMARY KEY (id), CONSTRAINT fk_clients_residence FOREIGN KEY (residence) REFERENCES structures (id) ON DELETE CASCADE ON UPDATE CASCADE );
The SQL queries for the table indices can be found below
CREATE INDEX IF NOT EXISTS jurisdiction_uid_idx ON jurisdictions (uid); CREATE INDEX IF NOT EXISTS jurisdiction_code_idx ON jurisdictions (code); CREATE INDEX IF NOT EXISTS jurisdiction_parent_idx ON jurisdictions (parent_id); CREATE INDEX IF NOT EXISTS jurisdiction_server_version_idx ON jurisdictions (server_version); CREATE INDEX IF NOT EXISTS structure_uid_idx ON structures (uid); CREATE INDEX IF NOT EXISTS structure_code_idx ON structures (code); CREATE INDEX IF NOT EXISTS structures_server_version_idx ON structures (server_version); CREATE INDEX IF NOT EXISTS for_idx ON tasks (task_for); CREATE INDEX IF NOT EXISTS status_idx ON tasks (status); CREATE INDEX IF NOT EXISTS business_status_idx ON tasks (business_status); CREATE INDEX IF NOT EXISTS task_server_version_idx ON tasks (server_version); CREATE INDEX IF NOT EXISTS goal_id_idx ON goals (goal_id); CREATE INDEX IF NOT EXISTS baseEntityId_idx ON clients (baseEntityId); CREATE INDEX IF NOT EXISTS residence_idx ON clients (residence);
jurisdictions
Stores all the jurisdictions from the OpenSRP server.
structures
Stores all the structures from the OpenSRP server.
plans
Stores all the plans from the OpenSRP server.
tasks
Stores all the tasks from the OpenSRP server. The tasks that are synced are only those that are tied to plans
that are in the data warehouse.
plan_jurisdiction
plans
and jurisdictions
have a one to many relationship which is captured in this table.
goals
Stores all the goals for each plan. This table is extracted from the plan definition that is from the OpenSRP server.
goal_target
Stores all the goal targets for each plan. This table is extracted from the plan definition that is from the OpenSRP server.
actions
Stores all the actions for each plan. This table is extracted from the plan definition that is from the OpenSRP server.
clients
Stores clients from the OpenSRP server. The clients that are synced are only those that are tied to tasks
that are in the data warehouse.
Materialized views
plans_materialzied_view
This view holds data that represents a plan-jurisdiction combination. Each record in this view is unique by plan and jurisdiction combined. It also includes the jurisdiction hierarchy on each record.
The SQL for this view is below:
CREATE MATERIALIZED VIEW IF NOT EXISTS plans_materialzied_view AS WITH RECURSIVE tree AS ( --base case SELECT jurisdictions.id AS jurisdiction_id, jurisdictions.parent_id AS jurisdiction_parent_id, jurisdictions.name AS jurisdiction_name, jurisdictions.geometry AS jurisdiction_geometry, 0 AS jurisdiction_depth, ARRAY[]::varchar[] AS jurisdiction_path, ARRAY[]::varchar[] AS jurisdiction_names FROM jurisdictions -- setting this to id includes current object -- setting to parent_id includes only children -- setting to '' gets the whole tree WHERE jurisdictions.parent_id = '' UNION ALL --recursive part SELECT jurisdictions.id AS jurisdictions_id, jurisdictions.parent_id AS jurisdictions_parent_id, jurisdictions.name AS jurisdictions_name, jurisdictions.geometry AS jurisdictions_geometry, tree.jurisdiction_depth + 1, tree.jurisdiction_path || ARRAY[jurisdictions.parent_id], tree.jurisdiction_names || ARRAY[jurisdictions_parent.name] FROM jurisdictions, tree LEFT JOIN jurisdictions AS jurisdictions_parent ON tree.jurisdiction_id = jurisdictions_parent.id WHERE tree.jurisdiction_id = jurisdictions.parent_id ) SELECT uuid_generate_v5( '6ba7b810-9dad-11d1-80b4-00c04fd430c8', concat(plans.identifier, tree.jurisdiction_id)) AS id, plans.identifier AS plan_id, plans.title AS plan_title, plans.name AS plan_name, plans.status AS plan_status, tree.jurisdiction_id AS jurisdiction_id, plans.fi_status AS plan_fi_status, plans.fi_reason AS plan_fi_reason, plans.date AS plan_date, plans.effective_period_start AS plan_effective_period_start, plans.effective_period_end AS plan_effective_period_end, plans.intervention_type AS plan_intervention_type, plans.version AS plan_version, tree.jurisdiction_parent_id AS jurisdiction_parent_id, tree.jurisdiction_name AS jurisdiction_name, tree.jurisdiction_geometry AS jurisdiction_geometry, tree.jurisdiction_depth AS jurisdiction_depth, tree.jurisdiction_path AS jurisdiction_path, tree.jurisdiction_names AS jurisdiction_name_path FROM plans LEFT JOIN plan_jurisdiction ON plans.identifier = plan_jurisdiction.plan_id LEFT JOIN tree ON plan_jurisdiction.jurisdiction_id = tree.jurisdiction_id ORDER BY plans.date DESC, plans.identifier; CREATE UNIQUE INDEX IF NOT EXISTS plans_materialzied_view_idx ON plans_materialzied_view (id);
targets_materialized_view
Holds data about each goal in a plan. Each record in this view is unique by goal, plan, and jurisdiction. It also includes the task count and task business statuses counts for each goal-plan-jurisdiction combo.
The SQL for this view is below:
CREATE MATERIALIZED VIEW IF NOT EXISTS targets_materialized_view AS SELECT DISTINCT ON (id) id, plan_id, jurisdiction_id, goal_id, measure, goal_value, goal_comparator, goal_unit, action_code, task_count, completed_task_count, COALESCE(task_business_status_map, '{}'::json) AS task_business_status_map FROM (SELECT uuid_generate_v5( '6ba7b810-9dad-11d1-80b4-00c04fd430c8', concat( goal_target.plan_id, goal_target.goal_id, plan_jurisdiction.jurisdiction_id) ) AS id, goal_target.plan_id AS plan_id, subq.task_count AS task_count, subq1.completed_task_count AS completed_task_count, subq2.business_status_map AS task_business_status_map, plan_jurisdiction.jurisdiction_id AS jurisdiction_id, goal_target.goal_id AS goal_id, goal_target.measure AS measure, goal_target.detail_quantity_value AS goal_value, goal_target.detail_quantity_comparator AS goal_comparator, goal_target.detail_quantity_unit AS goal_unit, actions.code AS action_code FROM goal_target LEFT JOIN plan_jurisdiction AS plan_jurisdiction ON goal_target.plan_id = plan_jurisdiction.plan_id LEFT JOIN actions AS actions ON goal_target.plan_id = actions.plan_id AND goal_target.goal_id = actions.goal_id LEFT JOIN LATERAL ( SELECT count(1) AS task_count FROM tasks WHERE plan_identifier = plan_jurisdiction.plan_id AND code = actions.code AND group_identifier = plan_jurisdiction.jurisdiction_id ) AS subq ON true LEFT JOIN LATERAL ( SELECT count(1) AS completed_task_count FROM tasks WHERE plan_identifier = plan_jurisdiction.plan_id AND code = actions.code AND group_identifier = plan_jurisdiction.jurisdiction_id AND business_status IN ('Complete', 'Sprayed') ) AS subq1 ON true LEFT JOIN LATERAL ( SELECT json_object_agg(each.business_status, each.status_count) AS business_status_map FROM ( SELECT business_status, count(1) AS status_count FROM tasks WHERE subq.task_count > 0 AND plan_identifier = plan_jurisdiction.plan_id AND code = actions.code AND group_identifier = plan_jurisdiction.jurisdiction_id GROUP BY business_status ) AS each ) AS subq2 ON true ORDER BY goal_target.plan_id) AS main; CREATE UNIQUE INDEX IF NOT EXISTS targets_materialized_view_idx ON targets_materialized_view (id);
task_structures_materialized_view
In this view, each record is a unique task with additional action, jurisdiction, and structure data.
The SQL for this view is below:
CREATE MATERIALIZED VIEW IF NOT EXISTS task_structures_materialized_view AS SELECT tasks.identifier AS task_identifier, tasks.plan_identifier AS plan_id, tasks.group_identifier AS jurisdiction_id, tasks.status AS task_status, tasks.business_status AS task_business_status, tasks.focus AS task_focus, tasks.task_for AS task_task_for, tasks.execution_start_date AS task_execution_start_date, tasks.execution_end_date AS task_execution_end_date, actions.goal_id AS goal_id, actions.code AS action_code, jurisdictions.name AS jurisdiction_name, jurisdictions.parent_id AS jurisdiction_parent_id, COALESCE(structures_query.id, structures_query2.id) AS structure_id, COALESCE(structures_query.code, structures_query2.code) AS structure_code, COALESCE(structures_query.name, structures_query2.name) AS structure_name, COALESCE(structures_query.type, structures_query2.type) AS structure_type, COALESCE(structures_query.geometry, structures_query2.geometry) AS structure_geometry FROM tasks LEFT JOIN actions ON actions.code = tasks.code AND actions.plan_id = tasks.plan_identifier LEFT JOIN jurisdictions ON jurisdictions.id = tasks.group_identifier LEFT JOIN LATERAL ( SELECT id, code, name, type, geometry FROM structures WHERE structures.id = tasks.task_for AND structures.jurisdiction_id = jurisdictions.id ) AS structures_query ON true LEFT JOIN LATERAL ( SELECT structures.id AS id, structures.code AS code, structures.name AS name, structures.type AS type, structures.geometry AS geometry FROM clients LEFT JOIN structures ON structures.id = clients.residence WHERE clients.baseEntityId = tasks.task_for ) AS structures_query2 ON true ORDER BY tasks.identifier; CREATE UNIQUE INDEX IF NOT EXISTS task_structures_materialized_view_idx ON task_structures_materialized_view (task_identifier);
The queries for refreshing the materialized views are below:
REFRESH MATERIALIZED VIEW CONCURRENTLY plans_materialzied_view; REFRESH MATERIALIZED VIEW CONCURRENTLY targets_materialized_view; REFRESH MATERIALIZED VIEW CONCURRENTLY task_structures_materialized_view;
Other Queries
task_structures_geojson
Simply rearranges the data from task_structures_materialized_view
to be output as valid geojson. This query might be used to create a chart in superset.
The SQL for this is below:
SELECT task_identifier, plan_id, jurisdiction_id, goal_id, jsonb_build_object( 'type', 'Feature', 'id', task_identifier, 'geometry', ST_AsGeoJSON(structure_geometry)::jsonb, 'properties', to_jsonb(row) - 'task_identifier' - 'structure_geometry' ) AS geojson FROM ( SELECT task_identifier, plan_id, jurisdiction_id, task_status, task_business_status, task_focus, task_task_for, task_execution_start_date, task_execution_end_date, goal_id, action_code, jurisdiction_name, jurisdiction_parent_id, structure_id, structure_code, structure_name, structure_type, structure_geometry FROM task_structures_materialized_view ) row;
jurisdictions_geojson
Simply rearranges the data from the jurisdictions
table to be output as valid geojson. This query might be used to create a chart in superset.
The SQL for this is below:
SELECT jurisdiction_id, jsonb_build_object( 'type', 'Feature', 'id', jurisdiction_id, 'geometry', ST_AsGeoJSON(jurisdiction_geometry)::jsonb, 'properties', to_jsonb(row) - 'jurisdiction_id' - 'jurisdiction_geometry' ) AS geojson FROM ( SELECT jurisdiction_id, jurisdiction_name, jurisdiction_parent_id, jurisdiction_geometry FROM plans_materialzied_view ) row;
plans_slice
Simply queries for plans from the plans_materialized_view
view. This query might be used to create a chart in superset.
The SQL for this is below:
SELECT id, plan_id, plan_title, plan_status, jurisdiction_id, plan_fi_status, plan_fi_reason, plan_intervention_type, jurisdiction_parent_id, jurisdiction_name, jurisdiction_depth, jurisdiction_path, jurisdiction_name_path FROM plans_materialzied_view;
get_operational_areas
This is a simple query to get all the operational areas that we care about.
The SQL for this is below:
SELECT DISTINCT(jurisdiction_id) AS id FROM plan_jurisdiction;
upsert_clients
This is a simple query to insert or update clients.
The SQL for this is below:
INSERT INTO clients VALUES ( '${client_id}', '${client_baseEntityId}', ${client_dateCreated}, ${client_dateVoided}, '${client_firstName}', '${client_middleName}', '${client_lastName}', '${client_gender}', ${client_birthdate}, '${client_identifiers}'::jsonb, '${client_attributes}'::jsonb, '${client_relationships}'::jsonb, '${client_addresses}'::jsonb, '${client_residence}', '${client_birthdateApprox}', '${client_deathdateApprox}', ${client_clientApplicationVersion}, ${client_clientDatabaseVersion}, ${client_serverVersion} ) ON CONFLICT (id) DO UPDATE SET id = '${client_id}', baseEntityId = '${client_baseEntityId}', dateCreated = ${client_dateCreated}, dateVoided = ${client_dateVoided}, firstName = '${client_firstName}', middleName = '${client_middleName}', lastName = '${client_lastName}', gender = '${client_gender}', birthdate = ${client_birthdate}, identifiers = '${client_identifiers}'::jsonb, attributes = '${client_attributes}'::jsonb, relationships = '${client_relationships}'::jsonb, addresses = '${client_addresses}'::jsonb, residence = '${client_residence}', birthdateApprox = '${client_birthdateApprox}', deathdateApprox = '${client_deathdateApprox}', clientApplicationVersion = ${client_clientApplicationVersion}, clientDatabaseVersion = ${client_clientDatabaseVersion}, server_version = ${client_serverVersion};
upsert_plans
This is a simple query to insert or update plans.
The SQL for this is below:
INSERT INTO plans ( identifier, version, name, title, status, date, effective_period_start, effective_period_end ) VALUES ( '${identifier}', '${version}', '${name}', '${title}', '${status}', '${date}', '${effectivePeriod_start}', '${effectivePeriod_end}' ) ON CONFLICT (identifier) DO UPDATE SET version = '${version}', name = '${name}', title = '${title}', status = '${status}', date = '${date}', effective_period_start = '${effectivePeriod_start}', effective_period_end = '${effectivePeriod_end}';
This site is no longer maintained. Please visit docs.opensrp.io for current documentation.