Data Warehouse Guide

This document and the child pages provide information on the data warehouse

Revealv2ERD.png

Tables

The SQL queries for the tables can be found below:

create_tables_revea2.sql
CREATE TABLE IF NOT EXISTS jurisdictions (
  id VARCHAR(36) UNIQUE NOT NULL,
  uid VARCHAR(36) UNIQUE NOT NULL,
  parent_id VARCHAR(36) NULL,
  code VARCHAR(36) NOT NULL,
  type VARCHAR(255) NOT NULL,
  name VARCHAR(255) NOT NULL,
  status VARCHAR(45) NOT NULL,
  geometry GEOMETRY NOT NULL,
  geographic_level INT NOT NULL,
  effective_start_date DATE NULL,
  effective_end_date DATE NULL,
  version INT NOT NULL DEFAULT 0,
  server_version BIGINT NOT NULL DEFAULT 0,
  PRIMARY KEY (id),
  CONSTRAINT unique_code UNIQUE (code)
);

CREATE TABLE IF NOT EXISTS structures (
  id VARCHAR(36) UNIQUE NOT NULL,
  uid VARCHAR(36) UNIQUE NOT NULL,
  jurisdiction_id VARCHAR(36) NOT NULL,
  code VARCHAR(36) NOT NULL,
  type VARCHAR(255) NOT NULL,
  name VARCHAR(255) NULL,
  status VARCHAR(45) NOT NULL,
  geometry GEOMETRY NOT NULL,
  geographic_level INT NOT NULL,
  effective_start_date DATE NULL,
  effective_end_date DATE NULL,
  version INT NOT NULL DEFAULT 0,
  server_version BIGINT NOT NULL DEFAULT 0,
  PRIMARY KEY (id),
  CONSTRAINT fk_structures_jurisdictions
    FOREIGN KEY (jurisdiction_id)
    REFERENCES jurisdictions (id)
    ON DELETE CASCADE
    ON UPDATE CASCADE
);

CREATE TABLE IF NOT EXISTS plans (
    identifier VARCHAR(36) UNIQUE NOT NULL,
    version VARCHAR(3) NOT NULL,
    name VARCHAR NOT NULL,
    title VARCHAR NOT NULL,
    status VARCHAR(45) NOT NULL,
    fi_status VARCHAR NULL,
    fi_reason VARCHAR NULL,
    intervention_type VARCHAR NULL,
    date DATE NULL,
    effective_period_start DATE NULL,
    effective_period_end DATE NULL,
    PRIMARY KEY (identifier)
);

CREATE TABLE IF NOT EXISTS tasks (
    identifier VARCHAR(36) UNIQUE NOT NULL,
    plan_identifier VARCHAR(36) NOT NULL,
    group_identifier VARCHAR(36) NOT NULL,
    status VARCHAR(45) NOT NULL,
    business_status VARCHAR(45) NOT NULL,
    priority VARCHAR(36) NOT NULL,
    code VARCHAR(36) NOT NULL,
    description VARCHAR(255) NULL,
    focus VARCHAR(255) NOT NULL,
    task_for VARCHAR(36) NOT NULL,
    execution_start_date DATE NULL,
    execution_end_date DATE NULL,
    owner VARCHAR(100) NOT NULL,
    note JSON NULL,
    server_version BIGINT NOT NULL DEFAULT 0,
    PRIMARY KEY (identifier),
    CONSTRAINT fk_tasks_plan
        FOREIGN KEY (plan_identifier)
        REFERENCES plans (identifier)
        ON DELETE CASCADE
        ON UPDATE CASCADE,
    CONSTRAINT fk_tasks_jurisdiction
        FOREIGN KEY (group_identifier)
        REFERENCES jurisdictions (id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);


CREATE TABLE IF NOT EXISTS plan_jurisdiction (
    id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(),
    jurisdiction_id VARCHAR(36) NOT NULL,
    plan_id VARCHAR(36) NOT NULL,
    PRIMARY KEY (id),
    UNIQUE (jurisdiction_id, plan_id),
    CONSTRAINT fk_jurisdiction
        FOREIGN KEY (jurisdiction_id)
        REFERENCES jurisdictions (code)
        ON DELETE RESTRICT,
    CONSTRAINT fk_plan
        FOREIGN KEY (plan_id)
        REFERENCES plans (identifier)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE TABLE IF NOT EXISTS goals (
    id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(),
    goal_id VARCHAR(255) NOT NULL,
    plan_id VARCHAR(36) NOT NULL,
    description VARCHAR(255) NULL,
    priority VARCHAR(255) NULL,
    PRIMARY KEY (id),
    UNIQUE (goal_id, plan_id),
    CONSTRAINT fk_plan
        FOREIGN KEY (plan_id)
        REFERENCES plans (identifier)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE TABLE IF NOT EXISTS goal_target (
    id uuid UNIQUE NOT NULL DEFAULT uuid_generate_v4(),
    goal_id VARCHAR(255) NOT NULL,
    plan_id VARCHAR(36) NOT NULL,
    measure VARCHAR(255) NOT NULL,
    due DATE NOT NULL,
    detail_quantity_value NUMERIC(64,2) NOT NULL,
    detail_quantity_comparator VARCHAR(2) NOT NULL,
    detail_quantity_unit VARCHAR(10) NOT NULL,
    PRIMARY KEY (id),
    UNIQUE (goal_id, plan_id),
    CONSTRAINT fk_plan
        FOREIGN KEY (plan_id)
        REFERENCES plans (identifier)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE TABLE IF NOT EXISTS actions (
    identifier VARCHAR(36) UNIQUE NOT NULL,
    plan_id VARCHAR(36) NOT NULL,
    prefix VARCHAR(3) NOT NULL,
    title VARCHAR(255) NOT NULL,
    description VARCHAR(255) NULL,
    code VARCHAR(255) NOT NULL,
    timingPeriod_start DATE NULL,
    timingPeriod_end DATE NULL,
    reason VARCHAR(255) NOT NULL,
    goal_id VARCHAR(255) NOT NULL,
    subject_codable_concept_text VARCHAR(36) NOT NULL,
    PRIMARY KEY (identifier),
    CONSTRAINT fk_actions_plans
        FOREIGN KEY (plan_id)
        REFERENCES plans (identifier)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);

CREATE TABLE IF NOT EXISTS clients (
    id VARCHAR(36) UNIQUE NOT NULL,
    baseEntityId VARCHAR(36) UNIQUE NOT NULL,
    dateCreated BIGINT NULL,
    dateVoided BIGINT NULL,
    firstName VARCHAR NOT NULL,
    middleName VARCHAR NOT NULL,
    lastName VARCHAR NOT NULL,
    gender VARCHAR NOT NULL,
    birthdate BIGINT NULL,
    identifiers JSONB NOT NULL DEFAULT '{}'::jsonb,
    attributes JSONB NOT NULL DEFAULT '{}'::jsonb,
    relationships JSONB NOT NULL DEFAULT '{}'::jsonb,
    addresses JSONB NOT NULL DEFAULT '{}'::jsonb,
    residence VARCHAR(36) NOT NULL,
    birthdateApprox BOOLEAN NOT NULL DEFAULT 'False',
    deathdateApprox BOOLEAN NOT NULL DEFAULT 'False',
    clientApplicationVersion INT NOT NULL,
    clientDatabaseVersion INT NOT NULL,
    server_version BIGINT NOT NULL DEFAULT 0,
    PRIMARY KEY (id),
    CONSTRAINT fk_clients_residence
        FOREIGN KEY (residence)
        REFERENCES structures (id)
        ON DELETE CASCADE
        ON UPDATE CASCADE
);


The SQL queries for the table indices can be found below

create_indices.sql
CREATE INDEX IF NOT EXISTS jurisdiction_uid_idx ON jurisdictions (uid);
CREATE INDEX IF NOT EXISTS jurisdiction_code_idx ON jurisdictions (code);
CREATE INDEX IF NOT EXISTS jurisdiction_parent_idx ON jurisdictions (parent_id);
CREATE INDEX IF NOT EXISTS jurisdiction_server_version_idx ON jurisdictions (server_version);
CREATE INDEX IF NOT EXISTS structure_uid_idx ON structures (uid);
CREATE INDEX IF NOT EXISTS structure_code_idx ON structures (code);
CREATE INDEX IF NOT EXISTS structures_server_version_idx ON structures (server_version);
CREATE INDEX IF NOT EXISTS for_idx ON tasks (task_for);
CREATE INDEX IF NOT EXISTS status_idx ON tasks (status);
CREATE INDEX IF NOT EXISTS business_status_idx ON tasks (business_status);
CREATE INDEX IF NOT EXISTS task_server_version_idx ON tasks (server_version);
CREATE INDEX IF NOT EXISTS goal_id_idx ON goals (goal_id);
CREATE INDEX IF NOT EXISTS baseEntityId_idx ON clients (baseEntityId);
CREATE INDEX IF NOT EXISTS residence_idx ON clients (residence);

jurisdictions

Stores all the jurisdictions from the OpenSRP server.

structures

Stores all the structures from the OpenSRP server.

plans

Stores all the plans from the OpenSRP server.

tasks

Stores all the tasks from the OpenSRP server. The tasks that are synced are only those that are tied to plans that are in the data warehouse.

plan_jurisdiction

plans and jurisdictions have a one to many relationship which is captured in this table.

goals

Stores all the goals for each plan. This table is extracted from the plan definition that is from the OpenSRP server.

goal_target

Stores all the goal targets for each plan. This table is extracted from the plan definition that is from the OpenSRP server.

actions

Stores all the actions for each plan. This table is extracted from the plan definition that is from the OpenSRP server.

clients

Stores clients from the OpenSRP server. The clients that are synced are only those that are tied to tasks that are in the data warehouse.

Materialized views

plans_materialzied_view

This view holds data that represents a plan-jurisdiction combination. Each record in this view is unique by plan and jurisdiction combined. It also includes the jurisdiction hierarchy on each record.

The SQL for this view is below:

create_plans_materialized_views.sql
CREATE MATERIALIZED VIEW IF NOT EXISTS plans_materialzied_view
AS
WITH RECURSIVE tree AS
(
    --base case
    SELECT
        jurisdictions.id AS jurisdiction_id,
        jurisdictions.parent_id AS jurisdiction_parent_id,
        jurisdictions.name AS jurisdiction_name,
        jurisdictions.geometry AS jurisdiction_geometry,
        0 AS jurisdiction_depth,
        ARRAY[]::varchar[] AS jurisdiction_path,
        ARRAY[]::varchar[] AS jurisdiction_names
    FROM jurisdictions
    -- setting this to id includes current object
    -- setting to parent_id includes only children
    -- setting to '' gets the whole tree
    WHERE jurisdictions.parent_id = ''
    UNION ALL
--recursive part
SELECT
    jurisdictions.id AS jurisdictions_id,
    jurisdictions.parent_id AS jurisdictions_parent_id,
    jurisdictions.name AS jurisdictions_name,
    jurisdictions.geometry AS jurisdictions_geometry,
    tree.jurisdiction_depth + 1,
    tree.jurisdiction_path || ARRAY[jurisdictions.parent_id],
    tree.jurisdiction_names || ARRAY[jurisdictions_parent.name]
FROM jurisdictions, tree
    LEFT JOIN jurisdictions AS jurisdictions_parent ON tree.jurisdiction_id = jurisdictions_parent.id
WHERE tree.jurisdiction_id = jurisdictions.parent_id
)
SELECT
    uuid_generate_v5(
        '6ba7b810-9dad-11d1-80b4-00c04fd430c8',
        concat(plans.identifier, tree.jurisdiction_id)) AS id,
    plans.identifier AS plan_id,
    plans.title AS plan_title,
    plans.name AS plan_name,
    plans.status AS plan_status,
    tree.jurisdiction_id AS jurisdiction_id,
    plans.fi_status AS plan_fi_status,
    plans.fi_reason AS plan_fi_reason,
    plans.date AS plan_date,
    plans.effective_period_start AS plan_effective_period_start,
    plans.effective_period_end AS plan_effective_period_end,
    plans.intervention_type AS plan_intervention_type,
    plans.version AS plan_version,
    tree.jurisdiction_parent_id AS jurisdiction_parent_id,
    tree.jurisdiction_name AS jurisdiction_name,
    tree.jurisdiction_geometry AS jurisdiction_geometry,
    tree.jurisdiction_depth AS jurisdiction_depth,
    tree.jurisdiction_path AS jurisdiction_path,
    tree.jurisdiction_names AS jurisdiction_name_path
FROM plans
LEFT JOIN plan_jurisdiction ON plans.identifier = plan_jurisdiction.plan_id
LEFT JOIN tree ON plan_jurisdiction.jurisdiction_id = tree.jurisdiction_id
ORDER BY plans.date DESC, plans.identifier;

CREATE UNIQUE INDEX IF NOT EXISTS plans_materialzied_view_idx ON plans_materialzied_view (id);

targets_materialized_view

Holds data about each goal in a plan. Each record in this view is unique by goal, plan, and jurisdiction. It also includes the task count and task business statuses counts for each goal-plan-jurisdiction combo.

The SQL for this view is below:

targets_materialized_view.sql
CREATE MATERIALIZED VIEW IF NOT EXISTS targets_materialized_view
AS
SELECT DISTINCT ON (id)
    id,
    plan_id,
    jurisdiction_id,
    goal_id,
    measure,
    goal_value,
    goal_comparator,
    goal_unit,
    action_code,
    task_count,
    completed_task_count,
    COALESCE(task_business_status_map, '{}'::json) AS task_business_status_map
FROM
(SELECT
    uuid_generate_v5(
        '6ba7b810-9dad-11d1-80b4-00c04fd430c8',
        concat(
            goal_target.plan_id,
            goal_target.goal_id,
            plan_jurisdiction.jurisdiction_id)
    ) AS id,
    goal_target.plan_id AS plan_id,
    subq.task_count AS task_count,
    subq1.completed_task_count AS completed_task_count,
    subq2.business_status_map AS task_business_status_map,
    plan_jurisdiction.jurisdiction_id AS jurisdiction_id,
    goal_target.goal_id AS goal_id,
    goal_target.measure AS measure,
    goal_target.detail_quantity_value AS goal_value,
    goal_target.detail_quantity_comparator AS goal_comparator,
    goal_target.detail_quantity_unit AS goal_unit,
    actions.code AS action_code
FROM goal_target
LEFT JOIN plan_jurisdiction AS plan_jurisdiction
    ON goal_target.plan_id = plan_jurisdiction.plan_id
LEFT JOIN actions AS actions
    ON goal_target.plan_id = actions.plan_id
    AND goal_target.goal_id = actions.goal_id
LEFT JOIN LATERAL (
    SELECT
        count(1) AS task_count
    FROM tasks
    WHERE
        plan_identifier = plan_jurisdiction.plan_id
        AND code = actions.code
        AND group_identifier = plan_jurisdiction.jurisdiction_id
) AS subq ON true
LEFT JOIN LATERAL (
    SELECT
        count(1) AS completed_task_count
    FROM tasks
    WHERE
        plan_identifier = plan_jurisdiction.plan_id
        AND code = actions.code
        AND group_identifier = plan_jurisdiction.jurisdiction_id
        AND business_status IN ('Complete', 'Sprayed')
) AS subq1 ON true
LEFT JOIN LATERAL (
    SELECT
        json_object_agg(each.business_status, each.status_count) AS business_status_map
    FROM (
        SELECT business_status, count(1) AS status_count
        FROM tasks
        WHERE
            subq.task_count > 0
            AND plan_identifier = plan_jurisdiction.plan_id
            AND code = actions.code
            AND group_identifier = plan_jurisdiction.jurisdiction_id
        GROUP BY business_status
    ) AS each
) AS subq2 ON true
ORDER BY goal_target.plan_id)
AS main;

CREATE UNIQUE INDEX IF NOT EXISTS targets_materialized_view_idx ON targets_materialized_view (id);


task_structures_materialized_view

In this view, each record is a unique task with additional action, jurisdiction, and structure data.

The SQL for this view is below:

create_task_structures_materialized_views.sql
CREATE MATERIALIZED VIEW IF NOT EXISTS task_structures_materialized_view
AS
SELECT
    tasks.identifier AS task_identifier,
    tasks.plan_identifier AS plan_id,
    tasks.group_identifier AS jurisdiction_id,
    tasks.status AS task_status,
    tasks.business_status AS task_business_status,
    tasks.focus AS task_focus,
    tasks.task_for AS task_task_for,
    tasks.execution_start_date AS task_execution_start_date,
    tasks.execution_end_date AS task_execution_end_date,
    actions.goal_id AS goal_id,
    actions.code AS action_code,
    jurisdictions.name AS jurisdiction_name,
    jurisdictions.parent_id AS jurisdiction_parent_id,
    COALESCE(structures_query.id, structures_query2.id) AS structure_id,
    COALESCE(structures_query.code, structures_query2.code) AS structure_code,
    COALESCE(structures_query.name, structures_query2.name) AS structure_name,
    COALESCE(structures_query.type, structures_query2.type) AS structure_type,
    COALESCE(structures_query.geometry, structures_query2.geometry) AS structure_geometry
FROM tasks
LEFT JOIN actions
    ON actions.code = tasks.code AND actions.plan_id = tasks.plan_identifier
LEFT JOIN jurisdictions
    ON jurisdictions.id = tasks.group_identifier
LEFT JOIN LATERAL (
    SELECT id, code, name, type, geometry
    FROM structures
    WHERE structures.id = tasks.task_for
    AND structures.jurisdiction_id = jurisdictions.id
) AS structures_query ON true
LEFT JOIN LATERAL (
    SELECT
        structures.id AS id,
        structures.code AS code,
        structures.name AS name,
        structures.type AS type,
        structures.geometry AS geometry
    FROM clients
    LEFT JOIN structures
    ON structures.id = clients.residence
    WHERE clients.baseEntityId = tasks.task_for
) AS structures_query2 ON true
ORDER BY tasks.identifier;

CREATE UNIQUE INDEX IF NOT EXISTS task_structures_materialized_view_idx ON task_structures_materialized_view (task_identifier);



The queries for refreshing the materialized views are below:

refresh_materialized_views.sql
REFRESH MATERIALIZED VIEW CONCURRENTLY plans_materialzied_view;
REFRESH MATERIALIZED VIEW CONCURRENTLY targets_materialized_view;
REFRESH MATERIALIZED VIEW CONCURRENTLY task_structures_materialized_view;


Other Queries

task_structures_geojson

Simply rearranges the data from task_structures_materialized_view to be output as valid geojson. This query might be used to create a chart in superset.

The SQL for this is below:

task_structures_geojson_slice.sql
SELECT
    task_identifier,
    plan_id,
    jurisdiction_id,
    goal_id,
    jsonb_build_object(
        'type',         'Feature',
        'id',           task_identifier,
        'geometry',     ST_AsGeoJSON(structure_geometry)::jsonb,
        'properties',   to_jsonb(row) - 'task_identifier' - 'structure_geometry'
    ) AS geojson
FROM (
    SELECT
        task_identifier,
        plan_id,
        jurisdiction_id,
        task_status,
        task_business_status,
        task_focus,
        task_task_for,
        task_execution_start_date,
        task_execution_end_date,
        goal_id,
        action_code,
        jurisdiction_name,
        jurisdiction_parent_id,
        structure_id,
        structure_code,
        structure_name,
        structure_type,
        structure_geometry
    FROM task_structures_materialized_view
) row;

jurisdictions_geojson

Simply rearranges the data from the jurisdictions table to be output as valid geojson. This query might be used to create a chart in superset.

The SQL for this is below:

jurisdictions_geojson_slice.sql
SELECT
    jurisdiction_id,
    jsonb_build_object(
        'type',         'Feature',
        'id',           jurisdiction_id,
        'geometry',     ST_AsGeoJSON(jurisdiction_geometry)::jsonb,
        'properties',   to_jsonb(row) - 'jurisdiction_id' - 'jurisdiction_geometry'
    ) AS geojson
FROM (
    SELECT
        jurisdiction_id,
        jurisdiction_name,
        jurisdiction_parent_id,
        jurisdiction_geometry
    FROM plans_materialzied_view
) row;

plans_slice

Simply queries for plans from the plans_materialized_view view. This query might be used to create a chart in superset.

The SQL for this is below:

plans_slice.sql
SELECT id,
       plan_id,
       plan_title,
       plan_status,
       jurisdiction_id,
       plan_fi_status,
       plan_fi_reason,
       plan_intervention_type,
       jurisdiction_parent_id,
       jurisdiction_name,
       jurisdiction_depth,
       jurisdiction_path,
       jurisdiction_name_path
FROM plans_materialzied_view;

get_operational_areas

This is a simple query to get all the operational areas that we care about.

The SQL for this is below:

get_operational_areas.sql
SELECT DISTINCT(jurisdiction_id) AS id
FROM plan_jurisdiction;

upsert_clients

This is a simple query to insert or update clients.

The SQL for this is below:

upsert_clients.sql
INSERT INTO clients
VALUES
(
    '${client_id}',
    '${client_baseEntityId}',
    ${client_dateCreated},
    ${client_dateVoided},
    '${client_firstName}',
    '${client_middleName}',
    '${client_lastName}',
    '${client_gender}',
    ${client_birthdate},
    '${client_identifiers}'::jsonb,
    '${client_attributes}'::jsonb,
    '${client_relationships}'::jsonb,
    '${client_addresses}'::jsonb,
    '${client_residence}',
    '${client_birthdateApprox}',
    '${client_deathdateApprox}',
    ${client_clientApplicationVersion},
    ${client_clientDatabaseVersion},
    ${client_serverVersion}
)
ON CONFLICT (id) DO UPDATE
SET
    id = '${client_id}',
    baseEntityId = '${client_baseEntityId}',
    dateCreated = ${client_dateCreated},
    dateVoided = ${client_dateVoided},
    firstName = '${client_firstName}',
    middleName = '${client_middleName}',
    lastName = '${client_lastName}',
    gender = '${client_gender}',
    birthdate = ${client_birthdate},
    identifiers = '${client_identifiers}'::jsonb,
    attributes = '${client_attributes}'::jsonb,
    relationships = '${client_relationships}'::jsonb,
    addresses = '${client_addresses}'::jsonb,
    residence = '${client_residence}',
    birthdateApprox = '${client_birthdateApprox}',
    deathdateApprox = '${client_deathdateApprox}',
    clientApplicationVersion = ${client_clientApplicationVersion},
    clientDatabaseVersion = ${client_clientDatabaseVersion},
    server_version = ${client_serverVersion};

upsert_plans

This is a simple query to insert or update plans.

The SQL for this is below:

upsert_plans.sql
INSERT INTO plans (
    identifier,
    version,
    name,
    title,
    status,
    date,
    effective_period_start,
    effective_period_end
)
VALUES (
    '${identifier}',
    '${version}',
    '${name}',
    '${title}',
    '${status}',
    '${date}',
    '${effectivePeriod_start}',
    '${effectivePeriod_end}'
)
ON CONFLICT (identifier) DO UPDATE
SET
    version = '${version}',
    name = '${name}',
    title = '${title}',
    status = '${status}',
    date = '${date}',
    effective_period_start = '${effectivePeriod_start}',
    effective_period_end = '${effectivePeriod_end}';

ERD

Entity Relationship Diagram