From 50593d42aecfcbabcf5b40ab4fbc899f95dc36d5 Mon Sep 17 00:00:00 2001
From: Aimee Stewart
Date: Thu, 12 Sep 2024 16:01:45 -0500
Subject: [PATCH] testing automation; unfinished

---
 _sphinx_config/pages/aws/aws_setup.rst |  21 ++++
 _sphinx_config/pages/aws/roles.rst     |  17 ++-
 aws/events/bison_mount_gbif_lambda.py  |  53 +++------
 aws/events/bison_query_lambda.py       | 119 +++++++++++++++++++
 aws/events/bison_subset_gbif_lambda.py | 158 ++++++-------------------
 aws/redshift/load_ancillary_data.sql   |  10 +-
 aws/redshift/queries.sql               |  21 ++++
 aws/redshift/subset_to_bison.sql       |  39 +++---
 8 files changed, 255 insertions(+), 183 deletions(-)
 create mode 100644 aws/events/bison_query_lambda.py
 create mode 100644 aws/redshift/queries.sql

diff --git a/_sphinx_config/pages/aws/aws_setup.rst b/_sphinx_config/pages/aws/aws_setup.rst
index c286d3d1..986cc305 100644
--- a/_sphinx_config/pages/aws/aws_setup.rst
+++ b/_sphinx_config/pages/aws/aws_setup.rst
@@ -13,14 +13,35 @@
 The :ref:`_bison_redshift_s3_role` allows Redshift to access public S3 data and
 the bison S3 bucket, and allows Redshift to perform glue functions.  Its trust
 relationship grants AssumeRole to the redshift service.
+Make sure that the same role granted to the namespace is used for creating the external
+schema and the lambda functions. When mounting external data as a redshift table in the
+external schema, you may encounter an error indicating that the "dev" database does not
+exist. This refers to the external database, and may indicate that the role used by the
+command and/or namespace differs from the role granted to the schema upon creation.
+
 Redshift Namespace and Workgroup
 ===========================================================

+Namespace and Workgroup
+------------------------------
+
 A namespace is storage-related, with database objects and users.  A workgroup is a
 collection of compute resources and associated properties and limits, such as
 security groups.
 https://docs.aws.amazon.com/redshift/latest/mgmt/serverless-workgroup-namespace.html

+External Schema
+------------------------
+The command below creates an external schema, redshift_spectrum, and also creates a
+**new** external database "dev". It appears in the console to be the same "dev"
+database that contains the public schema, but it is separate.
Also note the IAM role
+used to create the schema must match the role attached to the namespace::
+
+    CREATE external schema redshift_spectrum
+        FROM data catalog
+        DATABASE dev
+        IAM_ROLE 'arn:aws:iam::321942852011:role/bison_redshift_s3_role'
+        CREATE external database if NOT exists;

 EC2 instance creation
 ===========================================================

diff --git a/_sphinx_config/pages/aws/roles.rst b/_sphinx_config/pages/aws/roles.rst
index 9a088892..3308d989 100644
--- a/_sphinx_config/pages/aws/roles.rst
+++ b/_sphinx_config/pages/aws/roles.rst
@@ -13,15 +13,30 @@ bison_redshift_s3_role

 * Policies:

+  * AmazonRedshiftAllCommandsFullAccess (AWS managed)
+  * AmazonRedshiftDataFullAccess (AWS managed)
+  * AmazonRedshiftFullAccess (AWS managed)
   * bison_s3_policy (read public/GBIF S3 data and read/write bison S3 data)
   * redshift_glue_policy.json (for Redshift interactions)
-  * AmazonRedshiftAllCommandsFullAccess (AWS managed)
+  * AmazonS3FullAccess (AWS managed)

 * Trust policy:

   *

+bison_redshift_lambda_role
+--------------------------
+
+Attached to BISON lambda functions. Policies:
+
+  * AmazonRedshiftAllCommandsFullAccess (AWS managed)
+  * AmazonRedshiftDataFullAccess (AWS managed)
+  * AmazonRedshiftFullAccess (AWS managed)
+  * bison_lambda_log_policy (write CloudWatch logs to bison log groups)
+    TODO: add a new log group for each lambda function
+  * bison_s3_policy (read public/GBIF S3 data and read/write bison S3 data)
+
 .. _bison_ec2_s3_role:

 bison_ec2_s3_role
diff --git a/aws/events/bison_mount_gbif_lambda.py b/aws/events/bison_mount_gbif_lambda.py
index e2f0c200..1bd673d4 100644
--- a/aws/events/bison_mount_gbif_lambda.py
+++ b/aws/events/bison_mount_gbif_lambda.py
@@ -1,6 +1,5 @@
 import json
 import boto3
-import botocore
 import botocore.session as bc
 from botocore.client import Config
 from datetime import datetime
@@ -13,6 +12,8 @@
 database = "dev"
 dbuser = "IAM:aimee.stewart"
 dbuser = "arn:aws:iam::321942852011:role/service-role/bison_subset_gbif_lambda-role-9i5qvpux"
+pub_schema = "public"
+external_schema = "redshift_spectrum"
 timeout = 900
 waittime = 5

@@ -21,8 +22,6 @@
 gbif_datestr = f"{datetime.now().year}-{datetime.now().month:02d}-01"
 parquet_key = f"occurrence/{gbif_datestr}/occurrence.parquet"
 bison_datestr = gbif_datestr.replace("-", "_")
-pub_schema = "public"
-external_schema = "redshift_spectrum"

 gbif_odr_data = f"s3://{gbif_bucket}/{parquet_key}/"
 mounted_gbif_name = f"{external_schema}.occurrence_{bison_datestr}_parquet"
@@ -103,20 +102,6 @@ class VARCHAR(max),
         ('HUMAN_OBSERVATION', 'OBSERVATION', 'OCCURRENCE', 'PRESERVED_SPECIMEN');
 """

-list_external_tables_stmt = f"""
-    SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime
-    FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid
-    WHERE cls.relnamespace = ns.oid
-    AND schemaname = '{external_schema}';
-"""
-
-list_public_tables_stmt = f"""
-    SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime
-    FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid
-    WHERE cls.relnamespace = ns.oid
-    AND schemaname = '{pub_schema}';
-"""
-
 count_gbif_stmt = f"SELECT COUNT(*) from {mounted_gbif_name};"
 count_bison_stmt = f"SELECT COUNT(*) FROM {subset_bison_name};"
 unmount_stmt = f"DROP TABLE {mounted_gbif_name};"
@@ -145,8 +130,8 @@ def lambda_handler(event, context):
     # Submit query request
     try:
         submit_result = client_redshift.execute_statement(
-            WorkgroupName=workgroup, Database=database, Sql=list_public_tables_stmt)
-        
print(f"*** {mounted_gbif_name} mount successfully executed") + WorkgroupName=workgroup, Database=database, Sql=mount_stmt) + print(f"*** Mount command submitted") except Exception as e: raise Exception(e) @@ -157,7 +142,7 @@ def lambda_handler(event, context): print(f"*** {k} = {v}") # ------------------------------------- - # Loop til complete + # Loop til complete, then describe result elapsed_time = 0 complete = False while not complete and elapsed_time < 300: @@ -178,20 +163,20 @@ def lambda_handler(event, context): print(f"Failed to describe_statement {e}") complete = True - # # ------------------------------------- - # # Get statement output for query - try: - stmt_result = client_redshift.get_statement_result(Id=submit_id) - except Exception as e: - print(f"Failed to get_statement_result {e}") - else: - print("*** statement_result records") - try: - records = stmt_result["Records"] - for rec in records: - print(f"*** {rec}") - except Exception as e: - print(f"Failed to return records ({e})") + # # # ------------------------------------- + # # # IFF query, get statement output + # try: + # stmt_result = client_redshift.get_statement_result(Id=submit_id) + # except Exception as e: + # print(f"*** No get_statement_result {e}") + # else: + # print("*** get_statement_result records") + # try: + # records = stmt_result["Records"] + # for rec in records: + # print(f"*** {rec}") + # except Exception as e: + # print(f"Failed to return records ({e})") return { 'statusCode': 200, diff --git a/aws/events/bison_query_lambda.py b/aws/events/bison_query_lambda.py new file mode 100644 index 00000000..bf465662 --- /dev/null +++ b/aws/events/bison_query_lambda.py @@ -0,0 +1,119 @@ +import json +import boto3 +import botocore.session as bc +from botocore.client import Config +from datetime import datetime +import time + +print('Loading function') + +region = "us-east-1" +workgroup = "bison" +database = "dev" +dbuser = "IAM:aimee.stewart" +dbuser = "arn:aws:iam::321942852011:role/service-role/bison_subset_gbif_lambda-role-9i5qvpux" +dbuser = "arn:aws:iam::321942852011:role/service-role/bison_subset_gbif_lambda-role-9i5qvpux" +bison_bucket = 'bison-321942852011-us-east-1' +timeout = 900 +waittime = 2 + +# Define the public bucket and file to query +gbif_bucket = f"gbif-open-data-{region}" +gbif_datestr = f"{datetime.now().year}-{datetime.now().month:02d}-01" +parquet_key = f"occurrence/{gbif_datestr}/occurrence.parquet" +bison_datestr = gbif_datestr.replace("-", "_") +pub_schema = "public" +external_schema = "redshift_spectrum" + +gbif_odr_data = f"s3://{gbif_bucket}/{parquet_key}/" +mounted_gbif_name = f"{external_schema}.occurrence_{bison_datestr}_parquet" +subset_bison_name = f"{pub_schema}.bison_{bison_datestr}" + +list_external_tables_stmt = f""" + SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime + FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid + WHERE cls.relnamespace = ns.oid + AND schemaname = '{external_schema}'; +""" + +list_public_tables_stmt = f""" + SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime + FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid + WHERE cls.relnamespace = ns.oid + AND schemaname = '{pub_schema}'; +""" + +count_gbif_stmt = f"SELECT COUNT(*) from {mounted_gbif_name};" +count_bison_stmt = f"SELECT COUNT(*) FROM {subset_bison_name};" + +# Initializing Botocore client +session = boto3.session.Session() +bc_session = bc.get_session() +session = 
boto3.Session( + botocore_session=bc_session, + region_name=region +) + +# Initializing Redshift's client +config = Config(connect_timeout=timeout, read_timeout=timeout) +client_redshift = session.client("redshift-data", config=config) + +# ----------------------------------------------------- +def lambda_handler(event, context): + print("*** Entered lambda_handler") + # ------------------------------------- + # Submit query request + try: + submit_result = client_redshift.execute_statement( + WorkgroupName=workgroup, Database=database, Sql=list_public_tables_stmt) + print(f"*** Mount command submitted") + + except Exception as e: + raise Exception(e) + + submit_id = submit_result['Id'] + print(f"*** submit id = {submit_id}") + for k, v in submit_result.items(): + print(f"*** {k} = {v}") + + # ------------------------------------- + # Loop til complete, then describe result + elapsed_time = 0 + complete = False + while not complete and elapsed_time < 300: + try: + describe_result = client_redshift.describe_statement(Id=submit_id) + status = describe_result["Status"] + print(f"*** Query Status - {status} after {elapsed_time} seconds") + if status in ("ABORTED", "FAILED", "FINISHED"): + complete = True + desc_id = describe_result['Id'] + print(f"*** desc id = {desc_id}") + for k, v in describe_result.items(): + print(f"*** {k} = {v}") + else: + time.sleep(waittime) + elapsed_time += waittime + except Exception as e: + print(f"Failed to describe_statement {e}") + complete = True + + # ------------------------------------- + # IFF query, get statement output + try: + stmt_result = client_redshift.get_statement_result(Id=submit_id) + except Exception as e: + print(f"*** No get_statement_result {e}") + else: + print("*** get_statement_result records") + try: + records = stmt_result["Records"] + for rec in records: + print(f"*** {rec}") + except Exception as e: + print(f"Failed to return records ({e})") + + return { + 'statusCode': 200, + 'body': json.dumps(f"Lambda result logged") + } diff --git a/aws/events/bison_subset_gbif_lambda.py b/aws/events/bison_subset_gbif_lambda.py index 85106bac..10c68f65 100644 --- a/aws/events/bison_subset_gbif_lambda.py +++ b/aws/events/bison_subset_gbif_lambda.py @@ -1,10 +1,8 @@ -import os -import json import boto3 -import botocore import botocore.session as bc from botocore.client import Config from datetime import datetime +import time print('Loading function') @@ -12,16 +10,18 @@ workgroup = "bison" database = "dev" dbuser = "IAM:aimee.stewart" -dbuser = "arn:aws:iam::321942852011:role/service-role/bison_subset_gbif_lambda-role-9i5qvpux" +dbuser = "arn:aws:iam::321942852011:role/service-role/bison_redshift_lambda_role" timeout = 900 +waittime = 2 +bison_bucket = 'bison-321942852011-us-east-1' +pub_schema = "public" +external_schema = "redshift_spectrum" # Define the public bucket and file to query gbif_bucket = f"gbif-open-data-{region}" gbif_datestr = f"{datetime.now().year}-{datetime.now().month:02d}-01" parquet_key = f"occurrence/{gbif_datestr}/occurrence.parquet" bison_datestr = gbif_datestr.replace("-", "_") -pub_schema = "public" -external_schema = "redshift_spectrum" gbif_odr_data = f"s3://{gbif_bucket}/{parquet_key}/" mounted_gbif_name = f"{external_schema}.occurrence_{bison_datestr}_parquet" @@ -102,30 +102,11 @@ class VARCHAR(max), ('HUMAN_OBSERVATION', 'OBSERVATION', 'OCCURRENCE', 'PRESERVED_SPECIMEN'); """ - -list_external_tables_stmt = f""" - SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime - FROM 
pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid - WHERE cls.relnamespace = ns.oid - AND schemaname = '{external_schema}'; -""" - -list_public_tables_stmt = f""" - SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime - FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid - WHERE cls.relnamespace = ns.oid - AND schemaname = '{pub_schema}'; -""" - count_gbif_stmt = f"SELECT COUNT(*) from {mounted_gbif_name};" count_bison_stmt = f"SELECT COUNT(*) FROM {subset_bison_name};" unmount_stmt = f"DROP TABLE {mounted_gbif_name};" -bison_bucket = 'bison-321942852011-us-east-1' -test_fname = 'bison_trigger_success.txt' -test_content = 'Success = True' session = boto3.session.Session() -region = "us-east-1" # Initializing Botocore client bc_session = bc.get_session() @@ -144,112 +125,41 @@ def lambda_handler(event, context): # ------------------------------------- # Mount GBIF data try: - mount_response = client_redshift.execute_statement( + submit_result = client_redshift.execute_statement( WorkgroupName=workgroup, Database=database, Sql=mount_stmt) - print(f"*** {mounted_gbif_name} mount successfully executed") - - except botocore.exceptions.ConnectionError as e: - client_redshift_1 = session.client("redshift-data", config=config) - mount_response = client_redshift_1.batch_execute_statement( - WorkgroupName=workgroup, Database=database, DbUser=dbuser, Sql=mount_stmt) - print(f"*** {mounted_gbif_name} mount after reestablishing the connection") + print("*** Mount submitted") except Exception as e: raise Exception(e) - print(str(mount_response)) - curr_id = mount_response['Id'] + curr_id = submit_result['Id'] print(f"*** id = {curr_id}") describe_response = client_redshift.describe_statement(Id=curr_id) print(str(describe_response)) - # # ------------------------------------- - # # Mount GBIF data - # try: - # mount_response = client_redshift.execute_statement( - # WorkgroupName=workgroup, Database=database, Sql=mount_stmt) - # print(f"*** {mounted_gbif_name} mount successfully executed") - # - # except botocore.exceptions.ConnectionError as e: - # client_redshift_1 = session.client("redshift-data", config=config) - # mount_response = client_redshift_1.execute_statement( - # WorkgroupName=workgroup, Database=database, DbUser=dbuser, Sql=mount_stmt) - # print(f"*** {mounted_gbif_name} mount after reestablishing the connection") - # - # except Exception as e: - # raise Exception(e) - # - # print(str(mount_response)) - # curr_id = mount_response['Id'] - # print(f"*** id = {curr_id}") - # describe_response = client_redshift.describe_statement(Id=curr_id) - # print(str(describe_response)) - # # ------------------------------------- - # # Wait for success - # - # # ------------------------------------- - # # Count GBIF records - # try: - # count_gbif_response = client_redshift.execute_statement( - # WorkgroupName=workgroup, Database=database, Sql=count_gbif_stmt) - # print("*** GBIF count successfully executed") - # - # except botocore.exceptions.ConnectionError as e: - # client_redshift_1 = session.client("redshift-data", config=config) - # count_gbif_response = client_redshift_1.execute_statement( - # WorkgroupName=workgroup, Database=database, DbUser=dbuser, Sql=mount_stmt) - # print("*** GBIF count after reestablishing the connection") - # - # except Exception as e: - # raise Exception(e) - # - # print(str(count_gbif_response)) - # curr_id = count_gbif_response['Id'] - # describe_response = 
client_redshift.describe_statement(Id=curr_id) - # print(f"*** id = {curr_id}") - # print(str(describe_response)) - # # ------------------------------------- - # # Subset GBIF data for BISON - # try: - # subset_response = client_redshift.execute_statement( - # WorkgroupName=workgroup, Database=database, Sql=subset_stmt) - # print(f"*** Subset to {subset_bison_name} successfully executed") - # - # except botocore.exceptions.ConnectionError as e: - # client_redshift_1 = session.client("redshift-data", config=config) - # subset_response = client_redshift_1.execute_statement( - # WorkgroupName=workgroup, Database=database, DbUser=dbuser, Sql=subset_stmt) - # print(f"*** Subset to {subset_bison_name} after reestablishing the connection") - # - # except Exception as e: - # raise Exception(e) - # - # print(f"*** id = {subset_response['Id']}") - # print(str(subset_response)) - # # ------------------------------------- - # # Count BISON records - # try: - # count_bison_response = client_redshift.execute_statement( - # WorkgroupName=workgroup, Database=database, DbUser=dbuser, Sql=count_bison_stmt) - # print("*** BISON count successfully executed") - # - # except botocore.exceptions.ConnectionError as e: - # client_redshift_1 = session.client("redshift-data", config=config) - # count_bison_response = client_redshift_1.execute_statement( - # WorkgroupName=workgroup, Database=database, Sql=count_bison_stmt) - # print("*** BISON count after reestablishing the connection") - # - # except Exception as e: - # raise Exception(e) - - print(f"*** id = {count_bison_response['Id']}") - print(str(count_bison_response)) + submit_id = submit_result['Id'] + print(f"*** submit id = {submit_id}") + for k, v in submit_result.items(): + print(f"*** {k} = {v}") + # ------------------------------------- - # Place test file in bucket to indicate success - s3 = boto3.client('s3', region_name=region) - s3.put_object(Body=test_content, Bucket=bison_bucket, Key=test_fname) - - return { - 'statusCode': 200, - 'body': json.dumps(f"Lambda result: {str(mount_response)}") - } + # Loop til complete, then describe result + elapsed_time = 0 + complete = False + while not complete and elapsed_time < 300: + try: + describe_result = client_redshift.describe_statement(Id=submit_id) + status = describe_result["Status"] + print(f"*** Query Status - {status} after {elapsed_time} seconds") + if status in ("ABORTED", "FAILED", "FINISHED"): + complete = True + desc_id = describe_result['Id'] + print(f"*** desc id = {desc_id}") + for k, v in describe_result.items(): + print(f"*** {k} = {v}") + else: + time.sleep(waittime) + elapsed_time += waittime + except Exception as e: + print(f"Failed to describe_statement {e}") + complete = True diff --git a/aws/redshift/load_ancillary_data.sql b/aws/redshift/load_ancillary_data.sql index e8028c7c..e5834130 100644 --- a/aws/redshift/load_ancillary_data.sql +++ b/aws/redshift/load_ancillary_data.sql @@ -47,7 +47,7 @@ CREATE TABLE riisv2_2024_08_01 ( ); COPY riisv2_2024_08_01 -FROM 's3://bison-321942852011-us-east-1/input_data/USRIISv2_MasterList_annotated_2024_08_01.csv' +FROM 's3://bison-321942852011-us-east-1/input/USRIISv2_MasterList_annotated_2024_08_01.csv' FORMAT CSV IAM_role DEFAULT; @@ -75,7 +75,7 @@ CREATE TABLE aiannh2023 ( ); COPY aiannh2023 -FROM 's3://bison-321942852011-us-east-1/input_data/cb_2023_us_aiannh_500k.shp' +FROM 's3://bison-321942852011-us-east-1/input/cb_2023_us_aiannh_500k.shp' FORMAT SHAPEFILE IAM_role DEFAULT; @@ -103,7 +103,7 @@ CREATE TABLE county2023 ( ); COPY county2023 
-FROM 's3://bison-321942852011-us-east-1/input_data/cb_2023_us_county_500k.shp'
+FROM 's3://bison-321942852011-us-east-1/input/cb_2023_us_county_500k.shp'
 FORMAT SHAPEFILE
 IAM_role DEFAULT;

@@ -126,13 +126,11 @@ CREATE TABLE pad1 (
    GIS_Acres  VARCHAR(max)
 );

-COPY pad1 FROM 's3://bison-321942852011-us-east-1/input_data/pad_4.0_gap1_4326.shp'
+COPY pad1 FROM 's3://bison-321942852011-us-east-1/input/pad_4.0_gap1_4326.shp'
 FORMAT SHAPEFILE
 SIMPLIFY AUTO
 IAM_role DEFAULT;

-SELECT query_id, start_time, line_number, column_name, column_type, error_message
-  FROM sys_load_error_detail ORDER BY start_time DESC;

---- -------------------------------------------------------------------------------------
---- -------------------------------------------------------------------------------------
diff --git a/aws/redshift/queries.sql b/aws/redshift/queries.sql
new file mode 100644
index 00000000..78386319
--- /dev/null
+++ b/aws/redshift/queries.sql
@@ -0,0 +1,21 @@
+-- Get last error message
+SELECT query_id, start_time, line_number, column_name, column_type, error_message
+  FROM sys_load_error_detail ORDER BY start_time DESC;
+
+-- Count records after data load
+SELECT COUNT(*) from dev.redshift_spectrum.occurrence_2024_09_01_parquet;
+SELECT COUNT(*) FROM public.bison_2024_09_01;
+
+-- List Redshift tables and creation times
+SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime
+FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid
+WHERE cls.relnamespace = ns.oid
+  AND schemaname = 'public';
+
+
+SELECT * FROM svv_all_schemas WHERE database_name = 'dev'
+ORDER BY database_name, SCHEMA_NAME;
+
+SELECT current_user;
+SELECT * FROM PG_USER_INFO;
+
diff --git a/aws/redshift/subset_to_bison.sql b/aws/redshift/subset_to_bison.sql
index 6799188b..009d12b8 100644
--- a/aws/redshift/subset_to_bison.sql
+++ b/aws/redshift/subset_to_bison.sql
@@ -1,3 +1,4 @@
+
 -- Mount S3 GBIF Open Data Registry as an external table, then subset it for BISON

-------------------
 -- Mount GBIF
 -- -------------------------------------------------------------------------------------
 -- Create a schema for mounting external data
--- Throws error if pre-existing
-CREATE external schema redshift_spectrum
+-- May throw an error if the schema is pre-existing, so drop it first.
+-- This also creates a new external database "dev", though it appears in the console to
+-- be the same "dev" database that contains the public schema.
+DROP EXTERNAL SCHEMA redshift_spectrum;
+CREATE EXTERNAL SCHEMA IF NOT EXISTS redshift_spectrum
 FROM data catalog
- DATABASE dev
- IAM_ROLE 'arn:aws:iam::321942852011:role/service-role/AmazonRedshift-CommandsAccessRole-20231129T105842'
+ DATABASE 'dev'
+ -- Same role as the namespace
+ IAM_ROLE 'arn:aws:iam::321942852011:role/bison_redshift_s3_role'
 CREATE external database if NOT exists;
+-- If the IAM role changes, re-grant access:
+--GRANT USAGE TO redshift_spectrum to "IAMR:bison_subset_gbif_lambda-role-9i5qvpux";
+GRANT ALL ON ALL TABLES IN SCHEMA redshift_spectrum
+    TO ROLE 'arn:aws:iam::321942852011:role/service-role/bison_subset_gbif_lambda-role-9i5qvpux';
+
+
+
 -- Mount a table of current GBIF ODR data in S3
+-- An error indicating that the "dev" database does not exist refers to the external
+-- database, and may indicate that the role used by the command and/or namespace
+-- differs from the role granted to the schema upon creation.
CREATE EXTERNAL TABLE redshift_spectrum.occurrence_2024_09_01_parquet ( gbifid VARCHAR(max), datasetkey VARCHAR(max), @@ -73,11 +88,12 @@ CREATE EXTERNAL TABLE redshift_spectrum.occurrence_2024_09_01_parquet ( STORED AS PARQUET LOCATION 's3://gbif-open-data-us-east-1/occurrence/2024-09-01/occurrence.parquet/'; + -- ------------------------------------------------------------------------------------- -- Subset for BISON -- ------------------------------------------------------------------------------------- -- Drop previous table; -DROP TABLE IF EXISTS public.bison_2024_07_01; +DROP TABLE IF EXISTS public.bison_2024_08_01; -- Create a BISON table with a subset of records and subset of fields -- TODO: This includes lat/lon, allowing final export to Parquet after deleting geom CREATE TABLE public.bison_2024_09_01 AS @@ -98,19 +114,6 @@ CREATE TABLE public.bison_2024_09_01 AS AND basisofrecord IN ('HUMAN_OBSERVATION', 'OBSERVATION', 'OCCURRENCE', 'PRESERVED_SPECIMEN'); --- ------------------------------------------------------------------------------------- --- Misc Queries --- ------------------------------------------------------------------------------------- --- Count records from full GBIF and BISON subset -SELECT COUNT(*) from dev.redshift_spectrum.occurrence_2024_09_01_parquet; -SELECT COUNT(*) FROM public.bison_2024_09_01; - --- List Redshift tables and creation times -SELECT reloid AS tableid, nspname as schemaname, relname as tablename, relcreationtime -FROM pg_class_info cls LEFT JOIN pg_namespace ns ON cls.relnamespace=ns.oid -WHERE cls.relnamespace = ns.oid - AND schemaname = 'public'; - -- ------------------------------------------------------------------------------------- -- Unmount original GBIF data -- -------------------------------------------------------------------------------------
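-- A minimal sketch of the unmount step (not shown in this hunk): assuming the
-- 2024-09-01 mount name used above, and matching unmount_stmt in the lambda code,
-- dropping the external table leaves the source data in the GBIF ODR bucket untouched.
DROP TABLE redshift_spectrum.occurrence_2024_09_01_parquet;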