Make workspaces private and personal by default for all users #484

Open
wants to merge 2 commits into base: main
9 changes: 9 additions & 0 deletions lib/chatbot-api/functions/api-handler/routes/documents.py
@@ -2,6 +2,7 @@
import genai_core.types
import genai_core.upload
import genai_core.documents
import genai_core.workspaces
from pydantic import BaseModel
from aws_lambda_powertools import Logger, Tracer
from aws_lambda_powertools.event_handler.appsync import Router
@@ -103,6 +104,14 @@ class DocumentSubscriptionStatusRequest(BaseModel):
@router.resolver(field_name="getUploadFileURL")
@tracer.capture_method
def file_upload(input: dict):

request = FileUploadRequest(**input)

# Check the workspace policy before allowing the upload
user_id = genai_core.auth.get_user_id(router)
is_workspace_readable = genai_core.workspaces.is_workspace_readable(request.workspaceId, user_id)

if not is_workspace_readable:
raise genai_core.types.CommonError("Due to security policy, you are not allowed to upload documents into this workspace.")

_, extension = os.path.splitext(request.fileName)
if extension not in allowed_extensions:
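The helper genai_core.workspaces.is_workspace_readable is called above but its implementation is not included in this diff. A minimal sketch of what such a check could look like, assuming the policy table is keyed by pk = user id and sk = workspace id, and that public grants are stored under a sentinel user "0" (all assumptions, not confirmed by this PR):

import os
import boto3

dynamodb = boto3.resource("dynamodb")
# WORKSPACES_POLICY_TABLE_NAME is wired up in rest-api.ts later in this PR
policy_table = dynamodb.Table(os.environ["WORKSPACES_POLICY_TABLE_NAME"])

def is_workspace_readable(workspace_id: str, user_id: str):
    # Return the policy item granted to this user (or to the hypothetical
    # public sentinel user "0"), or None when no grant exists.
    for principal in (str(user_id), "0"):
        response = policy_table.get_item(Key={"pk": principal, "sk": workspace_id})
        item = response.get("Item")
        if item:
            return item  # carries the is_owner / is_writable flags
    return None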
77 changes: 61 additions & 16 deletions lib/chatbot-api/functions/api-handler/routes/workspaces.py
@@ -21,6 +21,7 @@ class GenericCreateWorkspaceRequest(BaseModel):
class CreateWorkspaceAuroraRequest(BaseModel):
kind: str
name: str
isPublic: bool
embeddingsModelProvider: str
embeddingsModelName: str
crossEncoderModelProvider: str
@@ -37,6 +38,7 @@ class CreateWorkspaceAuroraRequest(BaseModel):
class CreateWorkspaceOpenSearchRequest(BaseModel):
kind: str
name: str
isPublic: bool
embeddingsModelProvider: str
embeddingsModelName: str
crossEncoderModelProvider: str
@@ -51,14 +53,29 @@ class CreateWorkspaceOpenSearchRequest(BaseModel):
class CreateWorkspaceKendraRequest(BaseModel):
kind: str
name: str
isPublic: bool
kendraIndexId: str
useAllData: bool


@router.resolver(field_name="listWorkspaces")
@tracer.capture_method
def list_workspaces():
workspaces = genai_core.workspaces.list_workspaces()

user_id = genai_core.auth.get_user_id(router)

# Get public workspaces
publicWorkspace = genai_core.workspaces.list_workspaces_by_user(0)

# Get workspaces assigned to the current user through policy
workspacesByUser = genai_core.workspaces.list_workspaces_by_user(user_id)

workspaces = []
processedItem = []
for workspaceItem in (workspacesByUser + publicWorkspace):
if workspaceItem['workspace_id'] not in processedItem:
workspaces.append(workspaceItem)
processedItem.append(workspaceItem['workspace_id'])

ret_value = [_convert_workspace(workspace) for workspace in workspaces]
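The listing now merges the caller's own grants with the public ones. genai_core.workspaces.list_workspaces_by_user is not shown in this diff; a sketch of what it could do, under the same key assumptions as above (pk = user id, sk = workspace id, public grants under the sentinel user "0", so list_workspaces_by_user(0) resolves the public set):

import os
import boto3
from boto3.dynamodb.conditions import Key

import genai_core.workspaces

dynamodb = boto3.resource("dynamodb")
policy_table = dynamodb.Table(os.environ["WORKSPACES_POLICY_TABLE_NAME"])

def list_workspaces_by_user(user_id):
    # Read every policy item granted to this principal and resolve each
    # grant to the full workspace record via the existing get_workspace helper.
    response = policy_table.query(KeyConditionExpression=Key("pk").eq(str(user_id)))
    workspaces = []
    for policy in response.get("Items", []):
        workspace = genai_core.workspaces.get_workspace(policy["sk"])
        if workspace:
            workspaces.append(workspace)
    return workspaces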

@@ -68,11 +85,24 @@ def list_workspaces():
@router.resolver(field_name="getWorkspace")
@tracer.capture_method
def get_workspace(workspaceId: str):

user_id = genai_core.auth.get_user_id(router)
# Check the policy before fetching workspace data
workspace_policy = genai_core.workspaces.is_workspace_readable(workspaceId, user_id)

if not workspace_policy:
return None

workspace = genai_core.workspaces.get_workspace(workspaceId)

if not workspace:
return None

key_policies = ['is_owner', 'is_writable']

for key_policy in key_policies:
workspace[key_policy] = workspace_policy[key_policy]

ret_value = _convert_workspace(workspace)

return ret_value
@@ -116,6 +146,7 @@ def create_kendra_workspace(input: dict):


def _create_workspace_aurora(request: CreateWorkspaceAuroraRequest, config: dict):
user_id = genai_core.auth.get_user_id(router)
workspace_name = request.name.strip()
embedding_models = config["rag"]["embeddingsModels"]
cross_encoder_models = config["rag"]["crossEncoderModels"]
@@ -170,28 +201,32 @@ def _create_workspace_aurora(request: CreateWorkspaceAuroraRequest, config: dict
if request.chunkOverlap < 0 or request.chunkOverlap >= request.chunkSize:
raise genai_core.types.CommonError("Invalid chunk overlap")

item = genai_core.workspaces.create_workspace_aurora(
workspace_name=workspace_name,
is_public=request.isPublic,
embeddings_model_provider=request.embeddingsModelProvider,
embeddings_model_name=request.embeddingsModelName,
embeddings_model_dimensions=embeddings_model_dimensions,
cross_encoder_model_provider=request.crossEncoderModelProvider,
cross_encoder_model_name=request.crossEncoderModelName,
languages=request.languages,
metric=request.metric,
has_index=request.index,
hybrid_search=request.hybridSearch,
chunking_strategy=request.chunkingStrategy,
chunk_size=request.chunkSize,
chunk_overlap=request.chunkOverlap,
creator_id=user_id,
)
return _convert_workspace(
genai_core.workspaces.create_workspace_aurora(
workspace_name=workspace_name,
embeddings_model_provider=request.embeddingsModelProvider,
embeddings_model_name=request.embeddingsModelName,
embeddings_model_dimensions=embeddings_model_dimensions,
cross_encoder_model_provider=request.crossEncoderModelProvider,
cross_encoder_model_name=request.crossEncoderModelName,
languages=request.languages,
metric=request.metric,
has_index=request.index,
hybrid_search=request.hybridSearch,
chunking_strategy=request.chunkingStrategy,
chunk_size=request.chunkSize,
chunk_overlap=request.chunkOverlap,
)
get_workspace(item.workspace_id)
)


def _create_workspace_open_search(
request: CreateWorkspaceOpenSearchRequest, config: dict
):
user_id = genai_core.auth.get_user_id(router)
workspace_name = request.name.strip()
embedding_models = config["rag"]["embeddingsModels"]
cross_encoder_models = config["rag"]["crossEncoderModels"]
@@ -246,6 +281,7 @@ def _create_workspace_open_search(
return _convert_workspace(
genai_core.workspaces.create_workspace_open_search(
workspace_name=workspace_name,
is_public=request.isPublic,
embeddings_model_provider=request.embeddingsModelProvider,
embeddings_model_name=request.embeddingsModelName,
embeddings_model_dimensions=embeddings_model_dimensions,
Expand All @@ -256,11 +292,14 @@ def _create_workspace_open_search(
chunking_strategy=request.chunkingStrategy,
chunk_size=request.chunkSize,
chunk_overlap=request.chunkOverlap,
creator_id=user_id,
)
)


def _create_workspace_kendra(request: CreateWorkspaceKendraRequest, config: dict):

user_id = genai_core.auth.get_user_id(router)
workspace_name = request.name.strip()
kendra_indexes = genai_core.kendra.get_kendra_indexes()

@@ -285,8 +324,11 @@
return _convert_workspace(
genai_core.workspaces.create_workspace_kendra(
workspace_name=workspace_name,
is_public=request.isPublic,
kendra_index=kendra_index,
use_all_data=request.useAllData,
creator_id=user_id,
)
)

@@ -297,6 +339,7 @@ def _convert_workspace(workspace: dict):
return {
"id": workspace["workspace_id"],
"name": workspace["name"],
"isPublic": workspace["is_public"],
"engine": workspace["engine"],
"status": workspace["status"],
"languages": workspace.get("languages"),
@@ -322,4 +365,6 @@ def _convert_workspace(workspace: dict):
"kendraUseAllData": workspace.get("kendra_use_all_data", kendra_index_external),
"createdAt": workspace.get("created_at"),
"updatedAt": workspace.get("updated_at"),
"is_writable": workspace.get("is_writable"),
"is_owner": workspace.get("is_owner")
}
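The create_workspace_* calls now receive is_public and creator_id, but the storage side of those arguments is outside this diff. A sketch of how a creation helper might seed the policy table so the flows above work; the item shape (pk/sk plus a workspace_id attribute for the by_workspace_idx GSI added later in this PR) and the public sentinel user "0" are assumptions:

import os
import boto3

dynamodb = boto3.resource("dynamodb")
policy_table = dynamodb.Table(os.environ["WORKSPACES_POLICY_TABLE_NAME"])

def put_workspace_policy(workspace_id: str, creator_id: str, is_public: bool):
    # Grant the creator ownership and write access on the new workspace.
    policy_table.put_item(Item={
        "pk": str(creator_id),
        "sk": workspace_id,
        "workspace_id": workspace_id,  # indexed by the by_workspace_idx GSI
        "is_owner": True,
        "is_writable": True,
    })
    if is_public:
        # Also register the workspace under the public sentinel user so that
        # list_workspaces_by_user(0) can see it (read-only for everyone else).
        policy_table.put_item(Item={
            "pk": "0",
            "sk": workspace_id,
            "workspace_id": workspace_id,
            "is_owner": False,
            "is_writable": False,
        })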
8 changes: 8 additions & 0 deletions lib/chatbot-api/rest-api.ts
@@ -74,6 +74,10 @@ export class ApiResolvers extends Construct {
props.ragEngines?.workspacesTable.tableName ?? "",
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
props.ragEngines?.workspacesByObjectTypeIndexName ?? "",
WORKSPACES_POLICY_TABLE_NAME:
props.ragEngines?.workspacesPolicyTable.tableName ?? "",
WORKSPACES_POLICY_BY_WORKSPACE_ID_INDEX_NAME:
props.ragEngines?.workspacesPolicyByWorkspaceIdIndexName ?? "",
DOCUMENTS_TABLE_NAME:
props.ragEngines?.documentsTable.tableName ?? "",
DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME:
@@ -128,6 +132,10 @@ export class ApiResolvers extends Construct {
);
}

if (props.ragEngines?.workspacesPolicyTable) {
props.ragEngines.workspacesPolicyTable.grantReadWriteData(apiHandler);
}

if (props.ragEngines?.auroraPgVector) {
props.ragEngines.auroraPgVector.database.secret?.grantRead(apiHandler);
props.ragEngines.auroraPgVector.database.connections.allowDefaultPortFrom(
6 changes: 6 additions & 0 deletions lib/chatbot-api/schema/schema.graphql
@@ -3,6 +3,7 @@
input CreateWorkspaceAuroraInput {
name: String!
kind: String!
isPublic: Boolean!
embeddingsModelProvider: String!
embeddingsModelName: String!
crossEncoderModelProvider: String!
@@ -19,13 +20,15 @@ input CreateWorkspaceAuroraInput {
input CreateWorkspaceKendraInput {
name: String!
kind: String!
isPublic: Boolean!
kendraIndexId: String!
useAllData: Boolean!
}

input CreateWorkspaceOpenSearchInput {
name: String!
kind: String!
isPublic: Boolean!
embeddingsModelProvider: String!
embeddingsModelName: String!
crossEncoderModelProvider: String!
@@ -266,6 +269,7 @@ input WebsiteInput {
type Workspace @aws_cognito_user_pools {
id: String!
name: String!
isPublic: Boolean
formatVersion: Int
engine: String!
status: String
@@ -291,6 +295,8 @@ type Workspace @aws_cognito_user_pools {
kendraUseAllData: Boolean
createdAt: AWSDateTime!
updatedAt: AWSDateTime!
is_writable: Boolean
is_owner: Boolean
}

type Channel @aws_iam @aws_cognito_user_pools {
4 changes: 4 additions & 0 deletions lib/rag-engines/index.ts
@@ -28,6 +28,8 @@ export class RagEngines extends Construct {
public readonly documentsTable: dynamodb.Table;
public readonly workspacesTable: dynamodb.Table;
public readonly workspacesByObjectTypeIndexName: string;
public readonly workspacesPolicyTable: dynamodb.Table;
public readonly workspacesPolicyByWorkspaceIdIndexName: string;
public readonly documentsByCompountKeyIndexName: string;
public readonly documentsByStatusIndexName: string;
public readonly fileImportWorkflow?: sfn.StateMachine;
@@ -110,6 +112,8 @@ export class RagEngines extends Construct {
this.processingBucket = dataImport.processingBucket;
this.workspacesTable = tables.workspacesTable;
this.documentsTable = tables.documentsTable;
this.workspacesPolicyTable = tables.workspacesPolicyTable;
this.workspacesPolicyByWorkspaceIdIndexName = tables.workspacesPolicyByWorkspaceIdIndexName;
this.workspacesByObjectTypeIndexName =
tables.workspacesByObjectTypeIndexName;
this.documentsByCompountKeyIndexName =
27 changes: 27 additions & 0 deletions lib/rag-engines/rag-dynamodb-tables/index.ts
@@ -4,9 +4,12 @@ import { Construct } from "constructs";

export class RagDynamoDBTables extends Construct {
public readonly workspacesTable: dynamodb.Table;
public readonly workspacesPolicyTable: dynamodb.Table;
public readonly documentsTable: dynamodb.Table;
public readonly workspacesByObjectTypeIndexName: string =
"by_object_type_idx";
public readonly workspacesPolicyByWorkspaceIdIndexName: string =
"by_workspace_idx";
public readonly documentsByCompoundKeyIndexName: string =
"by_compound_key_idx";
public readonly documentsByStatusIndexName: string = "by_status_idx";
@@ -41,6 +44,29 @@
},
});

const workspacesPolicyTable = new dynamodb.Table(this, "WorkspacesPolicy", {
partitionKey: {
name: "pk",
type: dynamodb.AttributeType.STRING,
},
sortKey: {
name: "sk",
type: dynamodb.AttributeType.STRING,
},
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
encryption: dynamodb.TableEncryption.AWS_MANAGED,
removalPolicy: cdk.RemovalPolicy.DESTROY,
pointInTimeRecovery: true,
});

workspacesPolicyTable.addGlobalSecondaryIndex({
indexName: this.workspacesPolicyByWorkspaceIdIndexName,
partitionKey: {
name: "workspace_id",
type: dynamodb.AttributeType.STRING,
}
});

const documentsTable = new dynamodb.Table(this, "Documents", {
partitionKey: {
name: "workspace_id",
Expand Down Expand Up @@ -81,6 +107,7 @@ export class RagDynamoDBTables extends Construct {
});

this.workspacesTable = workspacesTable;
this.workspacesPolicyTable = workspacesPolicyTable;
this.documentsTable = documentsTable;
}
}
18 changes: 18 additions & 0 deletions lib/shared/layers/python-sdk/python/genai_core/aurora/delete.py
@@ -3,10 +3,12 @@
import genai_core.utils.delete_files_with_prefix
from psycopg2 import sql
from genai_core.aurora.connection import AuroraConnection
import genai_core.workspaces

PROCESSING_BUCKET_NAME = os.environ["PROCESSING_BUCKET_NAME"]
UPLOAD_BUCKET_NAME = os.environ["UPLOAD_BUCKET_NAME"]
WORKSPACES_TABLE_NAME = os.environ["WORKSPACES_TABLE_NAME"]
WORKSPACES_POLICY_TABLE_NAME = os.environ["WORKSPACES_POLICY_TABLE_NAME"]
DOCUMENTS_TABLE_NAME = os.environ.get("DOCUMENTS_TABLE_NAME")

WORKSPACE_OBJECT_TYPE = "workspace"
@@ -30,6 +32,7 @@ def delete_aurora_workspace(workspace: dict):
)

workspaces_table = dynamodb.Table(WORKSPACES_TABLE_NAME)
documents_policy_table = dynamodb.Table(WORKSPACES_POLICY_TABLE_NAME)
documents_table = dynamodb.Table(DOCUMENTS_TABLE_NAME)

items_to_delete = []
@@ -68,3 +71,18 @@
)

print(f"Delete Item succeeded: {response}")

# Delete all workspace policy items related to the workspace being deleted
items_policy_to_delete = genai_core.workspaces.list_policy_workspace_by_id(workspace_id)
# Batch delete in groups of 25
for i in range(0, len(items_policy_to_delete), 25):
with documents_policy_table.batch_writer() as batch:
for item in items_policy_to_delete[i : i + 25]:
batch.delete_item(
Key={
"pk": item["pk"],
"sk": item["sk"],
}
)

print(f"Deleted {len(items_policy_to_delete)} policy items.")
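genai_core.workspaces.list_policy_workspace_by_id is used here but not defined in this diff. A sketch of what it could look like, assuming it queries the by_workspace_idx GSI added in rag-dynamodb-tables/index.ts through the new environment variables wired up in rest-api.ts:

import os
import boto3
from boto3.dynamodb.conditions import Key

dynamodb = boto3.resource("dynamodb")
policy_table = dynamodb.Table(os.environ["WORKSPACES_POLICY_TABLE_NAME"])
policy_index_name = os.environ["WORKSPACES_POLICY_BY_WORKSPACE_ID_INDEX_NAME"]

def list_policy_workspace_by_id(workspace_id: str):
    # Return every policy item attached to this workspace, paging through
    # the GSI in case a workspace has many grants.
    items = []
    kwargs = {
        "IndexName": policy_index_name,
        "KeyConditionExpression": Key("workspace_id").eq(workspace_id),
    }
    while True:
        response = policy_table.query(**kwargs)
        items.extend(response.get("Items", []))
        if "LastEvaluatedKey" not in response:
            break
        kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]
    return items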