Commit

Merge branch 'main' into dvde/add-extra-eclipselink-doc-info
eric-maynard authored Oct 21, 2024
2 parents 158cb28 + 49ec2c9 commit 008a329
Showing 79 changed files with 2,402 additions and 458 deletions.
4 changes: 3 additions & 1 deletion .asf.yaml
@@ -45,12 +45,14 @@ github:
required_status_checks:
# strict means "Require branches to be up to date before merging".
strict: true
# contexts are the names of checks that must pass
# Contexts are the names of checks that must pass. This is the value
# of the job's `name` property if it's present.
contexts:
- markdown-link-check
- build
- regtest
- site
- "Helm tests"

features:
wiki: false
2 changes: 1 addition & 1 deletion .github/workflows/check-md-link.yml
@@ -41,5 +41,5 @@ jobs:
with:
use-quiet-mode: 'yes'
config-file: '.github/workflows/check-md-link-config.json'
folder-path: 'regtests, regtests/client/python/docs, regtests/client/python, .github, build-logic, polaris-core, polaris-service, extension, spec, k8, notebooks'
folder-path: 'regtests, regtests/client/python/docs, regtests/client/python, .github, build-logic, polaris-core, polaris-service, extension, spec, k8, getting-started'
file-path: 'CHAT_BYLAWS.md, CODE_OF_CONDUCT.md, CONTRIBUTING.md, README.md SECURITY.md'
101 changes: 101 additions & 0 deletions .github/workflows/helm.yml
@@ -0,0 +1,101 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Helm tests

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:

  helm-tests:
    name: Helm tests
    runs-on: ubuntu-latest
    timeout-minutes: 60

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/[email protected]
        with:
          version: 'v3.16.0'

      - name: Set up chart-testing
        uses: helm/[email protected]

      - name: Run chart-testing (list-changed)
        id: list-changed
        run: |
          changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }} --chart-dirs helm)
          if [[ -n "$changed" ]]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Run 'helm template' validation
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          cd helm/polaris
          for f in values.yaml ci/*.yaml; do
            echo "::group::helm template $f"
            helm template --debug --namespace polaris-ns --values $f .
            echo "::endgroup::"
          done
      - name: Run chart-testing (lint)
        if: steps.list-changed.outputs.changed == 'true'
        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --debug --charts ./helm/polaris

      - name: Set up Minikube
        if: steps.list-changed.outputs.changed == 'true'
        uses: medyagh/[email protected]

      - name: Docker build
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          eval $(minikube docker-env)
          docker build -f ./Dockerfile \
            --build-arg ECLIPSELINK=true \
            --build-arg ECLIPSELINK_DEPS=com.h2database:h2:2.3.232 \
            -t polaris:latest .
      - name: Install fixtures
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          kubectl create namespace polaris-ns
          kubectl apply --namespace polaris-ns $(find helm/polaris/ci/fixtures -name "*.yaml" -exec echo -n "-f {} " \;)
      - name: Run chart-testing (install)
        if: steps.list-changed.outputs.changed == 'true'
        run: |
          ct install --target-branch ${{ github.event.repository.default_branch }} \
            --namespace polaris-ns \
            --helm-extra-set-args "--set=image.repository=polaris --set=image.tag=latest" \
            --debug --charts ./helm/polaris
4 changes: 2 additions & 2 deletions .gitignore
@@ -26,8 +26,8 @@ regtests/output/
/polaris-venv/
/pyproject.toml

# Notebooks
notebooks/.ipynb_checkpoints/
# Notebook Checkpoints
**/.ipynb_checkpoints/

# Metastore
metastore_db/
4 changes: 2 additions & 2 deletions README.md
@@ -28,13 +28,13 @@ Documentation is available at https://polaris.apache.org, including
[Polaris management API doc](https://polaris.apache.org/index.html#tag/polaris-management-service_other)
and [Apache Iceberg REST API doc](https://polaris.apache.org/index.html#tag/Configuration-API).

Subscribe to the [dev mailing list][dev-list] to join discussions. Check out the [CONTRIBUTING guide](CONTRIBUTING.md)
[Subscribe to the dev mailing list][dev-list-subscribe] to join discussions via email or browse [the archives](https://lists.apache.org/[email protected]). Check out the [CONTRIBUTING guide](CONTRIBUTING.md)
for contribution guidelines.

[![Zulip](https://img.shields.io/badge/Zulip-Chat-blue?color=3d4db3&logo=zulip&style=for-the-badge&logoColor=white)](https://polaris-catalog.zulipchat.com/)
[![Build Status](https://img.shields.io/github/actions/workflow/status/apache/polaris/gradle.yml?branch=main&label=Main%20CI&logo=Github&style=for-the-badge)](https://github.com/apache/polaris/actions/workflows/gradle.yml?query=branch%3Amain)

[dev-list]: mailto:[email protected]
[dev-list-subscribe]: mailto:dev-subscribe@polaris.apache.org

## Building and Running

20 changes: 3 additions & 17 deletions docker-compose.yml
@@ -33,7 +33,7 @@ services:
# add aws keys as dropwizard config
JAVA_OPTS: -Ddw.awsAccessKey=$AWS_ACCESS_KEY_ID -Ddw.awsSecretKey=$AWS_SECRET_ACCESS_KEY
volumes:
- credentials:/tmp/credentials/
- ./regtests/credentials:/tmp/credentials/

healthcheck:
test: ["CMD", "curl", "http://localhost:8182/healthcheck"]
@@ -66,19 +66,5 @@ services:
AWS_CROSS_REGION_BUCKET: $AWS_CROSS_REGION_BUCKET
AWS_ROLE_FOR_CROSS_REGION_BUCKET: $AWS_ROLE_FOR_CROSS_REGION_BUCKET
volumes:
- local_output:/tmp/polaris-regtests/
- credentials:/tmp/credentials/

volumes:
local_output:
driver: local
driver_opts:
o: bind
type: none
device: ./regtests/output
credentials:
driver: local
driver_opts:
o: bind
type: none
device: ./regtests/credentials
- ./regtests/output:/tmp/polaris-regtests/
- ./regtests/credentials:/tmp/credentials/
1 change: 1 addition & 0 deletions docs
45 changes: 45 additions & 0 deletions getting-started/spark/README.md
@@ -0,0 +1,45 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Getting Started with Apache Spark and Apache Polaris

This getting started guide provides a `docker-compose` file to set up [Apache Spark](https://spark.apache.org/) with Apache Polaris. Apache Polaris is configured as an Iceberg REST Catalog in Spark.
A Jupyter notebook is used to run PySpark.

## Run the `docker-compose` file
To start the services defined in the `docker-compose` file, run this command from the repo's root directory:
```
docker-compose -f getting-started/spark/docker-compose.yml up
```

This will spin up two container services:
* The `polaris` service, for running Apache Polaris using an in-memory metastore
* The `jupyter` service, for running a Jupyter notebook with PySpark
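
When you are done, the same compose file can be used to stop and remove the containers (a standard `docker-compose` teardown):
```
docker-compose -f getting-started/spark/docker-compose.yml down
```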

## Access the Jupyter notebook interface
In the Jupyter notebook container log, look for the URL to access the Jupyter notebook. The URL should be in the format `http://127.0.0.1:8888/lab?token=<token>`.

Open the Jupyter notebook in a browser.
Navigate to [`notebooks/SparkPolaris.ipynb`](http://127.0.0.1:8888/lab/tree/notebooks/SparkPolaris.ipynb) <!-- markdown-link-check-disable-line -->

## Change the Polaris credential
The Polaris service creates a new root credential on every startup. Find this credential in the Polaris service log and set the `polaris_credential` variable in the first cell of the Jupyter notebook.
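
As a rough sketch, the first cell then looks like this (the credential value below is only a placeholder and will not match your log):
```
# Paste the "root principal credentials" value from the Polaris service log here.
# The value changes on every service restart.
polaris_credential = '<client-id>:<client-secret>'  # pragma: allowlist secret

client_id, client_secret = polaris_credential.split(":")
```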

## Run the Jupyter notebook
You can now run all cells in the notebook or write your own code!
@@ -20,7 +20,7 @@
services:
polaris:
build:
context: .
context: ../../
network: host
ports:
- "8181:8181"
@@ -37,8 +37,8 @@ services:
retries: 5
jupyter:
build:
context: .
dockerfile: ./notebooks/Dockerfile
context: ../../ # this is necessary to expose `regtests/` dir to notebooks/Dockerfile
dockerfile: ./getting-started/spark/notebooks/Dockerfile
network: host
ports:
- "8888:8888"
@@ -49,12 +49,4 @@ services:
AWS_REGION: us-west-2
POLARIS_HOST: polaris
volumes:
- notebooks:/home/jovyan/notebooks

volumes:
notebooks:
driver: local
driver_opts:
o: bind
type: none
device: ./notebooks
- ./notebooks:/home/jovyan/notebooks
File renamed without changes.
@@ -21,8 +21,11 @@
"from polaris.catalog.api_client import ApiClient as CatalogApiClient\n",
"from polaris.catalog.api_client import Configuration as CatalogApiClientConfiguration\n",
"\n",
"client_id = 'b3b6497353b33ea7'\n",
"client_secret = '623a67ee71d75825238e3e269df5cdac' # pragma: allowlist secret\n",
"# (CHANGE ME): This credential changes on every Polaris service restart\n",
"# In the Polaris log, look for the `realm: default-realm root principal credentials:` string\n",
"polaris_credential = '35df9f8a34199df0:101b9d35700032416210ad2d39b1b4e3' # pragma: allowlist secret\n",
"\n",
"client_id, client_secret = polaris_credential.split(\":\")\n",
"client = CatalogApiClient(CatalogApiClientConfiguration(username=client_id,\n",
" password=client_secret,\n",
" host='http://polaris:8181/api/catalog'))\n",
@@ -42,8 +45,7 @@
"source": [
"# Create our first catalog\n",
"\n",
"* Creates a catalog named `polaris_catalog` that writes to a specified location in S3.\n",
"* An AWS IAM role is specified - this role is assumed whenever we read or write data in the catalog"
"* Creates a catalog named `polaris_catalog` that writes to a specified location in the Local Filesystem."
]
},
{
@@ -59,11 +61,9 @@
" host='http://polaris:8181/api/management/v1'))\n",
"root_client = PolarisDefaultApi(client)\n",
"\n",
"storage_conf = AwsStorageConfigInfo(storage_type=\"S3\",\n",
" allowed_locations=[\"s3://datalake-storage-team/polaris_test/\"],\n",
" role_arn=\"arn:aws:iam::631484165566:role/datalake-storage-integration-role\")\n",
"storage_conf = FileStorageConfigInfo(storage_type=\"FILE\", allowed_locations=[\"file:///tmp\"])\n",
"catalog_name = 'polaris_demo'\n",
"catalog = Catalog(name=catalog_name, type='INTERNAL', properties={\"default-base-location\": \"s3://datalake-storage-team/polaris_test/polaris_catalog\"},\n",
"catalog = Catalog(name=catalog_name, type='INTERNAL', properties={\"default-base-location\": \"file:///tmp/polaris/\"},\n",
" storage_config_info=storage_conf)\n",
"catalog.storage_config_info = storage_conf\n",
"root_client.create_catalog(create_catalog_request=CreateCatalogRequest(catalog=catalog))\n",
@@ -272,7 +272,7 @@
" .config(\"spark.sql.catalog.polaris.credential\", f\"{engineer_principal.credentials.client_id}:{engineer_principal.credentials.client_secret}\")\n",
"\n",
" # Set the warehouse to the name of the catalog we created\n",
" .config(\"spark.sql.catalog.polaris.warehouse\", 'polaris_demo')\n",
" .config(\"spark.sql.catalog.polaris.warehouse\", catalog_name)\n",
"\n",
" # Scope set to PRINCIPAL_ROLE:ALL\n",
" .config(\"spark.sql.catalog.polaris.scope\", 'PRINCIPAL_ROLE:ALL')\n",
@@ -454,7 +454,7 @@
" return codecs.decode(\"1F\", \"hex\").decode(\"UTF-8\").join(namespace)\n",
"\n",
"# Call loadTable\n",
"tbl_meta = collado_client.load_table(prefix='polaris_demo', namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE', x_iceberg_access_delegation='true')\n",
"tbl_meta = collado_client.load_table(prefix=catalog_name, namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE', x_iceberg_access_delegation='true')\n",
"display(JSON(tbl_meta.to_dict(), expanded=True))"
]
},
@@ -604,7 +604,7 @@
},
"outputs": [],
"source": [
"tbl_meta = pm_client.load_table(prefix='polaris_demo', namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE', x_iceberg_access_delegation='true')\n",
"tbl_meta = pm_client.load_table(prefix=catalog_name, namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE', x_iceberg_access_delegation='true')\n",
"display(JSON(tbl_meta.to_dict(), expanded=True))"
]
},
@@ -632,7 +632,7 @@
},
"outputs": [],
"source": [
"pm_client.drop_table(prefix='polaris_demo', namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE')"
"pm_client.drop_table(prefix=catalog_name, namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE')"
]
},
{
Expand Down Expand Up @@ -775,7 +775,7 @@
"# The ops_client fails to do any real damage even though the engineer normally has DROP_TABLE privileges\n",
"ops_client = IcebergCatalogAPI(CatalogApiClient(CatalogApiClientConfiguration(access_token=ops_token.access_token,\n",
" host='http://polaris:8181/api/catalog')))\n",
"ops_client.drop_table(prefix='polaris_demo', namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE')"
"ops_client.drop_table(prefix=catalog_name, namespace=format_namespace(['COLLADO_TEST', 'PUBLIC']), table='TEST_TABLE')"
]
}
],
2 changes: 1 addition & 1 deletion gradle/libs.versions.toml
@@ -64,7 +64,7 @@ mockito-core = { module = "org.mockito:mockito-core", version = "5.11.0" }
opentelemetry-bom = { module = "io.opentelemetry:opentelemetry-bom", version = "1.38.0" }
opentelemetry-semconv = { module = "io.opentelemetry.semconv:opentelemetry-semconv", version = "1.25.0-alpha" }
prometheus-metrics-exporter-servlet-jakarta = { module = "io.prometheus:prometheus-metrics-exporter-servlet-jakarta", version = "1.3.0" }
s3mock-testcontainers = { module = "com.adobe.testing:s3mock-testcontainers", version = "3.9.1" }
s3mock-testcontainers = { module = "com.adobe.testing:s3mock-testcontainers", version = "3.11.0" }
slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }
spotbugs-annotations = { module = "com.github.spotbugs:spotbugs-annotations", version = "4.8.5" }
swagger-annotations = { module = "io.swagger:swagger-annotations", version.ref = "swagger" }
3 changes: 3 additions & 0 deletions helm/polaris/Chart.yaml
@@ -29,3 +29,6 @@ sources:
keywords:
- polaris
- iceberg
maintainers:
- name: MonkeyCanCode
- name: adutra