From 5d4e653630e1234c91c0310391905dda69cbe11e Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 27 Nov 2023 00:26:55 -0500 Subject: [PATCH] Prepare Flintrock 2.1.0 release (#369) - Tweak the license file so GitHub recognizes it. - Fix a mistake in the manifest file so the change log is included as intended. - Update the default Amazon Linux 2 AMI. - Update and trim the main README a bit. - Adopt pyproject.toml. It is "strongly recommended" and commands like python setup.py sdist bdist_wheel are deprecated in favor of python -m build. - Trim outdated comments and pin of cryptography from setup.py. - Update testing code for setting up private VPC. --- .github/workflows/flintrock.yaml | 10 ++-- CHANGES.md | 10 +++- LICENSE | 7 ++- MANIFEST.in | 4 +- README.md | 78 ++++++++++---------------------- flintrock/__init__.py | 3 +- flintrock/config.yaml.template | 2 +- pyproject.toml | 5 ++ requirements/developer.pip | 1 - requirements/maintainer.in | 1 + requirements/maintainer.pip | 15 ++++-- requirements/user.in | 2 +- requirements/user.pip | 4 +- setup.cfg | 4 ++ setup.py | 13 ++---- test-infra/README.md | 8 +++- test-infra/bastion.tf | 10 ++-- test-infra/network.tf | 2 +- test-infra/provider.tf | 4 +- 19 files changed, 92 insertions(+), 91 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/workflows/flintrock.yaml b/.github/workflows/flintrock.yaml index 6a39d90e..0f67351f 100644 --- a/.github/workflows/flintrock.yaml +++ b/.github/workflows/flintrock.yaml @@ -17,6 +17,8 @@ jobs: - ubuntu-20.04 - macos-11 python-version: + # Update the artifact upload steps below if modifying + # this list of Python versions. 
- "3.8" - "3.9" - "3.10" @@ -32,14 +34,16 @@ jobs: architecture: x64 - run: "pip install -r requirements/maintainer.pip" - run: "pytest" - - run: python setup.py sdist bdist_wheel + - run: python -m build - uses: actions/upload-artifact@v3 - if: ${{ matrix.python-version == '3.9' }} + # Use the latest supported Python to build a standalone package. + if: ${{ matrix.python-version == '3.12' }} with: name: Flintrock Standalone - ${{ matrix.os }} path: dist/Flintrock-*-standalone-*.zip - uses: actions/upload-artifact@v3 - if: ${{ matrix.os == 'ubuntu-20.04' && matrix.python-version == '3.9' }} + # Use the oldest supported Python to build a wheel. + if: ${{ matrix.os == 'ubuntu-20.04' && matrix.python-version == '3.8' }} with: name: Flintrock Wheel path: dist/Flintrock-*.whl diff --git a/CHANGES.md b/CHANGES.md index 46b34c2d..8d077696 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,13 @@ ## [Unreleased] -[Unreleased]: https://github.com/nchammas/flintrock/compare/v2.0.0...master +[Unreleased]: https://github.com/nchammas/flintrock/compare/v2.1.0...master + +Nothing notable yet. + +## [2.1.0] - 2023-11-26 + +[2.1.0]: https://github.com/nchammas/flintrock/compare/v2.0.0...v2.1.0 ### Changed @@ -10,12 +16,14 @@ * [#361]: Migrated from AdoptOpenJDK, which is deprecated, to Adoptium OpenJDK. * [#362], [#366]: Improved Flintrock's ability to cleanup after launch failures. * [#366]: Deprecated `--ec2-spot-request-duration`, which is not needed for one-time spot instances launched using the RunInstances API. +* [#369]: Adopted `pyproject.toml` and tweaked Flintrock's Python packaging accordingly. This keeps Flintrock in line with modern Python packaging standards and should be transparent to end-users. 
[#348]: https://github.com/nchammas/flintrock/pull/348 [#361]: https://github.com/nchammas/flintrock/pull/361 [#362]: https://github.com/nchammas/flintrock/pull/362 [#366]: https://github.com/nchammas/flintrock/pull/366 [#367]: https://github.com/nchammas/flintrock/pull/367 +[#369]: https://github.com/nchammas/flintrock/pull/369 ## [2.0.0] - 2021-06-10 diff --git a/LICENSE b/LICENSE index 82714d76..97900ac7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,3 @@ - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -179,7 +178,7 @@ APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" + boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a @@ -187,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright {yyyy} {name of copyright owner} + Copyright 2024 Nicholas Chammas Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -199,4 +198,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in index 98745d47..dd4ad5c7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,8 @@ -# See: https://docs.python.org/3/distutils/commandref.html +# See: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html graft flintrock include README.md -include CHANGELOG.md +include CHANGES.md include COPYRIGHT include LICENSE diff --git a/README.md b/README.md index b83a54f1..9a58e0ae 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ flintrock launch test-cluster \ --spark-version 3.5.0 \ --ec2-key-name key_name \ --ec2-identity-file /path/to/key.pem \ - --ec2-ami ami-0aeeebd8d2ab47354 \ + --ec2-ami ami-0588935a949f9ff17 \ --ec2-user ec2-user ``` @@ -123,10 +123,17 @@ without too much trouble, too. ### Release version -To get the latest release of Flintrock, simply run [pip](https://pip.pypa.io/en/stable/): +To get the latest release of Flintrock, simply install it with [pip][pip]. + +Since Flintrock is a command-line application rather than a library, you may prefer to +install it using [pipx][pipx], which automatically takes care of installing Flintrock to +an isolated virtual environment for you. + +[pip]: https://pip.pypa.io/en/stable/ +[pipx]: https://pypa.github.io/pipx/ ``` -pip3 install flintrock +pipx install flintrock ``` This will install Flintrock and place it on your path. You should be good to go now! @@ -140,27 +147,14 @@ flintrock configure ### Standalone version (Python not required!) -If you don't have a recent enough version of Python, or if you don't have Python installed at all, -you can still use Flintrock. We publish standalone packages of Flintrock on GitHub with our -[releases](https://github.com/nchammas/flintrock/releases). - -Find the standalone package for your OS under our [latest release](https://github.com/nchammas/flintrock/releases/latest), -unzip it to a location of your choice, and run the `flintrock` executable inside. 
- -For example: - -```sh -flintrock_version="2.0.0" - -curl --location --remote-name "https://github.com/nchammas/flintrock/releases/download/v$flintrock_version/Flintrock-$flintrock_version-standalone-macOS-x86_64.zip" -unzip -q -d flintrock "Flintrock-$flintrock_version-standalone-macOS-x86_64.zip" -cd flintrock/ +We used to publish standalone versions of Flintrock that don't require you to have Python +installed on your machine. Since Flintrock 2.1.0, we have stopped publishing these +standalone builds. -# You're good to go! -./flintrock --help -``` +If you used these standalone packages, please [chime in on this issue][standalone] and +share a bit about your environment and use case. -You'll probably want to add the location of the Flintrock executable to your `PATH` so that you can invoke it from any directory. +[standalone]: https://github.com/nchammas/flintrock/issues/370 ### Community-supported distributions @@ -175,7 +169,7 @@ These packages are not supported by the core contributors and **may be out of da If you like living on the edge, install the development version of Flintrock: ```sh -pip3 install git+https://github.com/nchammas/flintrock +pipx install git+https://github.com/nchammas/flintrock ``` If you want to [contribute](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md), follow the instructions in our contributing guide on [how to install Flintrock](https://github.com/nchammas/flintrock/blob/master/CONTRIBUTING.md#contributing-code). @@ -203,17 +197,17 @@ There are some things that Flintrock specifically *does not* support. Flintrock is not for managing long-lived clusters, or any infrastructure that serves as a permanent part of some environment. - For starters, Flintrock provides no guarantee that clusters launched with one version of Flintrock can be managed by another version of Flintrock, and no considerations are made for any long-term use cases. 
+For starters, Flintrock provides no guarantee that clusters launched with one version of Flintrock can be managed by another version of Flintrock, and no considerations are made for any long-term use cases. - If you are looking for ways to manage permanent infrastructure, look at tools like [Terraform](https://www.terraform.io/), [Ansible](http://www.ansible.com/), [SaltStack](http://saltstack.com/), or [Ubuntu Juju](http://www.ubuntu.com/cloud/tools/juju). You might also find a service like [Databricks](https://databricks.com/product/databricks) useful if you're looking for someone else to host and manage Spark for you. Amazon also offers [Spark on EMR](https://aws.amazon.com/elasticmapreduce/details/spark/). +If you are looking for ways to manage permanent infrastructure, look at tools like [Terraform](https://www.terraform.io/), [Ansible](http://www.ansible.com/), or [Ubuntu Juju](http://www.ubuntu.com/cloud/tools/juju). You might also find a service like [Databricks](https://databricks.com/product/databricks) useful if you're looking for someone else to host and manage Spark for you. Amazon also offers [Spark on EMR](https://aws.amazon.com/elasticmapreduce/details/spark/). ### Launching non-Spark-related services -Flintrock is meant for launching Spark clusters that include closely related services like HDFS, Mesos, and YARN. +Flintrock is meant for launching Spark clusters that include closely related services like HDFS. - Flintrock is not for launching external datasources (e.g. Cassandra), or other services that are not closely integrated with Spark (e.g. Tez). +Flintrock is not for launching external datasources (e.g. Cassandra), or other services that are not closely integrated with Spark (e.g. Tez). - If you are looking for an easy way to launch other services from the Hadoop ecosystem, look at the [Apache Bigtop](http://bigtop.apache.org/) project. 
+If you are looking for an easy way to launch other services from the Hadoop ecosystem, look at the [Apache Bigtop](http://bigtop.apache.org/) project. ### Launching out-of-date services @@ -263,7 +257,7 @@ providers: identity-file: /path/to/.ssh/key.pem instance-type: m5.large region: us-east-1 - ami: ami-0aeeebd8d2ab47354 + ami: ami-0588935a949f9ff17 user: ec2-user ``` @@ -283,29 +277,7 @@ flintrock launch test-cluster \ ### Fast Launches -Flintrock is really fast. This is how quickly it can launch fully operational clusters on EC2 compared to [spark-ec2](https://github.com/amplab/spark-ec2). - -#### Setup - -* Provider: EC2 -* Instance type: `m3.large` -* AMI: - * Flintrock: [Default Amazon Linux AMI](https://aws.amazon.com/amazon-linux-ami/) - * spark-ec2: [Custom spark-ec2 AMI](https://github.com/amplab/spark-ec2/tree/a990752575cd8b0ab25731d7820a55c714798ec3/ami-list) -* Spark/Hadoop download source: S3 -* Launch time: Best of 6 tries - -#### Results - -| Cluster Size | Flintrock Launch Time | spark-ec2 Launch Time | -|---------------|----------------------:|------------------------:| -| 1 slave | 2m 06s | 8m 44s | -| 50 slaves | 2m 30s | 37m 30s | -| 100 slaves | 2m 42s | 1h 06m 05s | - -The spark-ec2 launch times are sourced from [SPARK-5189](https://issues.apache.org/jira/browse/SPARK-5189). - -Note that AWS performance is highly variable, so you will not get these results consistently. They show the best case scenario for each tool, and not the typical case. For Flintrock, the typical launch time will be a minute or two longer. +Flintrock is really fast. It can launch a 100-node cluster in about three minutes (give or take a few seconds due to AWS's normal performance variability). ### Advanced Storage Setup @@ -330,7 +302,7 @@ Flintrock is built and tested against vanilla Amazon Linux and CentOS. You can e Supporting multiple versions of anything is tough. 
There's more surface area to cover for testing, and over the long term the maintenance burden of supporting something non-current with bug fixes and workarounds really adds up. -There are projects that support stuff across a wide cut of language or API versions. For example, Spark supports Java 7 and 8, and Python 2.6+ and 3+. The people behind these projects are gods. They take on an immense maintenance burden for the benefit and convenience of their users. +There are projects that support stuff across a wide cut of language or API versions. For example, Spark supports multiple versions of Java, Scala, R, and Python. The people behind these projects are gods. They take on an immense maintenance burden for the benefit and convenience of their users. We here at project Flintrock are much more modest in our abilities. We are best able to serve the project over the long term when we limit ourselves to supporting a small but widely applicable set of configurations. diff --git a/flintrock/__init__.py b/flintrock/__init__.py index f5bf9ff5..a33997dd 100644 --- a/flintrock/__init__.py +++ b/flintrock/__init__.py @@ -1,2 +1 @@ -# See: https://packaging.python.org/en/latest/distributing/#standards-compliance-for-interoperability -__version__ = '2.1.0.dev0' +__version__ = '2.1.0' diff --git a/flintrock/config.yaml.template b/flintrock/config.yaml.template index 872649a8..f040c070 100644 --- a/flintrock/config.yaml.template +++ b/flintrock/config.yaml.template @@ -30,7 +30,7 @@ providers: instance-type: m5.large region: us-east-1 # availability-zone: - ami: ami-0cabc39acf991f4f1 # Amazon Linux 2, us-east-1 + ami: ami-0588935a949f9ff17 # Amazon Linux 2, us-east-1 user: ec2-user # ami: ami-61bbf104 # CentOS 7, us-east-1 # user: centos diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a9639d92 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +# Minimal pyproject file per: https://packaging.python.org/en/latest/guides/modernize-setup-py-project/ 
+[build-system] +# Minimum setuptools version that supports version in setup.cfg per: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ +requires = ["setuptools >= 46.4.0"] +build-backend = "setuptools.build_meta" diff --git a/requirements/developer.pip b/requirements/developer.pip index 3e84878f..2198fc11 100644 --- a/requirements/developer.pip +++ b/requirements/developer.pip @@ -36,7 +36,6 @@ coverage[toml]==7.3.2 cryptography==41.0.5 # via # -r requirements/user.pip - # flintrock # paramiko exceptiongroup==1.2.0 # via pytest diff --git a/requirements/maintainer.in b/requirements/maintainer.in index 2aafa5e8..b72956bd 100644 --- a/requirements/maintainer.in +++ b/requirements/maintainer.in @@ -2,3 +2,4 @@ wheel >= 0.31.0 twine == 4.0.2 PyInstaller == 6.2.0 +build >= 1.0.3, < 2.0.0 diff --git a/requirements/maintainer.pip b/requirements/maintainer.pip index dbc4d40c..d122e57e 100644 --- a/requirements/maintainer.pip +++ b/requirements/maintainer.pip @@ -24,6 +24,8 @@ botocore==1.32.4 # boto3 # flintrock # s3transfer +build==1.0.3 + # via -r requirements/maintainer.in certifi==2023.11.17 # via requests cffi==1.16.0 @@ -45,7 +47,6 @@ coverage[toml]==7.3.2 cryptography==41.0.5 # via # -r requirements/developer.pip - # flintrock # paramiko docutils==0.20.1 # via readme-renderer @@ -55,10 +56,11 @@ exceptiongroup==1.2.0 # pytest flake8==6.1.0 # via -r requirements/developer.pip -idna==3.4 +idna==3.6 # via requests importlib-metadata==6.8.0 # via + # build # keyring # pyinstaller # twine @@ -94,6 +96,7 @@ nh3==0.2.14 packaging==23.2 # via # -r requirements/developer.pip + # build # pyinstaller # pytest paramiko==3.3.1 @@ -118,7 +121,7 @@ pyflakes==3.1.0 # via # -r requirements/developer.pip # flake8 -pygments==2.17.1 +pygments==2.17.2 # via # readme-renderer # rich @@ -130,6 +133,8 @@ pynacl==1.5.0 # via # -r requirements/developer.pip # paramiko +pyproject-hooks==1.0.0 + # via build pytest==7.4.3 # via # -r requirements/developer.pip @@ 
-167,7 +172,9 @@ six==1.16.0 tomli==2.0.1 # via # -r requirements/developer.pip + # build # coverage + # pyproject-hooks # pytest twine==4.0.2 # via -r requirements/maintainer.in @@ -179,7 +186,7 @@ urllib3==1.26.18 # botocore # requests # twine -wheel==0.41.3 +wheel==0.42.0 # via -r requirements/maintainer.in zipp==3.17.0 # via diff --git a/requirements/user.in b/requirements/user.in index 67cf51c5..de13e5f3 100644 --- a/requirements/user.in +++ b/requirements/user.in @@ -6,4 +6,4 @@ # See: https://caremad.io/2013/07/setup-vs-requirement/ # - The #egg= syntax is a workaround for pip-tools. # See: https://github.com/jazzband/pip-tools/issues/204#issuecomment-550051424 --e file:.#egg=Flintrock +--editable file:.#egg=Flintrock diff --git a/requirements/user.pip b/requirements/user.pip index 19e2d1f3..c2cfee5a 100644 --- a/requirements/user.pip +++ b/requirements/user.pip @@ -22,9 +22,7 @@ cffi==1.16.0 click==8.1.7 # via flintrock cryptography==41.0.5 - # via - # flintrock - # paramiko + # via paramiko jmespath==1.0.1 # via # boto3 diff --git a/setup.cfg b/setup.cfg index 7bb8b2c4..a20f9c05 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,7 @@ +# See: https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ +[metadata] +version = attr: flintrock.__version__ + [tool:pytest] norecursedirs = venv addopts = diff --git a/setup.py b/setup.py index 7e2ee6a9..e95a10ed 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ import setuptools -from flintrock import __version__ +# from flintrock import __version__ with open('README.md') as f: @@ -7,10 +7,12 @@ setuptools.setup( name='Flintrock', - version=__version__, + # Moved to setup.cfg to avoid import of flintrock during installation of + # flintrock. This used to work, but becomes a problem with isolated builds + # and new pip behavior triggered by pyproject.toml. 
+ # version=__version__, description='A command-line tool for launching Apache Spark clusters.', long_description=long_description, - # FYI: This option requires setuptools >= 38.6.0. long_description_content_type="text/markdown", url='https://github.com/nchammas/flintrock', author='Nicholas Chammas', @@ -52,11 +54,6 @@ 'click == 8.1.7', 'paramiko == 3.3.1', 'PyYAML == 6.0.1', - # This is to address reports that PyInstaller-packaged versions - # of Flintrock intermittently fail due to an out-of-date version - # of Cryptography being used. - # See: https://github.com/nchammas/flintrock/issues/169 - 'cryptography >= 1.7.2', ], entry_points={ diff --git a/test-infra/README.md b/test-infra/README.md index dfa5cfdc..33733e05 100644 --- a/test-infra/README.md +++ b/test-infra/README.md @@ -4,9 +4,12 @@ The Terraform templates in this directory manage private VPC infrastructure that ## Set Up -To spin up a private VPC along with associated infrastructure like a NAT gateway: +After [installing Terraform][install], you can spin up a private VPC along with associated infrastructure like a NAT gateway: -``` +[install]: https://developer.hashicorp.com/terraform/install + +```sh +terraform init terraform apply ``` @@ -36,6 +39,7 @@ All you need to do is pick a version of Flintrock to install and then you can be pip install https://github.com/nchammas/flintrock/archive/master.zip flintrock launch test-cluster flintrock login test-cluster +flintrock destroy test-cluster ``` ## Tear Down diff --git a/test-infra/bastion.tf b/test-infra/bastion.tf index 9221f455..b040cca9 100644 --- a/test-infra/bastion.tf +++ b/test-infra/bastion.tf @@ -10,7 +10,7 @@ resource "aws_security_group" "ssh" { from_port = 22 to_port = 22 protocol = "tcp" - cidr_blocks = ["${chomp(data.http.myip.body)}/32"] + cidr_blocks = ["${chomp(data.http.myip.response_body)}/32"] } egress { @@ -74,8 +74,12 @@ resource "aws_instance" "bastion" { provisioner "remote-exec" { inline = [ - "sudo yum install -y python3", - 
"python3 -m venv /home/ec2-user/venv", + "sudo yum install -y git", + "sudo yum install -y gcc make patch zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel openssl11-devel tk-devel libffi-devel xz-devel", + "curl https://pyenv.run | bash", + ".pyenv/bin/pyenv install 3.8", + ".pyenv/bin/pyenv global 3.8", + ".pyenv/bin/pyenv exec python -m venv /home/ec2-user/venv", "/home/ec2-user/venv/bin/pip install PyYAML", ] } diff --git a/test-infra/network.tf b/test-infra/network.tf index 4e0961c0..e72c1a44 100644 --- a/test-infra/network.tf +++ b/test-infra/network.tf @@ -30,7 +30,7 @@ resource "aws_subnet" "private" { } resource "aws_eip" "nat" { - vpc = true + domain = "vpc" tags = { Name = "flintrock-test-nat-ip" diff --git a/test-infra/provider.tf b/test-infra/provider.tf index 0f011851..ee64fd55 100644 --- a/test-infra/provider.tf +++ b/test-infra/provider.tf @@ -2,11 +2,11 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = "~> 2" + version = "~> 5" } http = { source = "hashicorp/http" - version = "~> 1" + version = "~> 3" } } }