diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..ba1db4e50 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,40 @@ + +name: tests + +on: + push: + branches: [master] + pull_request: + schedule: + # Run every Sunday + - cron: "0 0 * * 0" + +jobs: + build: + name: ${{ matrix.os }}, Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + python-version: [3.7, 3.8, 3.9, "3.10"] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r dev-requirements.txt + + - name: Lint + run: | + make lint + + - name: Run tests + run: | + pytest -vvv \ No newline at end of file diff --git a/.gitignore b/.gitignore index eb9b79f8e..d0ee4d06f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,11 @@ docs/site/ # test cache .cache/* tests/__pycache__/* -*.pytest_cache/ \ No newline at end of file +*.pytest_cache/ +*.pyc +manual_test/ + +# other local dev info +.vscode/ +cookiecutter_data_science.egg-info/ + diff --git a/LICENSE b/LICENSE index 3981d8a60..a3de0161a 100644 --- a/LICENSE +++ b/LICENSE @@ -5,4 +5,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..50bc58ed6 --- /dev/null +++ b/Makefile @@ -0,0 +1,60 @@ +## GLOBALS + +PROJECT_NAME = cookiecutter-data-science +PYTHON_VERSION = 3.10 +PYTHON_INTERPRETER = python + + +### UTILITIES +_prep: + rm -f **/*/.DS_store + + +### DEV COMMANDS + +## Set up python interpreter environment +create_environment: + conda create --name $(PROJECT_NAME) python=$(PYTHON_VERSION) -y + @echo ">>> conda env created. Activate with:\nconda activate $(PROJECT_NAME)" + +## Install Python Dependencies +requirements: + $(PYTHON_INTERPRETER) -m pip install -r dev-requirements.txt + +## Format the code using isort and black +format: + isort ccds hooks tests + black ccds hooks tests setup.py + +## Lint using flake8 + black +lint: + flake8 ccds hooks tests setup.py + black --check ccds hooks tests setup.py + + +### DOCS + +docs-serve: + cd docs && mkdocs serve + +### TESTS + +test: _prep + pytest -vvv + +test-fastest: _prep + pytest -vvv -FFF + +test-debug-last: + pytest --lf --pdb + +_clean_manual_test: + rm -rf manual_test + +manual-test: _prep _clean_manual_test + mkdir -p manual_test + cd manual_test && python -m ccds .. 
+ +manual-test-debug: _prep _clean_manual_test + mkdir -p manual_test + cd manual_test && python -m pdb ../ccds/__main__.py .. diff --git a/README.md b/README.md index 49a1eb384..547074c7a 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ _A logical, reasonably standardized, but flexible project structure for doing an ### Requirements to use the cookiecutter template: ----------- - - Python 2.7 or 3.5+ + - Python 3.7+ - [Cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: This can be installed with pip by or conda depending on how you manage your Python packages: ``` bash @@ -44,7 +44,7 @@ which is available now. ### The resulting directory structure ------------ -The directory structure of your new project looks like this: +The directory structure of your new project looks like this: ``` ├── LICENSE @@ -64,6 +64,8 @@ The directory structure of your new project looks like this: │ the creator's initials, and a short `-` delimited description, e.g. │ `1.0-jqp-initial-data-exploration`. │ +├── pyproject.toml <- Project configuration file with settings for running black; see setuptools.readthedocs.io +│ ├── references <- Data dictionaries, manuals, and all other explanatory materials. │ ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. @@ -72,9 +74,13 @@ The directory structure of your new project looks like this: ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. │ generated with `pip freeze > requirements.txt` │ +├── setup.cfg <- Configuration file for flake8 and pep8 +│ ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported -├── src <- Source code for use in this project. -│ ├── __init__.py <- Makes src a Python module +│ +├── {{ cookiecutter.module_name }} <- Source code for use in this project. 
+│ │ +│ ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module │ │ │ ├── data <- Scripts to download or generate data │ │ └── make_dataset.py @@ -89,19 +95,24 @@ The directory structure of your new project looks like this: │ │ │ └── visualization <- Scripts to create exploratory and results oriented visualizations │ └── visualize.py -│ -└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io + ``` ## Contributing We welcome contributions! [See the docs for guidelines](https://drivendata.github.io/cookiecutter-data-science/#contributing). -### Installing development requirements +### Installing requirements ------------ pip install -r requirements.txt + +### Installing development requirements +------------ + + pip install -r dev-requirements.txt + ### Running the tests ------------ diff --git a/ccds.json b/ccds.json new file mode 100644 index 000000000..a79815a3a --- /dev/null +++ b/ccds.json @@ -0,0 +1,57 @@ +{ + "project_name": "project_name", + "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}", + "module_name": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}", + "author_name": "Your name (or your organization/company/team)", + "description": "A short description of the project.", + "python_version_number": "3.10", + "dataset_storage": [ + { + "none": "none" + }, + { + "azure": { + "container": "container-name" + } + }, + { + "s3": { + "bucket": "bucket-name", + "aws_profile": "default" + } + }, + { + "gcs": { + "bucket": "bucket-name" + } + } + ], + "environment_manager": [ + "virtualenv", + "conda", + "pipenv", + "none" + ], + "dependency_file": [ + "requirements.txt", + "environment.yml", + "Pipfile" + ], + "pydata_packages": [ + "none", + "basic" + ], + "ethics_checklist": [ + "yes", + "no" + ], + "nbautoexport": [ + "yes", + "no" + ], + "open_source_license": [ + "MIT", + "BSD-3-Clause", + "No license file" + ] +} \ No newline at end of file diff --git a/{{ 
cookiecutter.repo_name }}/src/__init__.py b/ccds/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/__init__.py rename to ccds/__init__.py diff --git a/ccds/__main__.py b/ccds/__main__.py new file mode 100644 index 000000000..137532d70 --- /dev/null +++ b/ccds/__main__.py @@ -0,0 +1,30 @@ +# Monkey-patch jinja to allow variables to not exist, which happens with sub-options +import jinja2 + +jinja2.StrictUndefined = jinja2.Undefined + + +# Monkey-patch cookiecutter to allow sub-items +from cookiecutter import prompt + +from ccds.monkey_patch import prompt_for_config + +prompt.prompt_for_config = prompt_for_config + + +# monkey-patch context to point to ccds.json +from cookiecutter import generate + +from ccds.monkey_patch import generate_context_wrapper + +generate.generate_context = generate_context_wrapper + +# for use in tests need monkey-patched api main +from cookiecutter import cli +from cookiecutter import main as api_main # noqa: F401 referenced by tests + +main = cli.main + + +if __name__ == "__main__": + main() diff --git a/{{ cookiecutter.repo_name }}/src/data/__init__.py b/ccds/hook_utils/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/data/__init__.py rename to ccds/hook_utils/__init__.py diff --git a/ccds/hook_utils/custom_config.py b/ccds/hook_utils/custom_config.py new file mode 100644 index 000000000..03b2bcb06 --- /dev/null +++ b/ccds/hook_utils/custom_config.py @@ -0,0 +1,51 @@ +from distutils.dir_util import copy_tree +from pathlib import Path +from tempfile import TemporaryDirectory +from urllib.request import urlretrieve +from zipfile import ZipFile + +from cookiecutter.vcs import clone + + +def write_custom_config(user_input_config): + if not user_input_config: + return + + tmp = TemporaryDirectory() + tmp_zip = None + + print(user_input_config) + + # if not absolute, test if local path relative to parent of created directory + if not user_input_config.startswith("/"): + test_path = 
Path("..") / user_input_config + else: + test_path = Path(user_input_config) + + # check if user passed a local path + if test_path.exists() and test_path.is_dir(): + local_path = test_path + + elif test_path.exists() and test_path.endswith(".zip"): + tmp_zip = test_path + + # check if user passed a url to a zip + elif user_input_config.startswith("http") and ( + user_input_config.split(".")[-1] in ["zip"] + ): + tmp_zip, _ = urlretrieve(user_input_config) + + # assume it is a VCS uri and try to clone + else: + clone(user_input_config, clone_to_dir=tmp) + local_path = tmp + + if tmp_zip: + with ZipFile(tmp_zip, "r") as zipf: + zipf.extractall(tmp) + local_path = tmp + + # write whatever the user supplied into the project + copy_tree(local_path, ".") + + tmp.cleanup() diff --git a/ccds/hook_utils/dependencies.py b/ccds/hook_utils/dependencies.py new file mode 100644 index 000000000..7faa58b3a --- /dev/null +++ b/ccds/hook_utils/dependencies.py @@ -0,0 +1,38 @@ +def write_dependencies( + dependencies, packages, pip_only_packages, repo_name, module_name, python_version +): + if dependencies == "requirements.txt": + with open(dependencies, "w") as f: + lines = sorted(packages) + + lines += ["" "-e ."] + + f.write("\n".join(lines)) + f.write("\n") + + elif dependencies == "environment.yml": + with open(dependencies, "w") as f: + lines = [f"name: { repo_name }", "dependencies:"] + + lines += [f" - {p}" for p in packages if p not in pip_only_packages] + + lines += [" - pip:"] + [ + f" - {p}" for p in packages if p in pip_only_packages + ] + + lines += [" - -e ."] + + lines += [f" - python={python_version}"] + + f.write("\n".join(lines)) + + elif dependencies == "Pipfile": + with open(dependencies, "w") as f: + lines = ["[packages]"] + lines += [f'{p} = "*"' for p in sorted(packages)] + + lines += [f'"{ module_name }" ={{editable = true, path = "."}}'] + + lines += ["", "[requires]", f'python_version = "{ python_version }"'] + + f.write("\n".join(lines)) diff --git 
a/ccds/monkey_patch.py b/ccds/monkey_patch.py new file mode 100644 index 000000000..022c92c53 --- /dev/null +++ b/ccds/monkey_patch.py @@ -0,0 +1,131 @@ +from collections import OrderedDict +from pathlib import Path + +from cookiecutter.environment import StrictEnvironment +from cookiecutter.exceptions import UndefinedVariableInTemplate +from cookiecutter.generate import generate_context +from cookiecutter.prompt import ( + prompt_choice_for_config, + read_user_choice, + read_user_variable, + render_variable, +) +from jinja2.exceptions import UndefinedError + + +def _prompt_choice_and_subitems(cookiecutter_dict, env, key, options, no_input): + result = {} + + # first, get the selection + rendered_options = [ + render_variable(env, list(raw.keys())[0], cookiecutter_dict) for raw in options + ] + + if no_input: + selected = rendered_options[0] + else: + selected = read_user_choice(key, rendered_options) + + selected_item = [ + list(c.values())[0] for c in options if list(c.keys())[0] == selected + ][0] + + result[selected] = {} + + # then, fill in the sub values for that item + if isinstance(selected_item, dict): + for subkey, raw in selected_item.items(): + # We are dealing with a regular variable + val = render_variable(env, raw, cookiecutter_dict) + + if not no_input: + val = read_user_variable(subkey, val) + + result[selected][subkey] = val + elif isinstance(selected_item, list): + val = prompt_choice_for_config( + cookiecutter_dict, env, selected, selected_item, no_input + ) + result[selected] = val + elif isinstance(selected_item, str): + result[selected] = selected_item + + return result + + +def prompt_for_config(context, no_input=False): + """ + Prompts the user to enter new config, using context as a source for the + field names and sample values. + :param no_input: Prompt the user at command line for manual configuration? 
+ """ + cookiecutter_dict = OrderedDict([]) + env = StrictEnvironment(context=context) + + # First pass: Handle simple and raw variables, plus choices. + # These must be done first because the dictionaries keys and + # values might refer to them. + for key, raw in context["cookiecutter"].items(): + if key.startswith("_"): + cookiecutter_dict[key] = raw + continue + + try: + if isinstance(raw, list): + if isinstance(raw[0], dict): + val = _prompt_choice_and_subitems( + cookiecutter_dict, env, key, raw, no_input + ) + cookiecutter_dict[key] = val + else: + # We are dealing with a choice variable + val = prompt_choice_for_config( + cookiecutter_dict, env, key, raw, no_input + ) + cookiecutter_dict[key] = val + elif not isinstance(raw, dict): + # We are dealing with a regular variable + val = render_variable(env, raw, cookiecutter_dict) + + if not no_input: + val = read_user_variable(key, val) + + cookiecutter_dict[key] = val + except UndefinedError as err: + msg = "Unable to render variable '{}'".format(key) + raise UndefinedVariableInTemplate(msg, err, context) + + # Second pass; handle the dictionaries. 
+ for key, raw in context["cookiecutter"].items(): + + try: + if isinstance(raw, dict): + # We are dealing with a dict variable + val = render_variable(env, raw, cookiecutter_dict) + + if not no_input: + val = read_user_dict( # noqa: F821 referencable in patched context + key, val + ) + + cookiecutter_dict[key] = val + except UndefinedError as err: + msg = "Unable to render variable '{}'".format(key) + raise UndefinedVariableInTemplate(msg, err, context) + + return cookiecutter_dict + + +def generate_context_wrapper(*args, **kwargs): + """Hardcoded in cookiecutter, so we override: + https://github.com/cookiecutter/cookiecutter/blob/2bd62c67ec3e52b8e537d5346fd96ebd82803efe/cookiecutter/main.py#L85 + """ + # replace full path to cookiecutter.json with full path to ccds.json + kwargs["context_file"] = str(Path(kwargs["context_file"]).with_name("ccds.json")) + + parsed_context = generate_context(*args, **kwargs) + + # replace key + parsed_context["cookiecutter"] = parsed_context["ccds"] + del parsed_context["ccds"] + return parsed_context diff --git a/cookiecutter.json b/cookiecutter.json index 161f59e46..9e1a294af 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -1,10 +1,3 @@ { - "project_name": "project_name", - "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}", - "author_name": "Your name (or your organization/company/team)", - "description": "A short description of the project.", - "open_source_license": ["MIT", "BSD-3-Clause", "No license file"], - "s3_bucket": "[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')", - "aws_profile": "default", - "python_interpreter": ["python3", "python"] + "DEPRECATED": "Use of the `cookiecutter` command is deprecated. Please use `ccds` in place of `cookiecutter`. To continue using the deprecated template, use `cookiecutter ... -c v1`." 
} diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 000000000..f0898e681 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,13 @@ +-r requirements.txt +-e . + +black +chardet +flake8 +isort +mkdocs +mkdocs-cinder +pipenv +pytest +virtualenvwrapper; sys_platform != 'win32' +virtualenvwrapper-win; sys_platform == 'win32' \ No newline at end of file diff --git a/docs/docs/index.md b/docs/docs/index.md index 1733e83dc..100b076f6 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -54,11 +54,11 @@ Disagree with a couple of the default folder names? Working on a project that's ## Getting started -With this in mind, we've created a data science cookiecutter template for projects in Python. Your analysis doesn't have to be in Python, but the template does provide some Python boilerplate that you'd want to remove (in the `src` folder for example, and the Sphinx documentation skeleton in `docs`). +With this in mind, we've created a data science cookiecutter template for projects in Python. Your analysis doesn't have to be in Python, but the template does provide some Python boilerplate that you'd want to remove (in the `{{ cookiecutter.module_name }}` folder for example, and the Sphinx documentation skeleton in `docs`). ### Requirements - - Python 2.7 or 3.5 + - Python >= 3.7 - [cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: `pip install cookiecutter` @@ -103,8 +103,8 @@ cookiecutter https://github.com/drivendata/cookiecutter-data-science │ generated with `pip freeze > requirements.txt` │ ├── setup.py <- Make this project pip installable with `pip install -e` -├── src <- Source code for use in this project. -│   ├── __init__.py <- Makes src a Python module +├── {{ cookiecutter.module_name }} <- Source code for use in this project. 
+│   ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module │ │ │   ├── data <- Scripts to download or generate data │   │   └── make_dataset.py @@ -129,7 +129,7 @@ There are some opinions implicit in the project structure that have grown out of ### Data is immutable -Don't ever edit your raw data, especially not manually, and especially not in Excel. Don't overwrite your raw data. Don't save multiple versions of the raw data. Treat the data (and its format) as immutable. The code you write should move the raw data through a pipeline to your final analysis. You shouldn't have to run all of the steps every time you want to make a new figure (see [Analysis is a DAG](#analysis-is-a-dag)), but anyone should be able to reproduce the final products with only the code in `src` and the data in `data/raw`. +Don't ever edit your raw data, especially not manually, and especially not in Excel. Don't overwrite your raw data. Don't save multiple versions of the raw data. Treat the data (and its format) as immutable. The code you write should move the raw data through a pipeline to your final analysis. You shouldn't have to run all of the steps every time you want to make a new figure (see [Analysis is a DAG](#analysis-is-a-dag)), but anyone should be able to reproduce the final products with only the code in `{{ cookiecutter.module_name }}` and the data in `data/raw`. Also, if data is immutable, it doesn't need source control in the same way that code does. Therefore, ***by default, the data folder is included in the `.gitignore` file.*** If you have a small amount of data that rarely changes, you may want to include the data in the repository. Github currently warns if files are over 50MB and rejects files over 100MB. 
Some other options for storing/syncing large data include [AWS S3](https://aws.amazon.com/s3/) with a syncing tool (e.g., [`s3cmd`](http://s3tools.org/s3cmd)), [Git Large File Storage](https://git-lfs.github.com/), [Git Annex](https://git-annex.branchable.com/), and [dat](http://dat-data.com/). Currently by default, we ask for an S3 bucket and use [AWS CLI](http://docs.aws.amazon.com/cli/latest/reference/s3/index.html) to sync data in the `data` folder with the server. @@ -141,7 +141,7 @@ Since notebooks are challenging objects for source control (e.g., diffs of the ` 1. Follow a naming convention that shows the owner and the order the analysis was done in. We use the format `--.ipynb` (e.g., `0.3-bull-visualize-distributions.ipynb`). - 2. Refactor the good parts. Don't write code to do the same task in multiple notebooks. If it's a data preprocessing task, put it in the pipeline at `src/data/make_dataset.py` and load data from `data/interim`. If it's useful utility code, refactor it to `src`. + 2. Refactor the good parts. Don't write code to do the same task in multiple notebooks. If it's a data preprocessing task, put it in the pipeline at `{{ cookiecutter.module_name }}/data/make_dataset.py` and load data from `data/interim`. If it's useful utility code, refactor it to `{{ cookiecutter.module_name }}`. Now by default we turn the project into a Python package (see the `setup.py` file). 
You can import your code and use it in notebooks with a cell like the following: @@ -149,10 +149,10 @@ Since notebooks are challenging objects for source control (e.g., diffs of the ` # OPTIONAL: Load the "autoreload" extension so that code can change %load_ext autoreload -# OPTIONAL: always reload modules so that as you change code in src, it gets loaded +# OPTIONAL: always reload modules so that as you change code in {{ cookiecutter.module_name }}, it gets loaded %autoreload 2 -from src.data import make_dataset +from {{ cookiecutter.module_name }}.data import make_dataset ``` ### Analysis is a directed acyclic graph ([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph)) @@ -192,10 +192,10 @@ OTHER_VARIABLE=something #### Use a package to load these variables automatically. -If you look at the stub script in `src/data/make_dataset.py`, it uses a package called [python-dotenv](https://github.com/theskumar/python-dotenv) to load up all the entries in this file as environment variables so they are accessible with `os.environ.get`. Here's an example snippet adapted from the `python-dotenv` documentation: +If you look at the stub script in `{{ cookiecutter.module_name }}/data/make_dataset.py`, it uses a package called [python-dotenv](https://github.com/theskumar/python-dotenv) to load up all the entries in this file as environment variables so they are accessible with `os.environ.get`. 
Here's an example snippet adapted from the `python-dotenv` documentation: ```python -# src/data/dotenv_example.py +# {{ cookiecutter.module_name }}/data/dotenv_example.py import os from dotenv import load_dotenv, find_dotenv diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5f7e566b4..30ca202df 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -8,5 +8,5 @@ google_analytics: ['UA-54096005-4', 'auto'] theme: cinder extra_css: - css/extra.css -pages: +nav: - Home: index.md diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..7c0036094 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +mkdocs +mkdocs-cinder diff --git a/docs/runtime.txt b/docs/runtime.txt new file mode 100644 index 000000000..548d71365 --- /dev/null +++ b/docs/runtime.txt @@ -0,0 +1 @@ +3.7 \ No newline at end of file diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py new file mode 100644 index 000000000..0ca93080e --- /dev/null +++ b/hooks/post_gen_project.py @@ -0,0 +1,66 @@ +# https://github.com/cookiecutter/cookiecutter/issues/824 +# our workaround is to include these utility functions in the CCDS package +from urllib.request import urlretrieve + +from ccds.hook_utils.custom_config import write_custom_config +from ccds.hook_utils.dependencies import write_dependencies + +# +# TEMPLATIZED VARIABLES FILLED IN BY COOKIECUTTER +# +packages = [ + "black", + "flake8", + "isort", + "pip", + "python-dotenv", + "setuptools", + "wheel", +] + +# {% if cookiecutter.dataset_storage.s3 %} +packages += ["awscli"] +# {% endif %} # + +# {% if cookiecutter.pydata_packages == "basic" %} +packages += [ + "ipython", + "jupyter", + "matplotlib", + "numpy", + "pandas", + "scikit-learn", +] +# {% endif %} + +# track packages that are not available through conda +pip_only_packages = [ + "awscli", + "python-dotenv", +] + +# {% if cookiecutter.nbautoexport == "yes" %} +packages += ["nbautoexport"] +pip_only_packages += ["nbautoexport"] +# {% endif %} 
+ +# +# POST-GENERATION FUNCTIONS +# +write_dependencies( + "{{ cookiecutter.dependency_file }}", + packages, + pip_only_packages, + repo_name="{{ cookiecutter.repo_name }}", + module_name="{{ cookiecutter.module_name }}", + python_version="{{ cookiecutter.python_version_number }}", +) + +write_custom_config("{{ cookiecutter.custom_config }}") + +# {% if cookiecutter.ethics_checklist == "yes" %} +urlretrieve( + "https://raw.githubusercontent.com/drivendataorg/deon/master/examples/ethics.md", + "ethics.md", +) +# {% endif %} diff --git a/hooks/pre_gen_project.py b/hooks/pre_gen_project.py index 2bd49c083..21f58047d 100644 --- a/hooks/pre_gen_project.py +++ b/hooks/pre_gen_project.py @@ -1,22 +1,4 @@ -def deprecation_warning(): - print(""" +# Functions here run before the project is generated. -============================================================================= -*** DEPRECATION WARNING *** - -Cookiecutter data science is moving to v2 soon, which will entail using -the command `ccds ...` rather than `cookiecutter ...`. The cookiecutter command -will continue to work, and this version of the template will still be available. -To use the legacy template, you will need to explicitly use `-c v1` to select it. - -Please update any scripts/automation you have to append the `-c v1` option, -which is available now. - -For example: - cookiecutter -c v1 https://github.com/drivendata/cookiecutter-data-science -============================================================================= - - """) - - -deprecation_warning() +# For the use of these hooks, see +# See https://cookiecutter.readthedocs.io/en/1.7.2/advanced/hooks.html diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 000000000..f33c4ec13 --- /dev/null +++ b/netlify.toml @@ -0,0 +1,17 @@ +# Settings in the [build] context are global and are applied to all contexts +# unless otherwise overridden by more specific contexts. 
+[build] + # Directory (relative to root of your repo) that contains the deploy-ready + # HTML files and assets generated by the build. If a base directory has + # been specified, include it in the publish directory path. + base = "docs/" + publish = "site/" + + # Default build command. + command = "mkdocs build" + +# context specific environment variables. +[context.production.environment] + +# context specific environment variables. +[context.deploy-preview.environment] diff --git a/requirements.txt b/requirements.txt index 07f48f4de..c8e988bc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1 @@ -mkdocs -mkdocs-cinder cookiecutter -pytest diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..0eb7d8386 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +[flake8] +ignore = E203, E402, E501, W503 +max-line-length = 99 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..9d3f65ca8 --- /dev/null +++ b/setup.py @@ -0,0 +1,50 @@ +from pathlib import Path +from setuptools import setup, find_packages + +project_path = Path(__file__).parent +long_description = open(project_path / "README.md").read() + +setup( + name="cookiecutter-data-science", + version="2.0.0", + description="A logical, reasonably standardized, but flexible project structure for doing and sharing data science work.", + long_description=long_description, + long_description_content_type="text/markdown", + author="DrivenData", + author_email="info@drivendata.org", + url="https://drivendata.github.io/cookiecutter-data-science/", + project_urls={ + "Homepage": "https://drivendata.github.io/cookiecutter-data-science/", + "Source Code": "https://github.com/drivendata/cookiecutter-data-science/", + "DrivenData": "http://drivendata.co", + }, + classifiers=[ + # How mature is this project? 
Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + "Development Status :: 3 - Alpha", + # Indicate who your project is intended for + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + # Pick your license as you wish (should match "license" above) + "License :: OSI Approved :: MIT License", + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ], + python_requires=">=3.7", + install_requires=["cookiecutter", "click"], + entry_points={ + "console_scripts": [ + "ccds=ccds.__main__:main", + ], + }, + packages=find_packages(exclude=["dist", "docs", "tests"]), +) diff --git a/tests/conda_harness.sh b/tests/conda_harness.sh new file mode 100644 index 000000000..59133820b --- /dev/null +++ b/tests/conda_harness.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -ex + +# enable conda commands inside the script +eval "$(conda shell.bash hook)" + +PROJECT_NAME=$(basename $1) +CCDS_ROOT=$(dirname $0) + +# configure exit / teardown behavior +function finish { + if [[ $(which python) == *"$PROJECT_NAME"* ]]; then + conda deactivate + fi + + conda env remove -n $PROJECT_NAME -y +} +trap finish EXIT + +# source the steps in the test +source $CCDS_ROOT/test_functions.sh + +# navigate to the generated project and run make commands +cd $1 + +# Fix for conda issue https://github.com/conda/conda/issues/7267 on MacOS +if [ -e /usr/local/miniconda ] +then + sudo chown -R $USER /usr/local/miniconda +fi + +make create_environment +conda activate $PROJECT_NAME +make requirements + +run_tests $PROJECT_NAME diff --git a/tests/conftest.py b/tests/conftest.py 
index 8acbfb271..1003aacf9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,47 +1,118 @@ -import sys -import pytest +import json import shutil +import sys +import tempfile +from contextlib import contextmanager +from itertools import product from pathlib import Path -from cookiecutter import main + +import pytest + +from ccds.__main__ import api_main CCDS_ROOT = Path(__file__).parents[1].resolve() -args = { - 'project_name': 'DrivenData', - 'author_name': 'DrivenData', - 'open_source_license': 'BSD-3-Clause', - 'python_interpreter': 'python' - } +default_args = { + "project_name": "my_test_project", + "repo_name": "my-test-repo", + "module_name": "project_module", + "author_name": "DrivenData", + "description": "A test project", + "open_source_license": "MIT", + "dataset_storage": {"azure": {"container": "container-name"}}, +} + + +def config_generator(fast=False): + cookiecutter_json = json.load((CCDS_ROOT / "ccds.json").open("r")) + + # python versions for the created environment; match the root + # python version since Pipenv needs to be able to find an executable + running_py_version = f"{sys.version_info.major}.{sys.version_info.minor}" + py_version = [("python_version_number", v) for v in [running_py_version]] + + configs = product( + py_version, + [ + ("environment_manager", opt) + for opt in cookiecutter_json["environment_manager"] + ], + [("dependency_file", opt) for opt in cookiecutter_json["dependency_file"]], + [("pydata_packages", opt) for opt in cookiecutter_json["pydata_packages"]], + ) + + def _is_valid(config): + config = dict(config) + # Pipfile + pipenv only valid combo for either + if (config["environment_manager"] == "pipenv") ^ ( + config["dependency_file"] == "Pipfile" + ): + return False + # conda is the only valid env manager for environment.yml + if (config["dependency_file"] == "environment.yml") and ( + config["environment_manager"] != "conda" + ): + return False + return True + + # remove invalid configs + configs = [c for c 
in configs if _is_valid(c)] + + for c in configs: + config = dict(c) + config.update(default_args) + yield config + + # just do a single config if fast passed once or three times + if fast in [1, 3]: + break + + +def pytest_addoption(parser): + """Pass -F/--fast multiple times to speed up tests + + default - execute makefile commands, all configs -def system_check(basename): - platform = sys.platform - if 'linux' in platform: - basename = basename.lower() - return basename + -F - execute makefile commands, single config + -FF - skip makefile commands, all configs + -FFF - skip makefile commands, single config + """ + parser.addoption( + "--fast", + "-F", + action="count", + default=0, + help="Speed up tests by skipping configs and/or Makefile validation", + ) + + +@pytest.fixture +def fast(request): + return request.config.getoption("--fast") -@pytest.fixture(scope='class', params=[{}, args]) -def default_baked_project(tmpdir_factory, request): - temp = tmpdir_factory.mktemp('data-project') - out_dir = Path(temp).resolve() +def pytest_generate_tests(metafunc): + # setup config fixture to get all of the results from config_generator + if "config" in metafunc.fixturenames: + metafunc.parametrize( + "config", config_generator(metafunc.config.getoption("fast")) + ) - pytest.param = request.param - main.cookiecutter( + +@contextmanager +def bake_project(config): + temp = Path(tempfile.mkdtemp(suffix="data-project")).resolve() + + api_main.cookiecutter( str(CCDS_ROOT), no_input=True, - extra_context=pytest.param, - output_dir=out_dir + extra_context=config, + output_dir=temp, + overwrite_if_exists=True, ) - pn = pytest.param.get('project_name') or 'project_name' - - # project name gets converted to lower case on Linux but not Mac - pn = system_check(pn) - - proj = out_dir / pn - request.cls.path = proj - yield + yield temp / config["repo_name"] # cleanup after - shutil.rmtree(out_dir) \ No newline at end of file + shutil.rmtree(temp) diff --git a/tests/pipenv_harness.sh 
b/tests/pipenv_harness.sh new file mode 100644 index 000000000..7d916b8c0 --- /dev/null +++ b/tests/pipenv_harness.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -ex + +PROJECT_NAME=$(basename $1) +CCDS_ROOT=$(dirname $0) + +# configure exit / teardown behavior +function finish { + if [[ $(which python) == *"$PROJECT_NAME"* ]]; then + exit + fi + + pipenv --rm +} +trap finish EXIT + +# source the steps in the test +source $CCDS_ROOT/test_functions.sh + +# navigate to the generated project and run make commands +cd $1 +make create_environment + +# can no longer happen outside environment because we now initialize +# nbautoexport which requires the environment be activated +pipenv run make requirements + +# test with pipenv run +pipenv run python -c "import sys; assert \"$PROJECT_NAME\" in sys.executable" diff --git a/tests/test_creation.py b/tests/test_creation.py index b32d23d66..b25926e24 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -1,113 +1,162 @@ -import os -import pytest -from subprocess import check_output -from conftest import system_check +import sys +from pathlib import Path +from subprocess import PIPE, run + +from conftest import bake_project def no_curlies(filepath): - """ Utility to make sure no curly braces appear in a file. - That is, was Jinja able to render everything? + """Utility to make sure no curly braces appear in a file. + That is, was Jinja able to render everything? 
""" - with open(filepath, 'r') as f: - data = f.read() - - template_strings = [ - '{{', - '}}', - '{%', - '%}' - ] + data = filepath.open("r").read() + + template_strings = ["{{", "}}", "{%", "%}"] template_strings_in_file = [s in data for s in template_strings] return not any(template_strings_in_file) -@pytest.mark.usefixtures("default_baked_project") -class TestCookieSetup(object): - def test_project_name(self): - project = self.path - if pytest.param.get('project_name'): - name = system_check('DrivenData') - assert project.name == name - else: - assert project.name == 'project_name' - - def test_author(self): - setup_ = self.path / 'setup.py' - args = ['python', str(setup_), '--author'] - p = check_output(args).decode('ascii').strip() - if pytest.param.get('author_name'): - assert p == 'DrivenData' - else: - assert p == 'Your name (or your organization/company/team)' - - def test_readme(self): - readme_path = self.path / 'README.md' - assert readme_path.exists() - assert no_curlies(readme_path) - if pytest.param.get('project_name'): - with open(readme_path) as fin: - assert 'DrivenData' == next(fin).strip() - - def test_setup(self): - setup_ = self.path / 'setup.py' - args = ['python', str(setup_), '--version'] - p = check_output(args).decode('ascii').strip() - assert p == '0.1.0' - - def test_license(self): - license_path = self.path / 'LICENSE' - assert license_path.exists() - assert no_curlies(license_path) - - def test_license_type(self): - setup_ = self.path / 'setup.py' - args = ['python', str(setup_), '--license'] - p = check_output(args).decode('ascii').strip() - if pytest.param.get('open_source_license'): - assert p == 'BSD-3' - else: - assert p == 'MIT' - - def test_requirements(self): - reqs_path = self.path / 'requirements.txt' - assert reqs_path.exists() - assert no_curlies(reqs_path) - if pytest.param.get('python_interpreter'): - with open(reqs_path) as fin: - lines = list(map(lambda x: x.strip(), fin.readlines())) - assert 'pathlib2' in lines - - 
def test_makefile(self): - makefile_path = self.path / 'Makefile' - assert makefile_path.exists() - assert no_curlies(makefile_path) - - def test_folders(self): - expected_dirs = [ - 'data', - 'data/external', - 'data/interim', - 'data/processed', - 'data/raw', - 'docs', - 'models', - 'notebooks', - 'references', - 'reports', - 'reports/figures', - 'src', - 'src/data', - 'src/features', - 'src/models', - 'src/visualization', - ] - - ignored_dirs = [ - str(self.path) - ] - - abs_expected_dirs = [str(self.path / d) for d in expected_dirs] - abs_dirs, _, _ = list(zip(*os.walk(self.path))) - assert len(set(abs_expected_dirs + ignored_dirs) - set(abs_dirs)) == 0 +def test_baking_configs(config, fast): + """For every generated config in the config_generator, run all + of the tests. + """ + print("using config", config) + with bake_project(config) as project_directory: + verify_folders(project_directory, config) + verify_files(project_directory, config) + + if fast < 2: + verify_makefile_commands(project_directory, config) + + +def verify_folders(root, config): + """Tests that expected folders and only expected folders exist.""" + expected_dirs = [ + ".", + "data", + "data/external", + "data/interim", + "data/processed", + "data/raw", + "docs", + "models", + "notebooks", + "references", + "reports", + "reports/figures", + config["module_name"], + f"{config['module_name']}/data", + f"{config['module_name']}/features", + f"{config['module_name']}/models", + f"{config['module_name']}/visualization", + ] + + expected_dirs = [ + # (root / d).resolve().relative_to(root) for d in expected_dirs + Path(d) + for d in expected_dirs + ] + + existing_dirs = [ + d.resolve().relative_to(root) for d in root.glob("**") if d.is_dir() + ] + + assert sorted(existing_dirs) == sorted(expected_dirs) + + +def verify_files(root, config): + """Test that expected files and only expected files exist.""" + expected_files = [ + "Makefile", + "README.md", + "setup.py", + "pyproject.toml", + 
"setup.cfg", + ".env", + ".gitignore", + "data/external/.gitkeep", + "data/interim/.gitkeep", + "data/processed/.gitkeep", + "data/raw/.gitkeep", + "docs/Makefile", + "docs/commands.rst", + "docs/conf.py", + "docs/getting-started.rst", + "docs/index.rst", + "docs/make.bat", + "ethics.md", + "notebooks/.gitkeep", + "notebooks/.nbautoexport", + "references/.gitkeep", + "reports/.gitkeep", + "reports/figures/.gitkeep", + "models/.gitkeep", + f"{config['module_name']}/__init__.py", + f"{config['module_name']}/data/__init__.py", + f"{config['module_name']}/data/make_dataset.py", + f"{config['module_name']}/features/__init__.py", + f"{config['module_name']}/features/build_features.py", + f"{config['module_name']}/models/__init__.py", + f"{config['module_name']}/models/train_model.py", + f"{config['module_name']}/models/predict_model.py", + f"{config['module_name']}/visualization/__init__.py", + f"{config['module_name']}/visualization/visualize.py", + ] + + # conditional files + if not config["open_source_license"].startswith("No license"): + expected_files.append("LICENSE") + + expected_files.append(config["dependency_file"]) + + expected_files = [Path(f) for f in expected_files] + + existing_files = [f.relative_to(root) for f in root.glob("**/*") if f.is_file()] + + assert sorted(existing_files) == sorted(expected_files) + + for f in existing_files: + assert no_curlies(root / f) + + +def verify_makefile_commands(root, config): + """Actually shell out to bash and run the make commands for: + - create_environment + - requirements + Ensure that these use the proper environment. 
+ """ + test_path = Path(__file__).parent + + if config["environment_manager"] == "conda": + harness_path = test_path / "conda_harness.sh" + elif config["environment_manager"] == "virtualenv": + harness_path = test_path / "virtualenv_harness.sh" + elif config["environment_manager"] == "pipenv": + + harness_path = test_path / "pipenv_harness.sh" + elif config["environment_manager"] == "none": + return True + else: + raise ValueError( + f"Environment manager '{config['environment_manager']}' not found in test harnesses." + ) + + result = run( + ["bash", str(harness_path), str(root.resolve())], stderr=PIPE, stdout=PIPE + ) + result_returncode = result.returncode + + encoding = sys.stdout.encoding + + if encoding is None: + encoding = "utf-8" + + # normally hidden by pytest except in failure we want this displayed + print("\n======================= STDOUT ======================") + print(result.stdout.decode(encoding)) + + print("\n======================= STDERR ======================") + print(result.stderr.decode(encoding)) + assert result_returncode == 0 diff --git a/tests/test_functions.sh b/tests/test_functions.sh new file mode 100644 index 000000000..96c19f447 --- /dev/null +++ b/tests/test_functions.sh @@ -0,0 +1,12 @@ +function run_tests () { + python --version + python -c "print('python runs....')" + + if [[ $(which python) == *"$1"* ]]; then + echo "found correct python" + else + echo "Python env name $1 not in Python path $(which python)" + exit 99 + fi + +} \ No newline at end of file diff --git a/tests/virtualenv_harness.sh b/tests/virtualenv_harness.sh new file mode 100644 index 000000000..1569fff69 --- /dev/null +++ b/tests/virtualenv_harness.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -e + +PROJECT_NAME=$(basename $1) +CCDS_ROOT=$(dirname $0) + +# configure exit / teardown behavior +function finish { + if [[ $(which python) == *"$PROJECT_NAME"* ]]; then + deactivate + fi + + if [ ! -z `which rmvirtualenv` ]; then + rmvirtualenv $PROJECT_NAME + elif [ ! 
-z `which rmvirtualenv.bat` ]; then + rmvirtualenv.bat $PROJECT_NAME + fi +} +trap finish EXIT + +# source the steps in the test +source $CCDS_ROOT/test_functions.sh + +# navigate to the generated project and run make commands +cd $1 + +if [ -z $TMPDIR ] +then + windowstmpdir=/c/Users/VssAdministrator/AppData/Local/Temp + if [ -e $windowstmpdir ] + then + export TMPDIR=$windowstmpdir + fi +fi + +TEMP_ENV_ROOT=$(mktemp -d "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") +export WORKON_HOME=$TEMP_ENV_ROOT + +if [ ! -z `which virtualenvwrapper.sh` ] +then + source `which virtualenvwrapper.sh` +fi + +make create_environment + +# workon not sourced + +if [ -e $TEMP_ENV_ROOT/$PROJECT_NAME/bin/activate ] +then + . $TEMP_ENV_ROOT/$PROJECT_NAME/bin/activate +else + . $TEMP_ENV_ROOT/$PROJECT_NAME/Scripts/activate +fi + +make requirements + +run_tests $PROJECT_NAME diff --git a/{{ cookiecutter.repo_name }}/Makefile b/{{ cookiecutter.repo_name }}/Makefile index cf9406bdf..bd391f009 100644 --- a/{{ cookiecutter.repo_name }}/Makefile +++ b/{{ cookiecutter.repo_name }}/Makefile @@ -1,86 +1,103 @@ -.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3 +.PHONY: clean data lint format requirements sync_data_down sync_data_up ################################################################################# # GLOBALS # ################################################################################# PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -BUCKET = {{ cookiecutter.s3_bucket }} -PROFILE = {{ cookiecutter.aws_profile }} PROJECT_NAME = {{ cookiecutter.repo_name }} -PYTHON_INTERPRETER = {{ cookiecutter.python_interpreter }} +PYTHON_VERSION = {{ cookiecutter.python_version_number }} +PYTHON_INTERPRETER = python -ifeq (,$(shell which conda)) -HAS_CONDA=False -else -HAS_CONDA=True -endif ################################################################################# # COMMANDS # 
################################################################################# +{% if cookiecutter.dependency_file != 'none' %} ## Install Python Dependencies -requirements: test_environment +requirements: + {% if "requirements.txt" == cookiecutter.dependency_file -%} $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel $(PYTHON_INTERPRETER) -m pip install -r requirements.txt - -## Make Dataset -data: requirements - $(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed + {% elif "environment.yml" == cookiecutter.dependency_file -%} + conda env update --name $(PROJECT_NAME) --file environment.yml --prune + {% elif "Pipfile" == cookiecutter.dependency_file -%} + pipenv install + {% endif %} + {% if cookiecutter.nbautoexport == "yes" %} + nbautoexport install + {% endif %} +{% endif %} ## Delete all compiled Python files clean: find . -type f -name "*.py[co]" -delete find . -type d -name "__pycache__" -delete -## Lint using flake8 +## Lint using flake8 and black (use `make format` to do formatting) lint: - flake8 src - -## Upload Data to S3 -sync_data_to_s3: -ifeq (default,$(PROFILE)) - aws s3 sync data/ s3://$(BUCKET)/data/ -else - aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE) -endif - -## Download Data from S3 -sync_data_from_s3: -ifeq (default,$(PROFILE)) - aws s3 sync s3://$(BUCKET)/data/ data/ -else - aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE) -endif - + flake8 {{ cookiecutter.module_name }} + black --check --config pyproject.toml {{ cookiecutter.module_name }} + + +## Format source code with black +format: + black --config pyproject.toml {{ cookiecutter.module_name }} + +{% if not cookiecutter.dataset_storage.none %} +## Download Data from storage system +sync_data_down: + {% if cookiecutter.dataset_storage.s3 -%} + aws s3 sync s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/\ + data/ {% if cookiecutter.dataset_storage.s3.aws_profile != 'default' %} --profile {{ 
cookiecutter.dataset_storage.s3.aws_profile }}{% endif %} + {% elif cookiecutter.dataset_storage.azure -%} + az storage blob download-batch -s {{ cookiecutter.dataset_storage.azure.container }}/data/ \ + -d data/ + {% elif cookiecutter.dataset_storage.gcs -%} + gsutil rsync gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/ data/ + {% endif %} + +## Upload Data to storage system +sync_data_up: + {% if cookiecutter.dataset_storage.s3 -%} + aws s3 sync data/ s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/\ + {% if cookiecutter.dataset_storage.s3.aws_profile != 'default' %} --profile {{ cookiecutter.dataset_storage.s3.aws_profile }}{% endif %} + {% elif cookiecutter.dataset_storage.azure -%} + az storage blob upload-batch -d {{ cookiecutter.dataset_storage.azure.container }}/data/ \ + -s data/ + {% elif cookiecutter.dataset_storage.gcs -%} + gsutil rsync data/ gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/ + {% endif %} +{% endif %} + +{% if cookiecutter.environment_manager != 'none' %} ## Set up python interpreter environment create_environment: -ifeq (True,$(HAS_CONDA)) - @echo ">>> Detected conda, creating conda environment." -ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER))) - conda create --name $(PROJECT_NAME) python=3 -else - conda create --name $(PROJECT_NAME) python=2.7 -endif - @echo ">>> New conda env created. 
Activate with:\nsource activate $(PROJECT_NAME)" -else - $(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper - @echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\ - export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n" - @bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)" + {% if cookiecutter.environment_manager == 'conda' -%} + {% if cookiecutter.dependency_file != 'environment.yml' %} + conda create --name $(PROJECT_NAME) python=$(PYTHON_VERSION) -y + {% else -%} + conda env create --name $(PROJECT_NAME) -f environment.yml + {% endif %} + @echo ">>> conda env created. Activate with:\nconda activate $(PROJECT_NAME)" + {% elif cookiecutter.environment_manager == 'virtualenv' -%} + @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi" @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)" -endif + {% elif cookiecutter.environment_manager == 'pipenv' -%} + pipenv --python $(PYTHON_VERSION) + @echo ">>> New pipenv created. 
Activate with:\npipenv shell" + {% endif %} +{% endif %} -## Test python environment is setup correctly -test_environment: - $(PYTHON_INTERPRETER) test_environment.py ################################################################################# # PROJECT RULES # ################################################################################# - +## Make Dataset +data: requirements + $(PYTHON_INTERPRETER) {{ cookiecutter.module_name }}/data/make_dataset.py ################################################################################# # Self Documenting Commands # diff --git a/{{ cookiecutter.repo_name }}/README.md b/{{ cookiecutter.repo_name }}/README.md index 78eb6762d..ba887d7f4 100644 --- a/{{ cookiecutter.repo_name }}/README.md +++ b/{{ cookiecutter.repo_name }}/README.md @@ -31,9 +31,9 @@ Project Organization ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. │ generated with `pip freeze > requirements.txt` │ - ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported - ├── src <- Source code for use in this project. - │   ├── __init__.py <- Makes src a Python module + ├── setup.py <- makes project pip installable (pip install -e .) so {{ cookiecutter.module_name }} can be imported + ├── {{ cookiecutter.module_name }} <- Source code for use in this project. + │   ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module │ │ │   ├── data <- Scripts to download or generate data │   │   └── make_dataset.py diff --git a/{{ cookiecutter.repo_name }}/docs/commands.rst b/{{ cookiecutter.repo_name }}/docs/commands.rst index 56e9e4ac7..438a52ff4 100644 --- a/{{ cookiecutter.repo_name }}/docs/commands.rst +++ b/{{ cookiecutter.repo_name }}/docs/commands.rst @@ -3,8 +3,17 @@ Commands The Makefile contains the central entry points for common tasks related to this project. 
-Syncing data to S3 -^^^^^^^^^^^^^^^^^^ - -* `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://{{ cookiecutter.s3_bucket }}/data/`. -* `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://{{ cookiecutter.s3_bucket }}/data/` to `data/`. +{% if not cookiecutter.dataset_storage.none %} +Syncing data to cloud storage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +{% if cookiecutter.dataset_storage.s3 -%} +* `make sync_data_up` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/`. +* `make sync_data_down` will use `aws s3 sync` to recursively sync files from `s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/` to `data/`. +{% elif cookiecutter.dataset_storage.azure -%} +* `make sync_data_up` will use `az storage blob upload-batch -d` to recursively sync files in `data/` up to `{{ cookiecutter.dataset_storage.azure.container }}/data/`. +* `make sync_data_down` will use `az storage blob download-batch -s` to recursively sync files from `{{ cookiecutter.dataset_storage.azure.container }}/data/` to `data/`. +{% elif cookiecutter.dataset_storage.gcs -%} +* `make sync_data_up` will use `gsutil rsync` to recursively sync files in `data/` up to `gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/`. +* `make sync_data_down` will use `gsutil rsync` to recursively sync files in `gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/` to `data/`. 
+{% endif %} +{% endif %} \ No newline at end of file diff --git a/{{ cookiecutter.repo_name }}/notebooks/.nbautoexport b/{{ cookiecutter.repo_name }}/notebooks/.nbautoexport new file mode 100644 index 000000000..8c1c824f0 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/notebooks/.nbautoexport @@ -0,0 +1,8 @@ +{% if cookiecutter.nbautoexport == 'yes' %} +{ + "export_formats": [ + "script" + ], + "organize_by": "extension" +} +{% endif %} diff --git a/{{ cookiecutter.repo_name }}/pyproject.toml b/{{ cookiecutter.repo_name }}/pyproject.toml new file mode 100644 index 000000000..5d72c78fd --- /dev/null +++ b/{{ cookiecutter.repo_name }}/pyproject.toml @@ -0,0 +1,10 @@ +[tool.black] +line-length = 99 +target-version = ['py36', 'py37', 'py38'] +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.venv +)/ +''' diff --git a/{{ cookiecutter.repo_name }}/requirements.txt b/{{ cookiecutter.repo_name }}/requirements.txt deleted file mode 100644 index 10a89cb6e..000000000 --- a/{{ cookiecutter.repo_name }}/requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -# local package --e . 
- -# external requirements -click -Sphinx -coverage -awscli -flake8 -python-dotenv>=0.5.1 -{% if cookiecutter.python_interpreter != 'python3' %} - -# backwards compatibility -pathlib2 -{% endif %} \ No newline at end of file diff --git a/{{ cookiecutter.repo_name }}/setup.cfg b/{{ cookiecutter.repo_name }}/setup.cfg new file mode 100644 index 000000000..d916b1a69 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +ignore = E731,E266,E501,C901,W503 +max-line-length = 99 +exclude = .git,notebooks,references,models,data + +[pep8] +max-line-length = 99 +exclude = .git,notebooks,references,models,data diff --git a/{{ cookiecutter.repo_name }}/setup.py b/{{ cookiecutter.repo_name }}/setup.py index 3fef006e2..a43dad402 100644 --- a/{{ cookiecutter.repo_name }}/setup.py +++ b/{{ cookiecutter.repo_name }}/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages, setup setup( - name='src', + name='{{ cookiecutter.module_name }}', packages=find_packages(), version='0.1.0', description='{{ cookiecutter.description }}', diff --git a/{{ cookiecutter.repo_name }}/src/models/.gitkeep b/{{ cookiecutter.repo_name }}/src/models/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/src/visualization/.gitkeep b/{{ cookiecutter.repo_name }}/src/visualization/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/test_environment.py b/{{ cookiecutter.repo_name }}/test_environment.py deleted file mode 100644 index 0b0abeaa1..000000000 --- a/{{ cookiecutter.repo_name }}/test_environment.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys - -REQUIRED_PYTHON = "{{ cookiecutter.python_interpreter }}" - - -def main(): - system_major = sys.version_info.major - if REQUIRED_PYTHON == "python": - required_major = 2 - elif REQUIRED_PYTHON == "python3": - required_major = 3 - else: - raise ValueError("Unrecognized python interpreter: {}".format( - REQUIRED_PYTHON)) - - if 
system_major != required_major: - raise TypeError( - "This project requires Python {}. Found: Python {}".format( - required_major, sys.version)) - else: - print(">>> Development environment passes all tests!") - - -if __name__ == '__main__': - main() diff --git a/{{ cookiecutter.repo_name }}/tox.ini b/{{ cookiecutter.repo_name }}/tox.ini deleted file mode 100644 index c32fbd859..000000000 --- a/{{ cookiecutter.repo_name }}/tox.ini +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 79 -max-complexity = 10 diff --git a/{{ cookiecutter.repo_name }}/src/features/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/features/__init__.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py diff --git a/{{ cookiecutter.repo_name }}/src/models/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/models/__init__.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/__init__.py diff --git a/{{ cookiecutter.repo_name }}/src/data/make_dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py similarity index 59% rename from {{ cookiecutter.repo_name }}/src/data/make_dataset.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py index 96b377a23..70a6ee31d 100644 --- a/{{ cookiecutter.repo_name }}/src/data/make_dataset.py +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py @@ -6,18 +6,18 @@ @click.command() -@click.argument('input_filepath', type=click.Path(exists=True)) -@click.argument('output_filepath', type=click.Path()) +@click.argument("input_filepath", type=click.Path(exists=True)) +@click.argument("output_filepath", type=click.Path()) def main(input_filepath, output_filepath): - """ Runs data processing 
scripts to turn raw data from (../raw) into - cleaned data ready to be analyzed (saved in ../processed). + """Runs data processing scripts to turn raw data from (../raw) into + cleaned data ready to be analyzed (saved in ../processed). """ logger = logging.getLogger(__name__) - logger.info('making final data set from raw data') + logger.info("making final data set from raw data") -if __name__ == '__main__': - log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +if __name__ == "__main__": + log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=log_fmt) # not used in this stub but often useful for finding various files diff --git a/{{ cookiecutter.repo_name }}/src/visualization/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/visualization/__init__.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py diff --git a/{{ cookiecutter.repo_name }}/src/features/build_features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/features/build_features.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py diff --git a/{{ cookiecutter.repo_name }}/src/data/.gitkeep b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/data/.gitkeep rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/__init__.py diff --git a/{{ cookiecutter.repo_name }}/src/models/predict_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/predict_model.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/models/predict_model.py rename to {{ cookiecutter.repo_name }}/{{ 
cookiecutter.module_name }}/models/predict_model.py diff --git a/{{ cookiecutter.repo_name }}/src/models/train_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/models/train_model.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py diff --git a/{{ cookiecutter.repo_name }}/src/features/.gitkeep b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/features/.gitkeep rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py diff --git a/{{ cookiecutter.repo_name }}/src/visualization/visualize.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/visualize.py similarity index 100% rename from {{ cookiecutter.repo_name }}/src/visualization/visualize.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/visualize.py