Browse Source

final preparations

metya 6 years ago
Commit
eadb954af7
25 changed files with 155 additions and 65 deletions
  1. README.md (+1 -1)
  2. cookiecutter.json (+3 -3)
  3. {{ cookiecutter.repo_name }}/.env (+12 -0)
  4. {{ cookiecutter.repo_name }}/.gitignore (+14 -1)
  5. {{ cookiecutter.repo_name }}/Makefile (+63 -14)
  6. {{ cookiecutter.repo_name }}/README.md (+49 -46)
  7. {{ cookiecutter.repo_name }}/data/README.md (+3 -0)
  8. {{ cookiecutter.repo_name }}/data/raw/external/.gitkeep (+0 -0)
  9. {{ cookiecutter.repo_name }}/data/raw/interim/.gitkeep (+0 -0)
  10. {{ cookiecutter.repo_name }}/nets/settings.py (+0 -0)
  11. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/__init__.py (+0 -0)
  12. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/.gitkeep (+0 -0)
  13. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/__init__.py (+0 -0)
  14. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/nets.py (+0 -0)
  15. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/predict.py (+0 -0)
  16. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/train.py (+0 -0)
  17. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/pipelines/.gitkeep (+0 -0)
  18. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/settings.py (+10 -0)
  19. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/__init__.py (+0 -0)
  20. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/featurize.py (+0 -0)
  21. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/make_dataset.py (+0 -0)
  22. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/utils.py (+0 -0)
  23. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/.gitkeep (+0 -0)
  24. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/__init__.py (+0 -0)
  25. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/visualization.py (+0 -0)

+ 1 - 1
README.md

@@ -1,4 +1,4 @@
-# Cookiekaker Cookiecutter Data Science Template inspired by @vasinkd and @drivendata
+# Cookiekaker Deep Learning Template inspired by @vasinkd and @drivendata
 
 _A not quite logical, and unreasonably standardized, but flexible project structure for doing and sharing data science work at certain motivation and place._
 

+ 3 - 3
cookiecutter.json

@@ -6,8 +6,8 @@
     "open_source_license": ["No license file", "MIT", "BSD-3-Clause"],
     "s3_bucket": "[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')",
     "aws_profile": "default",
-    "minio": "[OPTIONAL] your-minio-for-syncing data",
+    "minio": "[OPTIONAL] your-minio-for-syncing-data (your shoud have installed rclone to sycn (https://rclone.org/install/))",
     "minio_profile": "default",
-    "python_interpreter": ["python"],
-    // "git_remote_repo": "Your remote repo for this project"
+    // "git_remote_repo": "Your remote repo for this project",
+    "python_interpreter": ["python"]
 }
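
With the prompts above in place, the template can be smoke-tested by rendering it non-interactively. (Note that `//` comments are not valid JSON, so the disabled `git_remote_repo` entry is kept as an underscore-prefixed key, which cookiecutter treats as private and does not prompt for.) A minimal sketch using cookiecutter's Python API; the `extra_context` answers and the local template path are assumptions, and `repo_name` comes from a part of `cookiecutter.json` outside this hunk:

```python
# render_demo.py -- smoke-test the template without interactive prompts.
# Assumes it is run from the directory that contains cookiecutter.json.
from cookiecutter.main import cookiecutter

cookiecutter(
    ".",              # path to this template (assumption: current directory)
    no_input=True,    # accept defaults instead of prompting
    extra_context={   # hypothetical answers overriding the JSON defaults
        "repo_name": "demo_project",
        "open_source_license": "MIT",
        "python_interpreter": "python",
    },
)
```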

+ 12 - 0
{{ cookiecutter.repo_name }}/.env

@@ -0,0 +1,12 @@
+# Environment variables go here; they can be read by the `python-dotenv` package:
+#
+#   `{{ cookiecutter.repo_name }}/script.py`
+#   ----------------------------------------------------------------
+#    import os, dotenv
+#
+#    project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
+#    dotenv_path = os.path.join(project_dir, '.env')
+#    dotenv.load_dotenv(dotenv_path)
+#   ----------------------------------------------------------------
+#
+# DO NOT ADD THIS FILE TO VERSION CONTROL!
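
The commented pattern above is what every script in the template can follow; a self-contained sketch, where `DATA_DIR` is a purely hypothetical variable name:

```python
# example_script.py -- load variables from the project's .env file.
import os

import dotenv

# Resolve .env one directory above this file, as in the comment block above.
project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
dotenv.load_dotenv(os.path.join(project_dir, ".env"))

# DATA_DIR is a hypothetical variable; os.getenv falls back to the default if unset.
data_dir = os.getenv("DATA_DIR", "data/raw")
print(f"reading data from {data_dir}")
```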

+ 14 - 1
{{ cookiecutter.repo_name }}/.gitignore

@@ -57,7 +57,7 @@ docs/_build/
 target/
 
 # DotEnv configuration
-.env
+# .env
 
 # Database
 *.db
@@ -84,3 +84,16 @@ target/
 
 # Mypy cache
 .mypy_cache/
+
+# Data folders
+/data/*
+!/data/raw/
+!/data/README.md
+models/*
+logs/*
+
+
+
+
+
+

+ 63 - 14
{{ cookiecutter.repo_name }}/Makefile

@@ -12,6 +12,12 @@ PYTHON_INTERPRETER = {{ cookiecutter.python_interpreter }}
 # GIT = {{ cookiecutter.git_remote_repo }}
 USERNAME = {{ cookiecutter.author_name }}
 
+ifeq (,$(shell which conda))
+HAS_CONDA=False
+else
+HAS_CONDA=True
+endif
+
 #################################################################################
 # COMMANDS                                                                      #
 #################################################################################
@@ -33,49 +39,83 @@ lint:
 
 ## Make Dataset
 data: requirements
-	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
+	$(PYTHON_INTERPRETER) $(PROJECT_NAME)/utils/make_dataset.py
 
 ## Upload Data to S3
 sync_data_to_s3:
 ifeq (default,$(S3PROFILE))
-	aws s3 sync data/ s3://$(S3BUCKET)/data/
+	aws s3 sync data/ s3://$(S3BUCKET)/data/ 
+	aws s3 sync models/ s3://$(S3BUCKET)/models/
 else
 	aws s3 sync data/ s3://$(S3BUCKET)/data/ --profile $(S3PROFILE)
+	aws s3 sync models/ s3://$(S3BUCKET)/models/ --profile $(S3PROFILE)
+
 endif
 
 ## Download Data from S3
 sync_data_from_s3:
 ifeq (default,$(S3PROFILE))
 	aws s3 sync s3://$(S3BUCKET)/data/ data/
+	aws s3 sync s3://$(S3BUCKET)/models/ models/
 else
 	aws s3 sync s3://$(S3BUCKET)/data/ data/ --profile $(S3PROFILE)
+	aws s3 sync s3://$(S3BUCKET)/models/ models/ --profile $(S3PROFILE)
 endif
 
 ## Upload to minio
 sync_data_to_minio:
 ifeq (default,$(MINIOPROFILE))
-	rclone --size-only sync data/ $(MINIO)/data/ --stats-one-line -P --stats 2s
+	rclone sync data/ $(MINIO)/$(PROJECT_NAME)/data/ --stats-one-line -P --stats 2s
+	rclone sync models/ $(MINIO)/$(PROJECT_NAME)/models/ --stats-one-line -P --stats 2s
+endif
 
 ## Download from minio
-sync_data_to_minio:
+sync_data_from_minio:
 ifeq (default,$(MINIOPROFILE))
-	rclone --size-only sync $(MINIO)/data/ data/ --stats-one-line -P --stats 2s
+	rclone sync $(MINIO)/$(PROJECT_NAME)/data/ data/ --stats-one-line -P --stats 2s
+	rclone sync $(MINIO)/$(PROJECT_NAME)/models/ models/ --stats-one-line -P --stats 2s
+endif
 
 
 ## Initial set up of python interpreter environment, version control and pre-commit hooks
 initial_setup:
-	@bash -c "virtualenv -p $(PYTHON_INTERPRETER) $(HOME)/envs/$(PROJECT_NAME)"
+ifeq (True,$(HAS_CONDA))
+		@echo ">>> Detected conda, creating conda environment."
+ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
+	conda create --name $(PROJECT_NAME) python=3
+endif
 	@bash -c "git init"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install dvc[all]; dvc init"
+	@bash -c "source activate $(PROJECT_NAME); pip install dvc[all]; dvc init"
 	@bash -c "echo '[core]' >> $(PROJECT_DIR)/.dvc/config"
 	@bash -c "echo 'analytics = false' >> $(PROJECT_DIR)/.dvc/config"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install pre-commit; pre-commit install; pre-commit autoupdate"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install -r $(PROJECT_DIR)/requirements.txt"
+	@bash -c "source activate $(PROJECT_NAME); pip install pre-commit; pre-commit install; pre-commit autoupdate"
+	@bash -c "source activate $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
 	@bash -c "echo 'CWD=$(PROJECT_DIR)' >> $(PROJECT_DIR)/.env"
+	@bash -c "echo '.env' >> $(PROJECT_DIR)/.gitignore"
 	@bash -c "git add . ; git commit -am 'INITIAL COMMIT'"
 	@bash -c "git remote add origin https://github.com/$(USERNAME)/$(PROJECT_NAME).git"
-# 	@bash -c "git remote add origin $(GIT)"	
-	@echo ">>> New virtualenv created. Activate with:\nsource $(HOME)/envs/$(PROJECT_NAME)/bin/activate"
+	# @bash -c "git remote add origin $(GIT)"
+	@bash -c "git push -u origin" && echo "uploaded repo to the remote" || echo "repository $(PROJECT_NAME) doesn't exist yet, please create it."
+	@echo ">>> New virtualenv created. Activate with:\nsource activate $(PROJECT_NAME)"
+	@echo ">>> git and dvc are ready to go"
+
+else
+	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
+	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in your shell startup file:\n\
+				export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
+	@bash -c "source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
+	@echo ">>> Initializing git repo"
+	@bash -c "git init"
+	@bash -c "workon $(PROJECT_NAME); pip install dvc[all]; dvc init"
+	@bash -c "echo '[core]' >> $(PROJECT_DIR)/.dvc/config"
+	@bash -c "echo 'analytics = false' >> $(PROJECT_DIR)/.dvc/config"
+	@bash -c "workon $(PROJECT_NAME); pip install pre-commit; pre-commit install; pre-commit autoupdate"
+	@bash -c "workon $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+	@bash -c "echo 'CWD=$(PROJECT_DIR)' >> $(PROJECT_DIR)/.env"
+	@bash -c "echo '.env' >> $(PROJECT_DIR)/.gitignore"
+	@bash -c "git add . ; git commit -am 'INITIAL COMMIT'"
+	@bash -c "git remote add origin https://github.com/$(USERNAME)/$(PROJECT_NAME).git"
+	# @bash -c "git remote add origin $(GIT)"
+	@bash -c "git push -u origin" && echo "uploaded repo to the remote" || echo "repository $(PROJECT_NAME) doesn't exist yet, please create it."
+	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
 	@echo ">>> git and dvc are ready to go"
 
 ## Following set up of python interpreter environment, version control and pre-commit hooks
@@ -88,10 +128,16 @@ following_setup:
 	@echo ">>> git and dvc are ready to go"
 
 requirements:
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install -r $(PROJECT_DIR)/requirements.txt"
+ifeq (True,$(HAS_CONDA))
+	@bash -c "source activate $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+else
+	@bash -c "workon $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+endif
 
 environment:
-	@echo ">>> Activate with:\nsource $(HOME)/envs/$(PROJECT_NAME)/bin/activate"
+ifeq (True,$(HAS_CONDA))
+	@echo ">>> Activate with:\nsource activate $(PROJECT_NAME)"
+else
+	@echo ">>> Activate with:\nworkon $(PROJECT_NAME)"
+endif
 
 commit:
 	$(eval EXP_FILE := "$(PROJECT_DIR)/$(PROJECT_NAME)/study_name.txt")
@@ -100,7 +146,10 @@ commit:
 
 push:
 	@bash -c "git push origin --follow-tags"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; dvc push"
+ifeq (True,$(HAS_CONDA))
+	@bash -c "source activate $(PROJECT_NAME); dvc push"
+else
+	@bash -c "workon $(PROJECT_NAME); dvc push"
+endif
 
 #################################################################################
 # PROJECT RULES                                                                 #
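
Note that the `data` target now calls `$(PROJECT_NAME)/utils/make_dataset.py` with no arguments, so the script is expected to resolve its own paths. A hypothetical sketch of such a script (not the template's actual code), assuming it reads the `CWD` variable that `initial_setup` appends to `.env`:

```python
# utils/make_dataset.py -- hypothetical sketch; `make data` passes no
# arguments, so paths are resolved from the .env file instead.
import os
import shutil

import dotenv

dotenv.load_dotenv(dotenv.find_dotenv())

project_dir = os.getenv("CWD", ".")  # written into .env by initial_setup
raw_dir = os.path.join(project_dir, "data", "raw")
processed_dir = os.path.join(project_dir, "data", "processed")
os.makedirs(processed_dir, exist_ok=True)

# Placeholder transform: copy raw files through unchanged.
for name in os.listdir(raw_dir):
    src = os.path.join(raw_dir, name)
    if os.path.isfile(src):
        shutil.copy(src, os.path.join(processed_dir, name))
```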

+ 49 - 46
{{ cookiecutter.repo_name }}/README.md

@@ -6,49 +6,52 @@
 Project Organization
 
 ```
-├── LICENSE
-├── Makefile           <- Makefile with commands like `make data` or `make train`
-├── README.md          <- The top-level README for developers using this project.
-├── data
-│   ├── external       <- Data from third party sources.
-│   ├── interim        <- Intermediate data that has been transformed.
-│   ├── processed      <- The final, canonical data sets for modeling.
-│   ├── features       <- Features may be stored here
-│   ├── inference      <- Inference stages may be stored here
-│   └── raw            <- The original, immutable data dump.
-│
-├── docs               <- A default Sphinx project; see sphinx-doc.org for details
-│
-├── models             <- Trained and serialized models, model predictions, or model summaries
-│
-├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering),
-│                         the creator's initials, and a short `-` delimited description, e.g.
-│                         `1.0-jqp-initial-data-exploration`.
-│
-├── references         <- Data dictionaries, manuals, and all other explanatory materials.
-│
-├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
-│   └── figures        <- Generated graphics and figures to be used in reporting
-│
-├── .pre-commit-config.yaml <- Stores pre-commit settings
-│
-├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
-│                         generated with `pip freeze > requirements.txt`
-│
-├── __init__.py
-│
-└── {{cookiecutter.repo_name}}   <- Source code for use in this project.
-    ├── __init__.py    <- Makes {{cookiecutter.repo_name}} a Python module
-    │    
-    ├── settings.py <- illustrates how to use .env file
-    │
-    ├── data           <- Scripts to download or generate data
-    │   └── make_dataset.py
-    │
-    ├── features       <- Scripts to turn raw data into features for modeling
-    │   └── featurize.py
-    │
-    └── models         <- Scripts to train models and then use trained models to make
-        │                 predictions
-        └── train.py
-```
+
+│   .pre-commit-config.yaml <- Stores pre-commit settings
+│   LICENSE
+│   Makefile                <- Makefile with commands like `make data` or `make train`
+│   README.md               <- The top-level README for developers using this project.
+│   requirements.txt        <- The requirements file for reproducing the analysis environment,
+│                              e.g. generated with `pip freeze > requirements.txt`
+│   setup.py
+│   test_environment.py
+│   tox.ini                 <- tox file with settings for running tox; see tox.testrun.org
+│   __init__.py
+├── app                     <- Folder for wrappers, APIs, applications and other business stuff
+│       api.py
+│       app.py
+│       main.py
+│       __init__.py
+├── data                    <- Folder to store data
+│   │   README.md           <- Description of the data
+│   ├── processed           <- The final, canonical data sets for modeling.
+│   └── raw                 <- The original, immutable data dump.
+│       ├── external        <- Data from third party sources.
+│       └── interim         <- Intermediate data that has been transformed.
+├── docs                    <- A default Sphinx project; see sphinx-doc.org for details
+├── logs                    <- To store logs, e.g. from tensorboard
+├── models                  <- Trained and serialized models, model predictions, or model summaries
+├── notebooks               <- Jupyter notebooks. Naming convention is a number (for ordering),
+│                              the creator's initials, and a short `-` delimited description, e.g.
+│                              `1.0-jqp-initial-data-exploration`.
+├── references              <- Data dictionaries, manuals, and all other explanatory materials.
+├── reports                 <- Generated analysis as HTML, PDF, LaTeX, etc.
+│   ├── figures             <- Generated graphics and figures to be used in reporting
+│   └── images              <- Images for reports
+└── {{ cookiecutter.repo_name }} <- Source code for use in this project.
+    │   settings.py         <- Illustrates how to use the .env file
+    │   __init__.py         <- Makes {{ cookiecutter.repo_name }} a Python module
+    ├── nets                <- Code for your models and nets
+    │       nets.py         <- Nets
+    │       predict.py      <- Evaluation script
+    │       train.py        <- Training script
+    │       __init__.py
+    ├── pipelines           <- To store dvc pipelines
+    ├── utils               <- Various utility functions
+    │       featurize.py    <- To create features from the dataset
+    │       make_dataset.py <- To create the final dataset
+    │       utils.py        <- Various helpers
+    │       __init__.py
+    └── visualization       <- To visualize stuff
+            visualization.py
+            __init__.py
+```

+ 3 - 0
{{ cookiecutter.repo_name }}/data/README.md

@@ -0,0 +1,3 @@
+# Data
+
+Here you can describe your data and its location in the structure.

+ 0 - 0
{{ cookiecutter.repo_name }}/data/external/.gitkeep → {{ cookiecutter.repo_name }}/data/raw/external/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/data/interim/.gitkeep → {{ cookiecutter.repo_name }}/data/raw/interim/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/settings.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/models.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/nets.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/predict.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/predict.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/train.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/train.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/pipelines/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/pipelines/.gitkeep


+ 10 - 0
{{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/settings.py

@@ -0,0 +1,10 @@
+import os
+import dotenv
+import signal
+
+signal.signal(signal.SIGINT, signal.default_int_handler)
+
+dotenv.load_dotenv(dotenv.find_dotenv())
+
+cwd = os.getenv("CWD")
+RAND_STATE = 42
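
Any module in the package can then pull shared configuration from this one file. A small usage sketch; the body of `train.py` below is an assumption, not the template's actual code, and the `{{ cookiecutter.repo_name }}` import renders to the real package name once the template is generated:

```python
# nets/train.py -- hypothetical consumer of settings.py.
import random

from {{ cookiecutter.repo_name }}.settings import RAND_STATE, cwd

random.seed(RAND_STATE)  # one shared seed keeps experiments reproducible
print(f"project root from .env: {cwd}")
```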

+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/featurize.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/featurize.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/make_dataset.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/make_dataset.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/utils.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/utils.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/visualization.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/visualization.py