Browse Source

final preparations

metya 6 years ago
Commit
eadb954af7
25 changed files with 155 additions and 65 deletions
  1. README.md (+1 -1)
  2. cookiecutter.json (+3 -3)
  3. {{ cookiecutter.repo_name }}/.env (+12 -0)
  4. {{ cookiecutter.repo_name }}/.gitignore (+14 -1)
  5. {{ cookiecutter.repo_name }}/Makefile (+63 -14)
  6. {{ cookiecutter.repo_name }}/README.md (+49 -46)
  7. {{ cookiecutter.repo_name }}/data/README.md (+3 -0)
  8. {{ cookiecutter.repo_name }}/data/raw/external/.gitkeep (+0 -0)
  9. {{ cookiecutter.repo_name }}/data/raw/interim/.gitkeep (+0 -0)
  10. {{ cookiecutter.repo_name }}/nets/settings.py (+0 -0)
  11. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/__init__.py (+0 -0)
  12. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/.gitkeep (+0 -0)
  13. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/__init__.py (+0 -0)
  14. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/nets.py (+0 -0)
  15. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/predict.py (+0 -0)
  16. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/train.py (+0 -0)
  17. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/pipelines/.gitkeep (+0 -0)
  18. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/settings.py (+10 -0)
  19. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/__init__.py (+0 -0)
  20. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/featurize.py (+0 -0)
  21. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/make_dataset.py (+0 -0)
  22. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/utils.py (+0 -0)
  23. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/.gitkeep (+0 -0)
  24. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/__init__.py (+0 -0)
  25. {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/visualization.py (+0 -0)

+ 1 - 1
README.md

@@ -1,4 +1,4 @@
-# Cookiekaker Cookiecutter Data Science Template inspired by @vasinkd and @drivendata
+# Cookiekaker Deep Learning Template inspired by @vasinkd and @drivendata
 
 _A not quite logical, and unreasonably standardized, but flexible project structure for doing and sharing data science work at certain motivation and place._
 

+ 3 - 3
cookiecutter.json

@@ -6,8 +6,8 @@
     "open_source_license": ["No license file", "MIT", "BSD-3-Clause"],
     "s3_bucket": "[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')",
     "aws_profile": "default",
-    "minio": "[OPTIONAL] your-minio-for-syncing data",
+    "minio": "[OPTIONAL] your-minio-for-syncing-data (your shoud have installed rclone to sycn (https://rclone.org/install/))",
     "minio_profile": "default",
-    "python_interpreter": ["python"],
-    // "git_remote_repo": "Your remote repo for this project"
+    // "git_remote_repo": "Your remote repo for this project",
+    "python_interpreter": ["python"]
 }
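
With the prompts above in place, the template can be smoke-tested by rendering it non-interactively. (Note that `//` comments are not valid JSON, so the disabled `git_remote_repo` entry is kept as an underscore-prefixed key, which cookiecutter treats as private and does not prompt for.) A minimal sketch using cookiecutter's Python API; the `extra_context` answers and the local template path are assumptions, and `repo_name` comes from a part of `cookiecutter.json` outside this hunk:

```python
# render_demo.py -- smoke-test the template without interactive prompts.
# Assumes it is run from the directory that contains cookiecutter.json.
from cookiecutter.main import cookiecutter

cookiecutter(
    ".",              # path to this template (assumption: current directory)
    no_input=True,    # accept defaults instead of prompting
    extra_context={   # hypothetical answers overriding the JSON defaults
        "repo_name": "demo_project",
        "open_source_license": "MIT",
        "python_interpreter": "python",
    },
)
```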

+ 12 - 0
{{ cookiecutter.repo_name }}/.env

@@ -0,0 +1,12 @@
+# Environment variables go here; they can be read by the `python-dotenv` package:
+#
+#   `{{ cookiecutter.repo_name }}/script.py`
+#   ----------------------------------------------------------------
+#    import os, dotenv
+#
+#    project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
+#    dotenv_path = os.path.join(project_dir, '.env')
+#    dotenv.load_dotenv(dotenv_path)
+#   ----------------------------------------------------------------
+#
+# DO NOT ADD THIS FILE TO VERSION CONTROL!
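
The commented pattern above is what every script in the template can follow; a self-contained sketch, where `DATA_DIR` is a purely hypothetical variable name:

```python
# example_script.py -- load variables from the project's .env file.
import os

import dotenv

# Resolve .env one directory above this file, as in the comment block above.
project_dir = os.path.join(os.path.dirname(__file__), os.pardir)
dotenv.load_dotenv(os.path.join(project_dir, ".env"))

# DATA_DIR is a hypothetical variable; os.getenv falls back to the default if unset.
data_dir = os.getenv("DATA_DIR", "data/raw")
print(f"reading data from {data_dir}")
```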

+ 14 - 1
{{ cookiecutter.repo_name }}/.gitignore

@@ -57,7 +57,7 @@ docs/_build/
 target/
 
 # DotEnv configuration
-.env
+# .env
 
 # Database
 *.db
@@ -84,3 +84,16 @@ target/
 
 # Mypy cache
 .mypy_cache/
+
+# Data folders
+/data/*
+!/data/raw/
+!/data/README.md
+models/*
+logs/*
+
+
+
+
+
+

+ 63 - 14
{{ cookiecutter.repo_name }}/Makefile

@@ -12,6 +12,12 @@ PYTHON_INTERPRETER = {{ cookiecutter.python_interpreter }}
 # GIT = {{ cookiecutter.git_remote_repo }}
 USERNAME = {{ cookiecutter.author_name }}
 
+ifeq (,$(shell which conda))
+HAS_CONDA=False
+else
+HAS_CONDA=True
+endif
+
 #################################################################################
 # COMMANDS                                                                      #
 #################################################################################
@@ -33,49 +39,83 @@ lint:
 
 ## Make Dataset
 data: requirements
-	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
+	$(PYTHON_INTERPRETER) $(PROJECT_NAME)/utils/make_dataset.py
 
 ## Upload Data to S3
 sync_data_to_s3:
 ifeq (default,$(S3PROFILE))
-	aws s3 sync data/ s3://$(S3BUCKET)/data/
+	aws s3 sync data/ s3://$(S3BUCKET)/data/ 
+	aws s3 sync models/ s3://$(S3BUCKET)/models/
 else
 	aws s3 sync data/ s3://$(S3BUCKET)/data/ --profile $(S3PROFILE)
+	aws s3 sync models/ s3://$(S3BUCKET)/models/ --profile $(S3PROFILE)
+
 endif
 
 ## Download Data from S3
 sync_data_from_s3:
 ifeq (default,$(S3PROFILE))
 	aws s3 sync s3://$(S3BUCKET)/data/ data/
+	aws s3 sync s3://$(S3BUCKET)/models/ models/
 else
 	aws s3 sync s3://$(S3BUCKET)/data/ data/ --profile $(S3PROFILE)
+	aws s3 sync s3://$(S3BUCKET)/models/ models/ --profile $(S3PROFILE)
 endif
 
 ## Upload to minio
 sync_data_to_minio:
 ifeq (default,$(MINIOPROFILE))
-	rclone --size-only sync data/ $(MINIO)/data/ --stats-one-line -P --stats 2s
+	rclone sync data/ $(MINIO)/$(PROJECT_NAME)/data/ --stats-one-line -P --stats 2s
+	rclone sync models/ $(MINIO)/$(PROJECT_NAME)/models/ --stats-one-line -P --stats 2s
+endif
 
 ## Download from minio
-sync_data_to_minio:
+sync_data_from_minio:
 ifeq (default,$(MINIOPROFILE))
-	rclone --size-only sync $(MINIO)/data/ data/ --stats-one-line -P --stats 2s
+	rclone sync $(MINIO)/$(PROJECT_NAME)/data/ data/ --stats-one-line -P --stats 2s
+	rclone sync $(MINIO)/$(PROJECT_NAME)/models/ models/ --stats-one-line -P --stats 2s
+endif
 
 
 ## Initial set up of python interpreter environment, version control and pre-commit hooks
 initial_setup:
-	@bash -c "virtualenv -p $(PYTHON_INTERPRETER) $(HOME)/envs/$(PROJECT_NAME)"
+ifeq (True,$(HAS_CONDA))
+		@echo ">>> Detected conda, creating conda environment."
+ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
+	conda create --name $(PROJECT_NAME) python=3
+endif
 	@bash -c "git init"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install dvc[all]; dvc init"
+	@bash -c "source activate $(PROJECT_NAME); pip install dvc[all]; dvc init"
 	@bash -c "echo '[core]' >> $(PROJECT_DIR)/.dvc/config"
 	@bash -c "echo 'analytics = false' >> $(PROJECT_DIR)/.dvc/config"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install pre-commit; pre-commit install; pre-commit autoupdate"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install -r $(PROJECT_DIR)/requirements.txt"
+	@bash -c "source activate $(PROJECT_NAME); pip install pre-commit; pre-commit install; pre-commit autoupdate"
+	@bash -c "source activate $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
 	@bash -c "echo 'CWD=$(PROJECT_DIR)' >> $(PROJECT_DIR)/.env"
+	@bash -c "echo '.env' >> $(PROJECT_DIR)/.gitignore"
 	@bash -c "git add . ; git commit -am 'INITIAL COMMIT'"
 	@bash -c "git remote add origin https://github.com/$(USERNAME)/$(PROJECT_NAME).git"
-# 	@bash -c "git remote add origin $(GIT)"	
-	@echo ">>> New virtualenv created. Activate with:\nsource $(HOME)/envs/$(PROJECT_NAME)/bin/activate"
+	# @bash -c "git remote add origin $(GIT)"
+	@bash -c "git push -u origin" && echo "uploaded repo to the remote" || echo "repository $(PROJECT_NAME) doesn't exist yet, please create it."
+	@echo ">>> New virtualenv created. Activate with:\nsource activate $(PROJECT_NAME)"
+	@echo ">>> git and dvc are ready to go"
+
+else
+	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
+	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in your shell startup file:\n\
+				export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
+	@bash -c "source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
+	@echo ">>> Initializing git repo"
+	@bash -c "git init"
+	@bash -c "workon $(PROJECT_NAME); pip install dvc[all]; dvc init"
+	@bash -c "echo '[core]' >> $(PROJECT_DIR)/.dvc/config"
+	@bash -c "echo 'analytics = false' >> $(PROJECT_DIR)/.dvc/config"
+	@bash -c "workon $(PROJECT_NAME); pip install pre-commit; pre-commit install; pre-commit autoupdate"
+	@bash -c "workon $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+	@bash -c "echo 'CWD=$(PROJECT_DIR)' >> $(PROJECT_DIR)/.env"
+	@bash -c "echo '.env' >> $(PROJECT_DIR)/.gitignore"
+	@bash -c "git add . ; git commit -am 'INITIAL COMMIT'"
+	@bash -c "git remote add origin https://github.com/$(USERNAME)/$(PROJECT_NAME).git"
+	# @bash -c "git remote add origin $(GIT)"
+	@bash -c "git push -u origin" && echo "uploaded repo to the remote" || echo "repository $(PROJECT_NAME) doesn't exist yet, please create it."
+	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
 	@echo ">>> git and dvc are ready to go"
 
 ## Following set up of python interpreter environment, version control and pre-commit hooks
@@ -88,10 +128,16 @@ following_setup:
 	@echo ">>> git and dvc are ready to go"
 
 requirements:
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; pip install -r $(PROJECT_DIR)/requirements.txt"
+ifeq (True,$(HAS_CONDA))
+	@bash -c "source activate $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+else
+	@bash -c "workon $(PROJECT_NAME); pip install -r $(PROJECT_DIR)/requirements.txt"
+endif
 
 environment:
-	@echo ">>> Activate with:\nsource $(HOME)/envs/$(PROJECT_NAME)/bin/activate"
+ifeq (True,$(HAS_CONDA))
+	@echo ">>> Activate with:\nsource activate $(PROJECT_NAME)"
+else
+	@echo ">>> Activate with:\nworkon $(PROJECT_NAME)"
+endif
 
 commit:
 	$(eval EXP_FILE := "$(PROJECT_DIR)/$(PROJECT_NAME)/study_name.txt")
@@ -100,7 +146,10 @@ commit:
 
 push:
 	@bash -c "git push origin --follow-tags"
-	@bash -c "source $(HOME)/envs/$(PROJECT_NAME)/bin/activate; dvc push"
+ifeq (True,$(HAS_CONDA))
+	@bash -c "source activate $(PROJECT_NAME); dvc push"
+else
+	@bash -c "workon $(PROJECT_NAME); dvc push"
+endif
 
 #################################################################################
 # PROJECT RULES                                                                 #
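
Note that the `data` target now calls `$(PROJECT_NAME)/utils/make_dataset.py` with no arguments, so the script is expected to resolve its own paths. A hypothetical sketch of such a script (not the template's actual code), assuming it reads the `CWD` variable that `initial_setup` appends to `.env`:

```python
# utils/make_dataset.py -- hypothetical sketch; `make data` passes no
# arguments, so paths are resolved from the .env file instead.
import os
import shutil

import dotenv

dotenv.load_dotenv(dotenv.find_dotenv())

project_dir = os.getenv("CWD", ".")  # written into .env by initial_setup
raw_dir = os.path.join(project_dir, "data", "raw")
processed_dir = os.path.join(project_dir, "data", "processed")
os.makedirs(processed_dir, exist_ok=True)

# Placeholder transform: copy raw files through unchanged.
for name in os.listdir(raw_dir):
    src = os.path.join(raw_dir, name)
    if os.path.isfile(src):
        shutil.copy(src, os.path.join(processed_dir, name))
```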

+ 49 - 46
{{ cookiecutter.repo_name }}/README.md

@@ -6,49 +6,52 @@
 Project Organization
 
 ```
-├── LICENSE
-├── Makefile           <- Makefile with commands like `make data` or `make train`
-├── README.md          <- The top-level README for developers using this project.
-├── data
-│   ├── external       <- Data from third party sources.
-│   ├── interim        <- Intermediate data that has been transformed.
-│   ├── processed      <- The final, canonical data sets for modeling.
-│   ├── features       <- Features may be stored here
-│   ├── inference      <- Inference stages may be stored here
-│   └── raw            <- The original, immutable data dump.
-│
-├── docs               <- A default Sphinx project; see sphinx-doc.org for details
-│
-├── models             <- Trained and serialized models, model predictions, or model summaries
-│
-├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering),
-│                         the creator's initials, and a short `-` delimited description, e.g.
-│                         `1.0-jqp-initial-data-exploration`.
-│
-├── references         <- Data dictionaries, manuals, and all other explanatory materials.
-│
-├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
-│   └── figures        <- Generated graphics and figures to be used in reporting
-│
-├── .pre-commit-config.yaml <- Stores pre-commit settings
-│
-├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
-│                         generated with `pip freeze > requirements.txt`
-│
-├── __init__.py
-│
-└── {{cookiecutter.repo_name}}   <- Source code for use in this project.
-    ├── __init__.py    <- Makes {{cookiecutter.repo_name}} a Python module
-    │    
-    ├── settings.py <- illustrates how to use .env file
-    │
-    ├── data           <- Scripts to download or generate data
-    │   └── make_dataset.py
-    │
-    ├── features       <- Scripts to turn raw data into features for modeling
-    │   └── featurize.py
-    │
-    └── models         <- Scripts to train models and then use trained models to make
-        │                 predictions
-        └── train.py
-```
+
+│   .pre-commit-config.yaml <- Stores pre-commit settings
+│   LICENSE
+│   Makefile                <- Makefile with commands like `make data` or `make train`
+│   README.md               <- The top-level README for developers using this project.
+│   requirements.txt        <- The requirements file for reproducing the analysis environment,
+│                              e.g. generated with `pip freeze > requirements.txt`
+│   setup.py
+│   test_environment.py
+│   tox.ini                 <- tox file with settings for running tox; see tox.testrun.org
+│   __init__.py
+├── app                     <- Folder for wrappers, APIs, applications and other business stuff
+│       api.py
+│       app.py
+│       main.py
+│       __init__.py
+├── data                    <- Folder to store data
+│   │   README.md           <- Description of the data
+│   ├── processed           <- The final, canonical data sets for modeling.
+│   └── raw                 <- The original, immutable data dump.
+│       ├── external        <- Data from third party sources.
+│       └── interim         <- Intermediate data that has been transformed.
+├── docs                    <- A default Sphinx project; see sphinx-doc.org for details
+├── logs                    <- To store logs, e.g. from tensorboard
+├── models                  <- Trained and serialized models, model predictions, or model summaries
+├── notebooks               <- Jupyter notebooks. Naming convention is a number (for ordering),
+│                              the creator's initials, and a short `-` delimited description, e.g.
+│                              `1.0-jqp-initial-data-exploration`.
+├── references              <- Data dictionaries, manuals, and all other explanatory materials.
+├── reports                 <- Generated analysis as HTML, PDF, LaTeX, etc.
+│   ├── figures             <- Generated graphics and figures to be used in reporting
+│   └── images              <- Images for reports
+└── {{ cookiecutter.repo_name }} <- Source code for use in this project.
+    │   settings.py         <- Illustrates how to use the .env file
+    │   __init__.py         <- Makes {{ cookiecutter.repo_name }} a Python module
+    ├── nets                <- Code for your models and nets
+    │       nets.py         <- Nets
+    │       predict.py      <- Evaluation script
+    │       train.py        <- Training script
+    │       __init__.py
+    ├── pipelines           <- To store dvc pipelines
+    ├── utils               <- Various utility functions
+    │       featurize.py    <- To create features from the dataset
+    │       make_dataset.py <- To create the final dataset
+    │       utils.py        <- Various helpers
+    │       __init__.py
+    └── visualization       <- To visualize stuff
+            visualization.py
+            __init__.py
+```

+ 3 - 0
{{ cookiecutter.repo_name }}/data/README.md

@@ -0,0 +1,3 @@
+# Data
+
+Here you can describe your data and its location in the structure.

+ 0 - 0
{{ cookiecutter.repo_name }}/data/external/.gitkeep → {{ cookiecutter.repo_name }}/data/raw/external/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/data/interim/.gitkeep → {{ cookiecutter.repo_name }}/data/raw/interim/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/settings.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/models.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/nets.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/predict.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/predict.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/models/train.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/nets/train.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/pipelines/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/pipelines/.gitkeep


+ 10 - 0
{{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/settings.py

@@ -0,0 +1,10 @@
+import os
+import dotenv
+import signal
+
+signal.signal(signal.SIGINT, signal.default_int_handler)
+
+dotenv.load_dotenv(dotenv.find_dotenv())
+
+cwd = os.getenv("CWD")
+RAND_STATE = 42
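
Any module in the package can then pull shared configuration from this one file. A small usage sketch; the body of `train.py` below is an assumption, not the template's actual code, and the `{{ cookiecutter.repo_name }}` import renders to the real package name once the template is generated:

```python
# nets/train.py -- hypothetical consumer of settings.py.
import random

from {{ cookiecutter.repo_name }}.settings import RAND_STATE, cwd

random.seed(RAND_STATE)  # one shared seed keeps experiments reproducible
print(f"project root from .env: {cwd}")
```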

+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/featurize.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/featurize.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/make_dataset.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/make_dataset.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/utils/utils.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/utils/utils.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/.gitkeep → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/.gitkeep


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/__init__.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/__init__.py


+ 0 - 0
{{ cookiecutter.repo_name }}/nets/visualization/visualization.py → {{ cookiecutter.repo_name }}/{{ cookiecutter.repo_name }}/visualization/visualization.py