diff --git a/.coveragerc_py37 b/.coveragerc_py37
new file mode 100644
index 00000000..96bb72bf
--- /dev/null
+++ b/.coveragerc_py37
@@ -0,0 +1,20 @@
+[run]
+branch = True
+timid = True
+
+[report]
+exclude_lines =
+    pragma: no cover
+    pragma: py3 no cover
+    if six.PY2
+    elif six.PY2
+
+partial_branches =
+    pragma: no cover
+    pragma: py3 no cover
+    if six.PY3
+    elif six.PY3
+
+show_missing = True
+
+fail_under = 90
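
For context, this configuration only takes effect when coverage.py is pointed at it explicitly. A minimal sketch of how it might be exercised locally, assuming the pytest-based unit tests under test/unit used elsewhere in this repository:

    # Run the unit tests under coverage.py with the py37 config, then report;
    # the report step fails if total coverage drops below the configured 90%.
    coverage run --rcfile=.coveragerc_py37 -m pytest test/unit
    coverage report --rcfile=.coveragerc_py37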
diff --git a/.flake8 b/.flake8
index a87e2f9f..83270830 100644
--- a/.flake8
+++ b/.flake8
@@ -1,3 +1,3 @@
 [flake8]
-application_import_names = sagemaker_tensorflow_container, test, timeout, utils
+application_import_names = image_utils, integration, sagemaker_tensorflow_container, test, timeout, utils
 import-order-style = google
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..978cf8cf
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,31 @@
+---
+name: Bug report
+about: File a report to help us reproduce and fix the problem
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To reproduce**
+A clear, step-by-step set of instructions to reproduce the bug.
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots or logs**
+If applicable, add screenshots or logs to help explain your problem.
+
+**System information**
+A description of your system. Please provide:
+- **Toolkit version**:
+- **Framework version**:
+- **Python version**:
+- **CPU or GPU**:
+- **Custom Docker image (Y/N)**:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..9df79c90
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Ask a question
+    url: https://stackoverflow.com/questions/tagged/amazon-sagemaker
+    about: Use Stack Overflow to ask and answer questions
diff --git a/.github/ISSUE_TEMPLATE/documentation-request.md b/.github/ISSUE_TEMPLATE/documentation-request.md
new file mode 100644
index 00000000..b64cd478
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation-request.md
@@ -0,0 +1,17 @@
+---
+name: Documentation request
+about: Request improved documentation
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**What did you find confusing? Please describe.**
+A clear and concise description of what you found confusing. Ex. I tried to [...] but I didn't understand how to [...]
+
+**Describe how documentation can be improved**
+A clear and concise description of where documentation was lacking and how it can be improved.
+
+**Additional context**
+Add any other context or screenshots about the documentation request here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 00000000..bff1cb4e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest new functionality for this toolkit
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the feature you'd like**
+A clear and concise description of the functionality you want.
+
+**How would this feature be used? Please describe.**
+A clear and concise description of the use case for this feature. Please provide an example, if possible.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index af391c6a..58039444 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,359 @@
 # Changelog
 
+## v10.1.8 (2020-12-08)
+
+### Bug Fixes and Other Changes
+
+ * workaround to print stderr when capture_error is True
+
+## v10.1.7 (2020-11-06)
+
+### Bug Fixes and Other Changes
+
+ * propagate log level
+
+## v10.1.6 (2020-10-15)
+
+### Bug Fixes and Other Changes
+
+ * add condition to avoid error when 'model_dir' is None
+
+## v10.1.5 (2020-08-23)
+
+### Bug Fixes and Other Changes
+
+ * call entry_point.run with capture_error=True
+
+## v10.1.4.post4 (2020-07-01)
+
+### Testing and Release Infrastructure
+
+ * add integration test for MPI env vars propagation
+
+## v10.1.4.post3 (2020-06-29)
+
+### Testing and Release Infrastructure
+
+ * add issue templates
+
+## v10.1.4.post2 (2020-06-18)
+
+### Documentation Changes
+
+ * remove confusing information from the Readme.
+
+### Testing and Release Infrastructure
+
+ * add single-instance, multi-process Horovod test for local GPU
+
+## v10.1.4.post1 (2020-06-11)
+
+### Testing and Release Infrastructure
+
+ * Rename buildspec files.
+
+## v10.1.4.post0 (2020-06-10)
+
+### Documentation Changes
+
+ * remove functional test info from branch
+ * Update README.rst
+
+### Testing and Release Infrastructure
+
+ * Make docker folder read only, remove unused tests.
+
+## v10.1.4 (2020-06-10)
+
+### Bug Fixes and Other Changes
+
+ * bump version of sagemaker-training for script entry point fix.
+
+## v10.1.3 (2020-05-12)
+
+### Bug Fixes and Other Changes
+
+ * Bump version of sagemaker-training for typing fix
+
+### Testing and Release Infrastructure
+
+ * remove unused build scripts.
+
+## v10.1.2 (2020-05-05)
+
+### Bug Fixes and Other Changes
+
+ * Add py37 to sm tests
+
+## v10.1.1 (2020-05-04)
+
+### Bug Fixes and Other Changes
+
+ * remove sagemaker pysdk, keras_applications and keras_preprocessing in docker files
+ * Fix sm integration issues
+ * add dockerfiles for tf 1.15.2 py37 containers
+
+## v10.1.0 (2020-04-29)
+
+### Features
+
+ * Python 3.7 support
+
+### Testing and Release Infrastructure
+
+ * Fix buildspecs
+
+## v10.0.0 (2020-04-27)
+
+### Breaking Changes
+
+ * Replace sagemaker-containers with sagemaker-training
+
+### Testing and Release Infrastructure
+
+ * remove CHANGELOG entries from failed builds
+ * bump version to prepare for new version scheme
+ * add training script to benchmark directory
+ * skip image push in PR build if no changes
+
+## v2.3.2 (2020-04-07)
+
+### Bug Fixes and Other Changes
+
+ * Bump smdebug version
+
+## v2.3.1 (2020-04-06)
+
+### Bug Fixes and Other Changes
+
+ * updating pillow version of tf1.15
+
+## v2.3.0 (2020-04-02)
+
+### Features
+
+ * install sagemaker-tensorflow-toolkit from PyPI.
+
+## v2.2.8 (2020-04-01)
+
+### Bug Fixes and Other Changes
+
+ * Allowing arguments for deep_learning_container.py for tf1.15
+
+## v2.2.7.post0 (2020-03-31)
+
+### Testing and Release Infrastructure
+
+ * refactor toolkit tests.
+
+## v2.2.7 (2020-03-26)
+
+### Bug Fixes and Other Changes
+
+ * Adding of deep_learning_container.py in Tf1.15
+
+## v2.2.6 (2020-03-16)
+
+### Bug Fixes and Other Changes
+
+ * smdebug 0.7.1
+ * Added marker to skip on pipeline
+
+## v2.2.5 (2020-03-12)
+
+### Bug Fixes and Other Changes
+
+ * install smexperiments when python >= 3.6
+ * SM integration test for TF 1.x
+ * upgrade to latest sagemaker-experiments
+ * Added pytest fixture
+
+## v2.2.4 (2020-03-11)
+
+### Bug Fixes and Other Changes
+
+ * update smdebug wheel
+ * Revert "Update smdebug to 0.7.0 - TF 1.15.2 (#298)"
+
+## v2.2.3 (2020-03-10)
+
+### Bug Fixes and Other Changes
+
+ * update smdebug wheel
+ * Update smdebug to 0.7.0 - TF 1.15.2
+ * install SageMaker Python SDK into Python 3 images
+
+## v2.2.2.post0 (2020-03-05)
+
+### Testing and Release Infrastructure
+
+ * fix PR build
+
+## v2.2.2 (2020-02-20)
+
+### Bug Fixes and Other Changes
+
+ * copy all tests to test-toolkit folder.
+
+## v2.2.1 (2020-02-17)
+
+### Bug Fixes and Other Changes
+
+ * update: update r1.15.2 dockerfiles
+
+## v2.2.0 (2020-02-13)
+
+### Features
+
+ * Add release to PyPI. Change package name to sagemaker-tensorflow-training.
+
+### Bug Fixes and Other Changes
+
+ * pin awscli to latest version
+ * Pin awscli to latest
+ * bump smdebug version to 0.5.0.post0
+ * update: Update awscli version and remove related pins
+ * update: Update buildspec for TF 1.15.0
+ * update copyright year in license header
+
+### Documentation Changes
+
+ * update README.rst
+ * Add link to TF 2.0 branch
+
+### Testing and Release Infrastructure
+
+ * Add twine check during PR.
+ * properly fail build if has-matching-changes fails
+ * properly fail build if has-matching-changes fails
+
+## v0.1.0 (2020-02-12)
+
+### Features
+
+ * Add release to PyPI. Change package name to sagemaker-tensorflow-training.
+
+### Bug Fixes and Other Changes
+
+ * pin awscli to latest version
+ * Pin awscli to latest
+ * bump smdebug version to 0.5.0.post0
+ * update: Update awscli version and remove related pins
+ * update: Update buildspec for TF 1.15.0
+ * update copyright year in license header
+ * update: Release TF 1.15.0 dockerfiles
+ * use regional endpoint for STS in builds
+ * update documentation link in warning message
+ * update instance type region availability.
+ * license file was missing from root of repo.
+ * install tensorflow<2.0
+ * merge dockerfiles
+ * move script mode branch to master
+ * use last legacy mode version for --framework-version test arg default
+ * Pin pytest and pluggy to work around configparser error
+ * Use multiprocessing.Process to launch parameter server
+ * increase grpc message size limit to 2gb
+ * Fix typo in serving method name
+ * restore python-dev package in image
+ * Add default tag to functional tests
+ * update link to correct docs
+ * Add EI Dockerfile for 1.11
+ * Add EI documentation within README
+ * add Dockerfile for EI
+ * Use get_closest_marker instead of get_marker
+ * Add docker files of TF 1.12
+ * Default GRPC timeout for EI & Allow timeout to be configurable
+ * remove requests from test dependencies
+ * catch RpcError due to change in GRPC
+ * Icywang86rui gpu fix
+ * Read port range from container support for TFS port
+ * Unfreeze requests version
+ * export_saved_model: copy asset files
+ * add port to dockerfile
+ * Updated TF Pipe Mode Version
+ * Fix MKL setting
+ * Set MKL vars plus tests
+ * increase test timeout
+ * Add back https to S3
+ * Add 1.11.0 CPU and GPU Dockerfile
+ * pin requests version
+ * fix memory leak in serving
+ * Update region in s3 boto client in serve
+ * Update readme with instructions for 1.9.0 and above
+ * Fix deserialization of dicts for json predict requests
+ * Add dockerfile and update test for tensorflow 1.10.0
+ * Support tensorflow 1.9.0
+ * Add integ tests to verify that tensorflow in gpu-image can access gpu-devices.
+ * train on 3 epochs for pipe mode test
+ * Change error classes used by _default_input_fn() and _default_output_fn()
+ * Changing assertion to check only existence
+ * Install sagemaker-tensorflow from pypi. Add MKL environment variables for TF 1.8
+ * get most recent saved model to export
+ * pip install tensorflow 1.8 in 1.8 cpu image
+ * install tensorflow extensions
+ * upgrade cpu binaries in docker build
+ * Force upgrade of the framework binaries to make sure the right binaries are installed.
+ * Add Pillow to pip install list
+ * Increase train steps for cifar distributed test to mitigate race condition
+ * Add TensorFlow 1.8 dockerfiles
+ * Add TensorFlow 1.7 dockerfiles
+ * Explain how to download tf binaries from PyPI
+ * Allow training without S3
+ * Fix hyperparameter name for detecting a tuning job
+ * Checkout v1.4.1 tag instead of r1.4 branch
+ * Move processing of requirements file in.
+ * Generate checkpoint path using TRAINING_JOB_NAME environment variable if needed
+ * Wrap user-provided model_fn to pass arguments positionally (maintains compatibility with existing behavior)
+ * Add more unit tests for trainer, fix __all__ and rename train.py to avoid import conflict
+ * Use regional endpoint for S3 client
+ * Update README.rst
+ * Pass input_channels to eval_input_fn if defined
+ * Fix setup.py to refer to renamed README
+ * Add test and build instructions
+ * Fix year in license headers
+ * Add TensorFlow 1.6
+ * Add test instructions in README
+ * Add container support to install_requires
+ * Add Apache license headers
+ * Use wget to install tensorflow-model-server
+ * Fix file path for integ test
+ * Fix s3_prefix path in integ test
+ * Fix typo in path for integ test
+ * Add input_channels to train_input_fn interface.
+ * Update logging and make serving_input_fn optional.
+ * remove pip install in tensorflow training
+ * Modify integration tests to run nvidia-docker for gpu
+ * add h5py for keras models
+ * Add local integ tests & resources
+ * Restructure repo to use a directory per TF version for dockerfiles
+ * Rename "feature_map" variables to "feature_dict" to avoid overloading it with the ML term "feature map"
+ * Copying in changes from internal repo:
+ * Add functional test
+ * Fix FROM image names for final build dockerfiles
+ * Add dockerfiles for building our production images (TF 1.4)
+ * GPU Dockerfile and setup.py fixes
+ * Add base image Dockerfiles for 1.4
+ * Merge pull request #1 from aws/mvs-first-commit
+ * first commit
+ * Updating initial README.md from template
+ * Creating initial file from template
+ * Creating initial file from template
+ * Creating initial file from template
+ * Creating initial file from template
+ * Creating initial file from template
+ * Initial commit
+
+### Documentation Changes
+
+ * update README.rst
+ * Add link to TF 2.0 branch
+
+### Testing and Release Infrastructure
+
+ * Add twine check during PR.
+ * properly fail build if has-matching-changes fails
+ * properly fail build if has-matching-changes fails
+
 ## v0.1.0 (2019-05-22)
 
 ### Bug fixes and other changes
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..5cc14234
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,61 @@
+# Contributing Guidelines
+
+Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 
+documentation, we greatly value feedback and contributions from our community.
+
+Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 
+information to effectively respond to your bug report or contribution.
+
+
+## Reporting Bugs/Feature Requests
+
+We welcome you to use the GitHub issue tracker to report bugs or suggest features.
+
+When filing an issue, please check [existing open](https://github.com/aws/sagemaker-tensorflow-training-toolkit/issues) or [recently closed](https://github.com/aws/sagemaker-tensorflow-training-toolkit/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20) issues to make sure somebody else hasn't already
+reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
+
+* A reproducible test case or series of steps
+* The version of our code being used
+* Any modifications you've made relevant to the bug
+* Anything unusual about your environment or deployment
+
+
+## Contributing via Pull Requests
+Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
+
+1. You are working against the latest source on the *master* branch.
+2. You check existing open and recently merged pull requests to make sure someone else hasn't addressed the problem already.
+3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
+
+To send us a pull request, please:
+
+1. Fork the repository.
+2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
+3. Ensure local tests pass.
+4. Commit to your fork using clear commit messages.
+5. Send us a pull request, answering any default questions in the pull request interface.
+6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
+
+GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
+[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
+
+
+## Finding contributions to work on
+Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws/sagemaker-tensorflow-training-toolkit/labels/help%20wanted) issues is a great place to start.
+
+
+## Code of Conduct
+This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
+For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 
+opensource-codeofconduct@amazon.com with any additional questions or comments.
+
+
+## Security issue notifications
+If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
+
+
+## Licensing
+
+See the [LICENSE](https://github.com/aws/sagemaker-tensorflow-training-toolkit/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
+
+We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
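
For the "Ensure local tests pass" step above, a minimal sketch of a local setup follows; it assumes the editable install and pytest layout described in the README instructions removed further down in this diff, which may have since moved:

    # Clone the repository, install it in editable mode with test dependencies,
    # and run the unit test suite before opening a pull request.
    git clone https://github.com/aws/sagemaker-tensorflow-training-toolkit.git
    cd sagemaker-tensorflow-training-toolkit
    pip install -e .[test]
    pytest test/unit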
diff --git a/README.rst b/README.rst
index 6d031bf9..92aed6e2 100644
--- a/README.rst
+++ b/README.rst
@@ -1,290 +1,29 @@
-===============================
-SageMaker TensorFlow Containers
-===============================
+=====================================
+SageMaker TensorFlow Training Toolkit
+=====================================
 
-SageMaker TensorFlow Containers is an open source library for making the
-TensorFlow framework run on `Amazon SageMaker `__.
+SageMaker TensorFlow Training Toolkit is an open-source library for using TensorFlow to train models on Amazon SageMaker.
 
-This repository also contains Dockerfiles which install this library, TensorFlow, and dependencies
-for building SageMaker TensorFlow images.
+For inference, see `SageMaker TensorFlow Inference Toolkit `__.
 
-For information on running TensorFlow jobs on SageMaker: `Python
-SDK `__.
+For the Dockerfiles used for building SageMaker TensorFlow Containers, see `AWS Deep Learning Containers `__.
+
+For information on running TensorFlow jobs on Amazon SageMaker, please refer to the `SageMaker Python SDK documentation `__.
 
 For notebook examples: `SageMaker Notebook
 Examples `__.
 
-Table of Contents
------------------
-
-#. `Getting Started <#getting-started>`__
-#. `Building your Image <#building-your-image>`__
-#. `Running the tests <#running-the-tests>`__
-
-Getting Started
----------------
-
-Prerequisites
-~~~~~~~~~~~~~
-
-Make sure you have installed all of the following prerequisites on your
-development machine:
-
-- `Docker `__
-
-For Testing on GPU
-^^^^^^^^^^^^^^^^^^
-
--  `Nvidia-Docker `__
-
-Recommended
-^^^^^^^^^^^
-
--  A Python environment management tool. (e.g.
-   `PyEnv `__,
-   `VirtualEnv `__)
-
-Building your Image
--------------------
-
-`Amazon SageMaker `__
-utilizes Docker containers to run all training jobs & inference endpoints.
-
-The Docker images are built from the Dockerfiles specified in
-`Docker/ `__.
-
-The Docker files are grouped based on TensorFlow version and separated
-based on Python version and processor type.
-
-The Docker files for TensorFlow 2.0 are available in the
-`tf-2 `__ branch, in
-`docker/2.0.0/ `__.
-
-The Docker images, used to run training & inference jobs, are built from
-both corresponding "base" and "final" Dockerfiles.
-
-Base Images
-~~~~~~~~~~~
-
-The "base" Dockerfile encompass the installation of the framework and all of the dependencies
-needed. It is needed before building image for TensorFlow 1.8.0 and before.
-Building a base image is not required for images for TensorFlow 1.9.0 and onwards.
-
-Tagging scheme is based on --. (e.g. 1.4
-.1-cpu-py2)
-
-All "final" Dockerfiles build images using base images that use the tagging scheme
-above.
-
-If you want to build your "base" Docker image, then use:
-
-::
-
-    # All build instructions assume you're building from the same directory as the Dockerfile.
-
-    # CPU
-    docker build -t tensorflow-base:-cpu- -f Dockerfile.cpu .
-
-    # GPU
-    docker build -t tensorflow-base:-gpu- -f Dockerfile.gpu .
-
-::
-
-    # Example
-
-    # CPU
-    docker build -t tensorflow-base:1.4.1-cpu-py2 -f Dockerfile.cpu .
-
-    # GPU
-    docker build -t tensorflow-base:1.4.1-gpu-py2 -f Dockerfile.gpu .
-
-Final Images
-~~~~~~~~~~~~
-
-The "final" Dockerfiles encompass the installation of the SageMaker specific support code.
-
-For images of TensorFlow 1.8.0 and before, all "final" Dockerfiles use `base images for building `__.
-
-These "base" images are specified with the naming convention of
-tensorflow-base:--.
-
-Before building "final" images:
-
-Build your "base" image. Make sure it is named and tagged in accordance with your "final"
-Dockerfile. Skip this step if you want to build image of Tensorflow Version 1.9.0 and above.
-
-Then prepare the SageMaker TensorFlow Container python package in the image folder like below:
-
-::
-
-    # Create the SageMaker TensorFlow Container Python package.
-    cd sagemaker-tensorflow-containers
-    python setup.py sdist
-
-    #. Copy your Python package to "final" Dockerfile directory that you are building.
-    cp dist/sagemaker_tensorflow_container-.tar.gz docker//final/py2
-
-If you want to build "final" Docker images, for versions 1.6 and above, you will first need to download the appropriate tensorflow pip wheel, then pass in its location as a build argument. These can be obtained from pypi. For example, the files for 1.6.0 are here:
-
-https://pypi.org/project/tensorflow/1.6.0/#files
-https://pypi.org/project/tensorflow-gpu/1.6.0/#files
-
-Note that you need to use the tensorflow-gpu wheel when building the GPU image.
-
-Then run:
-
-::
-
-    # All build instructions assumes you're building from the same directory as the Dockerfile.
-
-    # CPU
-    docker build -t : --build-arg py_version= --build-arg framework_installable= -f Dockerfile.cpu .
-
-    # GPU
-    docker build -t : --build-arg py_version= --build-arg framework_installable= -f Dockerfile.gpu .
-
-::
-
-    # Example
-    docker build -t preprod-tensorflow:1.6.0-cpu-py2 --build-arg py_version=2
-    --build-arg framework_installable=tensorflow-1.6.0-cp27-cp27mu-manylinux1_x86_64.whl -f Dockerfile.cpu .
-
-The dockerfiles for 1.4 and 1.5 build from source instead, so when building those, you don't need to download the wheel beforehand:
-
-::
-
-    # All build instructions assumes you're building from the same directory as the Dockerfile.
-
-    # CPU
-    docker build -t : -f Dockerfile.cpu .
-
-    # GPU
-    docker build -t : -f Dockerfile.gpu .
-
-::
-
-    # Example
-
-    # CPU
-    docker build -t preprod-tensorflow:1.4.1-cpu-py2 -f Dockerfile.cpu .
-
-    # GPU
-    docker build -t preprod-tensorflow:1.4.1-gpu-py2 -f Dockerfile.gpu .
-
-
-Running the tests
------------------
-
-Running the tests requires installation of the SageMaker TensorFlow Container code and its test
-dependencies.
-
-::
-
-    git clone https://github.com/aws/sagemaker-tensorflow-containers.git
-    cd sagemaker-tensorflow-containers
-    pip install -e .[test]
-
-Tests are defined in
-`test/ `__
-and include unit, integration and functional tests.
-
-Unit Tests
-~~~~~~~~~~
-
-If you want to run unit tests, then use:
-
-::
-
-    # All test instructions should be run from the top level directory
-
-    pytest test/unit
-
-Integration Tests
-~~~~~~~~~~~~~~~~~
-
-Running integration tests require `Docker `__ and `AWS
-credentials `__,
-as the integration tests make calls to a couple AWS services. The integration and functional
-tests require configurations specified within their respective
-`conftest.py `__.Make sure to update the account-id and region at a minimum.
-
-Integration tests on GPU require `Nvidia-Docker `__.
-
-Before running integration tests:
-
-#. Build your Docker image.
-#. Pass in the correct pytest arguments to run tests against your Docker image.
-
-If you want to run local integration tests, then use:
-
-::
-
-    # Required arguments for integration tests are found in test/integ/conftest.py
-
-    pytest test/integration --docker-base-name  \
-                            --tag  \
-                            --framework-version  \
-                            --processor 
-
-::
-
-    # Example
-    pytest test/integration --docker-base-name preprod-tensorflow \
-                            --tag 1.0 \
-                            --framework-version 1.4.1 \
-                            --processor cpu
-
-Functional Tests
-~~~~~~~~~~~~~~~~
-
-Functional tests require your Docker image to be within an `Amazon ECR repository `__.
-
-The Docker-base-name is your `ECR repository namespace `__.
-
-The instance-type is your specified `Amazon SageMaker Instance Type
-`__ that the functional test will run on.
-
-
-Before running functional tests:
-
-#. Build your Docker image.
-#. Push the image to your ECR repository.
-#. Pass in the correct pytest arguments to run tests on SageMaker against the image within your ECR repository.
-
-If you want to run a functional end to end test on `Amazon
-SageMaker `__, then use:
-
-::
-
-    # Required arguments for integration tests are found in test/functional/conftest.py
-
-    pytest test/functional --aws-id  \
-                           --docker-base-name  \
-                           --instance-type  \
-                           --tag  \
-
-::
-
-    # Example
-    pytest test/functional --aws-id 12345678910 \
-                           --docker-base-name preprod-tensorflow \
-                           --instance-type ml.m4.xlarge \
-                           --tag 1.0
-
 Contributing
 ------------
 
 Please read
-`CONTRIBUTING.md `__
+`CONTRIBUTING.md `__
 for details on our code of conduct, and the process for submitting pull
 requests to us.
 
 License
 -------
 
-SageMaker TensorFlow Containers is licensed under the Apache 2.0 License. It is copyright 2018
+SageMaker TensorFlow Training Toolkit is licensed under the Apache 2.0 License. It is copyright 2018
 Amazon.com, Inc. or its affiliates. All Rights Reserved. The license is available at:
 http://aws.amazon.com/apache2.0/
diff --git a/VERSION b/VERSION
index eb5fc1c6..50106b6d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.8.dev0
+10.1.9.dev0
diff --git a/benchmarks/horovod-resnet/execute_horovod_training.py b/benchmarks/horovod-resnet/execute_horovod_training.py
index e6ac7609..4b0b9b23 100755
--- a/benchmarks/horovod-resnet/execute_horovod_training.py
+++ b/benchmarks/horovod-resnet/execute_horovod_training.py
@@ -26,7 +26,7 @@
 from sagemaker.tensorflow import TensorFlow
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
-benchmark_results_dir = os.path.join('s3://', Session().default_bucket(), 'hvd-benchmarking')
+benchmark_results_dir = os.path.join("s3://", Session().default_bucket(), "hvd-benchmarking")
 
 
 @click.group()
@@ -35,93 +35,98 @@ def cli():
 
 
 def generate_report():
-    results_dir = os.path.join(dir_path, 'results')
+    results_dir = os.path.join(dir_path, "results")
 
     if os.path.exists(results_dir):
         shutil.rmtree(results_dir)
 
-    subprocess.call(['aws', 's3', 'cp', '--recursive', benchmark_results_dir, results_dir])
+    subprocess.call(["aws", "s3", "cp", "--recursive", benchmark_results_dir, results_dir])
 
     jobs = {}
 
     for job_name in os.listdir(results_dir):
         jobs[job_name] = {}
 
-        _, instance_type, instance_count, device, py_version, _, _, _, _, _, _, _ = job_name.split('-')
+        _, instance_type, instance_count, device, py_version, _, _, _, _, _, _, _ = job_name.split(
+            "-"
+        )
 
         current_dir = os.path.join(results_dir, job_name)
 
-        model_dir = os.path.join(current_dir, 'output', 'model.tar.gz')
-        subprocess.call(['tar', '-xvzf', model_dir], cwd=current_dir)
+        model_dir = os.path.join(current_dir, "output", "model.tar.gz")
+        subprocess.call(["tar", "-xvzf", model_dir], cwd=current_dir)
 
-        jobs[job_name]['instance_type'] = instance_type
-        jobs[job_name]['instance_count'] = instance_count
-        jobs[job_name]['device'] = device
-        jobs[job_name]['py_version'] = py_version
+        jobs[job_name]["instance_type"] = instance_type
+        jobs[job_name]["instance_count"] = instance_count
+        jobs[job_name]["device"] = device
+        jobs[job_name]["py_version"] = py_version
 
-        benchmark_log = os.path.join(current_dir, 'benchmark_run.log')
+        benchmark_log = os.path.join(current_dir, "benchmark_run.log")
 
         if os.path.exists(benchmark_log):
             with open(benchmark_log) as f:
                 data = json.load(f)
 
-
-                jobs[job_name]['dataset'] = data['dataset']['name']
-                jobs[job_name]['num_cores'] = data['machine_config']['cpu_info']['num_cores']
-                jobs[job_name]['cpu_info'] = data['machine_config']['cpu_info']['cpu_info']
-                jobs[job_name]['mhz_per_cpu'] = data['machine_config']['cpu_info']['mhz_per_cpu']
-                jobs[job_name]['gpu_count'] = data['machine_config']['gpu_info']['count']
-                jobs[job_name]['gpu_model'] = data['machine_config']['gpu_info']['model']
+                jobs[job_name]["dataset"] = data["dataset"]["name"]
+                jobs[job_name]["num_cores"] = data["machine_config"]["cpu_info"]["num_cores"]
+                jobs[job_name]["cpu_info"] = data["machine_config"]["cpu_info"]["cpu_info"]
+                jobs[job_name]["mhz_per_cpu"] = data["machine_config"]["cpu_info"]["mhz_per_cpu"]
+                jobs[job_name]["gpu_count"] = data["machine_config"]["gpu_info"]["count"]
+                jobs[job_name]["gpu_model"] = data["machine_config"]["gpu_info"]["model"]
 
                 def find_value(parameter):
-                    other_key = [k for k in parameter if k != 'name'][0]
+                    other_key = [k for k in parameter if k != "name"][0]
                     return parameter[other_key]
 
-                for parameter in data['run_parameters']:
-                    jobs[job_name][parameter['name']] = find_value(parameter)
+                for parameter in data["run_parameters"]:
+                    jobs[job_name][parameter["name"]] = find_value(parameter)
 
-                jobs[job_name]['model_name'] = data['model_name']
-                jobs[job_name]['run_date'] = data['run_date']
-                jobs[job_name]['tensorflow_version'] = data['tensorflow_version']['version']
-                jobs[job_name]['tensorflow_version_git_hash'] = data['tensorflow_version']['git_hash']
+                jobs[job_name]["model_name"] = data["model_name"]
+                jobs[job_name]["run_date"] = data["run_date"]
+                jobs[job_name]["tensorflow_version"] = data["tensorflow_version"]["version"]
+                jobs[job_name]["tensorflow_version_git_hash"] = data["tensorflow_version"][
+                    "git_hash"
+                ]
 
     return pd.DataFrame(jobs)
 
 
-@cli.command('train')
-@click.option('--framework-version', required=True, type=click.Choice(['1.11', '1.12']))
-@click.option('--device', required=True, type=click.Choice(['cpu', 'gpu']))
-@click.option('--py-versions', multiple=True, type=str)
-@click.option('--training-input-mode', default='File', type=click.Choice(['File', 'Pipe']))
-@click.option('--networking-isolation/--no-networking-isolation', default=False)
-@click.option('--wait/--no-wait', default=False)
-@click.option('--security-groups', multiple=True, type=str)
-@click.option('--subnets', multiple=True, type=str)
-@click.option('--role', default='SageMakerRole', type=str)
-@click.option('--instance-counts', multiple=True, type=int)
-@click.option('--instance-types', multiple=True, type=str)
-@click.argument('script_args', nargs=-1, type=str)
-def train(framework_version,
-          device,
-          py_versions,
-          training_input_mode,
-          networking_isolation,
-          wait,
-          security_groups,
-          subnets,
-          role,
-          instance_counts,
-          instance_types,
-          script_args):
+@cli.command("train")
+@click.option("--framework-version", required=True, type=click.Choice(["1.11", "1.12"]))
+@click.option("--device", required=True, type=click.Choice(["cpu", "gpu"]))
+@click.option("--py-versions", multiple=True, type=str)
+@click.option("--training-input-mode", default="File", type=click.Choice(["File", "Pipe"]))
+@click.option("--networking-isolation/--no-networking-isolation", default=False)
+@click.option("--wait/--no-wait", default=False)
+@click.option("--security-groups", multiple=True, type=str)
+@click.option("--subnets", multiple=True, type=str)
+@click.option("--role", default="SageMakerRole", type=str)
+@click.option("--instance-counts", multiple=True, type=int)
+@click.option("--instance-types", multiple=True, type=str)
+@click.argument("script_args", nargs=-1, type=str)
+def train(
+    framework_version,
+    device,
+    py_versions,
+    training_input_mode,
+    networking_isolation,
+    wait,
+    security_groups,
+    subnets,
+    role,
+    instance_counts,
+    instance_types,
+    script_args,
+):
     iterator = itertools.product(instance_types, py_versions, instance_counts)
     for instance_type, py_version, instance_count in iterator:
         base_name = job_name(instance_type, instance_count, device, py_version)
 
-        mpi_options = '-x HOROVOD_HIERARCHICAL_ALLREDUCE=1 -x HOROVOD_FUSION_THRESHOLD=16777216 -x TF_CPP_MIN_LOG_LEVEL=0 -x HOROVOD_TIMELINE --output-filename /opt/ml/model/hlog'
+        mpi_options = "-x HOROVOD_HIERARCHICAL_ALLREDUCE=1 -x HOROVOD_FUSION_THRESHOLD=16777216 -x TF_CPP_MIN_LOG_LEVEL=0 -x HOROVOD_TIMELINE --output-filename /opt/ml/model/hlog"
         estimator = TensorFlow(
-            entry_point=os.path.join(dir_path, 'train.sh'),
+            entry_point=os.path.join(dir_path, "train.sh"),
             role=role,
-            dependencies=[os.path.join(dir_path, 'train_imagenet_resnet_hvd.py')],
+            dependencies=[os.path.join(dir_path, "train_imagenet_resnet_hvd.py")],
             base_job_name=base_name,
             train_instance_count=instance_count,
             train_instance_type=instance_type,
@@ -129,36 +134,34 @@ def train(framework_version,
             py_version=py_version,
             script_mode=True,
             hyperparameters={
-                'sagemaker_mpi_enabled': True,
-                'sagemaker_mpi_num_of_processes_per_host': 8,
-                'sagemaker_mpi_custom_mpi_options': mpi_options
+                "sagemaker_mpi_enabled": True,
+                "sagemaker_mpi_num_of_processes_per_host": 8,
+                "sagemaker_mpi_custom_mpi_options": mpi_options,
             },
             output_path=benchmark_results_dir,
             security_group_ids=security_groups,
-            subnets=subnets
+            subnets=subnets,
         )
 
         estimator.fit(wait=wait)
 
         if wait:
-            artifacts_path = os.path.join(dir_path, 'results',
-                                          estimator.latest_training_job.job_name)
-            model_path = os.path.join(artifacts_path, 'model.tar.gz')
+            artifacts_path = os.path.join(
+                dir_path, "results", estimator.latest_training_job.job_name
+            )
+            model_path = os.path.join(artifacts_path, "model.tar.gz")
             os.makedirs(artifacts_path)
-            subprocess.call(['aws', 's3', 'cp', estimator.model_data, model_path])
-            subprocess.call(['tar', '-xvzf', model_path], cwd=artifacts_path)
+            subprocess.call(["aws", "s3", "cp", estimator.model_data, model_path])
+            subprocess.call(["tar", "-xvzf", model_path], cwd=artifacts_path)
+
+            print("Model downloaded at %s" % model_path)
 
-            print('Model downloaded at %s' % model_path)
 
+def job_name(instance_type, instance_count, device, python_version):
+    instance_typename = instance_type.replace(".", "").replace("ml", "")
 
-def job_name(instance_type,
-             instance_count,
-             device,
-             python_version):
-    instance_typename = instance_type.replace('.', '').replace('ml', '')
+    return "hvd-%s-%s-%s-%s" % (instance_typename, instance_count, device, python_version)
 
-    return 'hvd-%s-%s-%s-%s' % (
-        instance_typename, instance_count, device, python_version)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     cli()
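
To show how the click CLI reformatted above is driven, here is a hypothetical invocation; the option names come from the decorators in this diff, while the instance type, count, and Python version values are placeholders:

    # Launch one Horovod ResNet benchmark training job per combination of
    # instance type, Python version, and instance count, and wait for completion.
    python execute_horovod_training.py train \
        --framework-version 1.12 \
        --device gpu \
        --py-versions py3 \
        --instance-types ml.p3.16xlarge \
        --instance-counts 2 \
        --role SageMakerRole \
        --wait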
diff --git a/benchmarks/horovod-resnet/train_imagenet_resnet_hvd.py b/benchmarks/horovod-resnet/train_imagenet_resnet_hvd.py
index d415c62d..cf0e2486 100644
--- a/benchmarks/horovod-resnet/train_imagenet_resnet_hvd.py
+++ b/benchmarks/horovod-resnet/train_imagenet_resnet_hvd.py
@@ -51,18 +51,26 @@
 from operator import itemgetter
 from tensorflow.python.util import nest
 
+
 def rank0log(logger, *args, **kwargs):
     if hvd.rank() == 0:
         if logger:
-            logger.info(''.join([str(x) for x in list(args)]))
+            logger.info("".join([str(x) for x in list(args)]))
         else:
             print(*args, **kwargs)
 
 
 class LayerBuilder(object):
-    def __init__(self, activation=None, data_format='channels_last',
-                 training=False, use_batch_norm=False, batch_norm_config=None,
-                 conv_initializer=None, adv_bn_init=False):
+    def __init__(
+        self,
+        activation=None,
+        data_format="channels_last",
+        training=False,
+        use_batch_norm=False,
+        batch_norm_config=None,
+        conv_initializer=None,
+        adv_bn_init=False,
+    ):
         self.activation = activation
         self.data_format = data_format
         self.training = training
@@ -72,19 +80,22 @@ def __init__(self, activation=None, data_format='channels_last',
         self.adv_bn_init = adv_bn_init
         if self.batch_norm_config is None:
             self.batch_norm_config = {
-                'decay': 0.9,
-                'epsilon': 1e-4,
-                'scale': True,
-                'zero_debias_moving_mean': False,
+                "decay": 0.9,
+                "epsilon": 1e-4,
+                "scale": True,
+                "zero_debias_moving_mean": False,
             }
 
     def _conv2d(self, inputs, activation, *args, **kwargs):
         x = tf.layers.conv2d(
-            inputs, data_format=self.data_format,
+            inputs,
+            data_format=self.data_format,
             use_bias=not self.use_batch_norm,
             kernel_initializer=self.conv_initializer,
             activation=None if self.use_batch_norm else activation,
-            *args, **kwargs)
+            *args,
+            **kwargs
+        )
         if self.use_batch_norm:
             x = self.batch_norm(x)
             x = activation(x) if activation is not None else x
@@ -92,19 +103,23 @@ def _conv2d(self, inputs, activation, *args, **kwargs):
 
     def conv2d_linear_last_bn(self, inputs, *args, **kwargs):
         x = tf.layers.conv2d(
-            inputs, data_format=self.data_format,
+            inputs,
+            data_format=self.data_format,
             use_bias=False,
             kernel_initializer=self.conv_initializer,
-            activation=None, *args, **kwargs)
+            activation=None,
+            *args,
+            **kwargs
+        )
         param_initializers = {
-            'moving_mean': tf.zeros_initializer(),
-            'moving_variance': tf.ones_initializer(),
-            'beta': tf.zeros_initializer(),
+            "moving_mean": tf.zeros_initializer(),
+            "moving_variance": tf.ones_initializer(),
+            "beta": tf.zeros_initializer(),
         }
         if self.adv_bn_init:
-            param_initializers['gamma'] = tf.zeros_initializer()
+            param_initializers["gamma"] = tf.zeros_initializer()
         else:
-            param_initializers['gamma'] = tf.ones_initializer()
+            param_initializers["gamma"] = tf.ones_initializer()
         x = self.batch_norm(x, param_initializers=param_initializers)
         return x
 
@@ -125,19 +140,17 @@ def pad2d(self, inputs, begin, end=None):
             _ = end[1]
         except TypeError:
             end = [end, end]
-        if self.data_format == 'channels_last':
+        if self.data_format == "channels_last":
             padding = [[0, 0], [begin[0], end[0]], [begin[1], end[1]], [0, 0]]
         else:
             padding = [[0, 0], [0, 0], [begin[0], end[0]], [begin[1], end[1]]]
         return tf.pad(inputs, padding)
 
     def max_pooling2d(self, inputs, *args, **kwargs):
-        return tf.layers.max_pooling2d(
-            inputs, data_format=self.data_format, *args, **kwargs)
+        return tf.layers.max_pooling2d(inputs, data_format=self.data_format, *args, **kwargs)
 
     def average_pooling2d(self, inputs, *args, **kwargs):
-        return tf.layers.average_pooling2d(
-            inputs, data_format=self.data_format, *args, **kwargs)
+        return tf.layers.average_pooling2d(inputs, data_format=self.data_format, *args, **kwargs)
 
     def dense_linear(self, inputs, units, **kwargs):
         return tf.layers.dense(inputs, units, activation=None)
@@ -152,72 +165,72 @@ def activate(self, inputs, activation=None):
     def batch_norm(self, inputs, **kwargs):
         all_kwargs = dict(self.batch_norm_config)
         all_kwargs.update(kwargs)
-        data_format = 'NHWC' if self.data_format == 'channels_last' else 'NCHW'
+        data_format = "NHWC" if self.data_format == "channels_last" else "NCHW"
         return tf.contrib.layers.batch_norm(
-            inputs, is_training=self.training, data_format=data_format,
-            fused=True, **all_kwargs)
+            inputs, is_training=self.training, data_format=data_format, fused=True, **all_kwargs
+        )
 
     def spatial_average2d(self, inputs):
         shape = inputs.get_shape().as_list()
-        if self.data_format == 'channels_last':
+        if self.data_format == "channels_last":
             n, h, w, c = shape
         else:
             n, c, h, w = shape
         n = -1 if n is None else n
-        x = tf.layers.average_pooling2d(inputs, (h, w), (1, 1),
-                                        data_format=self.data_format)
+        x = tf.layers.average_pooling2d(inputs, (h, w), (1, 1), data_format=self.data_format)
         return tf.reshape(x, [n, c])
 
     def flatten2d(self, inputs):
         x = inputs
-        if self.data_format != 'channel_last':
+        if self.data_format != "channel_last":
             # Note: This ensures the output order matches that of NHWC networks
             x = tf.transpose(x, [0, 2, 3, 1])
         input_shape = x.get_shape().as_list()
         num_inputs = 1
         for dim in input_shape[1:]:
             num_inputs *= dim
-        return tf.reshape(x, [-1, num_inputs], name='flatten')
+        return tf.reshape(x, [-1, num_inputs], name="flatten")
 
     def residual2d(self, inputs, network, units=None, scale=1.0, activate=False):
         outputs = network(inputs)
-        c_axis = -1 if self.data_format == 'channels_last' else 1
-        h_axis = 1 if self.data_format == 'channels_last' else 2
+        c_axis = -1 if self.data_format == "channels_last" else 1
+        h_axis = 1 if self.data_format == "channels_last" else 2
         w_axis = h_axis + 1
         ishape, oshape = [y.get_shape().as_list() for y in [inputs, outputs]]
         ichans, ochans = ishape[c_axis], oshape[c_axis]
-        strides = ((ishape[h_axis] - 1) // oshape[h_axis] + 1,
-                   (ishape[w_axis] - 1) // oshape[w_axis] + 1)
-        with tf.name_scope('residual'):
-            if (ochans != ichans or strides[0] != 1 or strides[1] != 1):
-                inputs = self.conv2d_linear(inputs, units, 1, strides, 'SAME')
+        strides = (
+            (ishape[h_axis] - 1) // oshape[h_axis] + 1,
+            (ishape[w_axis] - 1) // oshape[w_axis] + 1,
+        )
+        with tf.name_scope("residual"):
+            if ochans != ichans or strides[0] != 1 or strides[1] != 1:
+                inputs = self.conv2d_linear(inputs, units, 1, strides, "SAME")
             x = inputs + scale * outputs
             if activate:
                 x = self.activate(x)
         return x
 
 
-def resnet_bottleneck_v1(builder, inputs, depth, depth_bottleneck, stride,
-                         basic=False):
+def resnet_bottleneck_v1(builder, inputs, depth, depth_bottleneck, stride, basic=False):
     num_inputs = inputs.get_shape().as_list()[1]
     x = inputs
-    with tf.name_scope('resnet_v1'):
+    with tf.name_scope("resnet_v1"):
         if depth == num_inputs:
             if stride == 1:
                 shortcut = x
             else:
                 shortcut = builder.max_pooling2d(x, 1, stride)
         else:
-            shortcut = builder.conv2d_linear(x, depth, 1, stride, 'SAME')
+            shortcut = builder.conv2d_linear(x, depth, 1, stride, "SAME")
         if basic:
             x = builder.pad2d(x, 1)
-            x = builder.conv2d(x, depth_bottleneck, 3, stride, 'VALID')
-            x = builder.conv2d_linear(x, depth, 3, 1, 'SAME')
+            x = builder.conv2d(x, depth_bottleneck, 3, stride, "VALID")
+            x = builder.conv2d_linear(x, depth, 3, 1, "SAME")
         else:
-            x = builder.conv2d(x, depth_bottleneck, 1, 1, 'SAME')
-            x = builder.conv2d(x, depth_bottleneck, 3, stride, 'SAME')
+            x = builder.conv2d(x, depth_bottleneck, 1, 1, "SAME")
+            x = builder.conv2d(x, depth_bottleneck, 3, stride, "SAME")
             # x = builder.conv2d_linear(x, depth,            1, 1,      'SAME')
-            x = builder.conv2d_linear_last_bn(x, depth, 1, 1, 'SAME')
+            x = builder.conv2d_linear_last_bn(x, depth, 1, 1, "SAME")
         x = tf.nn.relu(x + shortcut)
         return x
 
@@ -225,8 +238,8 @@ def resnet_bottleneck_v1(builder, inputs, depth, depth_bottleneck, stride,
 def inference_resnet_v1_impl(builder, inputs, layer_counts, basic=False):
     x = inputs
     x = builder.pad2d(x, 3)
-    x = builder.conv2d(x, 64, 7, 2, 'VALID')
-    x = builder.max_pooling2d(x, 3, 2, 'SAME')
+    x = builder.conv2d(x, 64, 7, 2, "VALID")
+    x = builder.max_pooling2d(x, 3, 2, "SAME")
     for i in range(layer_counts[0]):
         x = resnet_bottleneck_v1(builder, x, 256, 64, 1, basic)
     for i in range(layer_counts[1]):
@@ -238,13 +251,25 @@ def inference_resnet_v1_impl(builder, inputs, layer_counts, basic=False):
     return builder.spatial_average2d(x)
 
 
-def inference_resnet_v1(inputs, nlayer, data_format='channels_last',
-                        training=False, conv_initializer=None, adv_bn_init=False):
+def inference_resnet_v1(
+    inputs,
+    nlayer,
+    data_format="channels_last",
+    training=False,
+    conv_initializer=None,
+    adv_bn_init=False,
+):
     """Deep Residual Networks family of models
     https://arxiv.org/abs/1512.03385
     """
-    builder = LayerBuilder(tf.nn.relu, data_format, training, use_batch_norm=True,
-                           conv_initializer=conv_initializer, adv_bn_init=adv_bn_init)
+    builder = LayerBuilder(
+        tf.nn.relu,
+        data_format,
+        training,
+        use_batch_norm=True,
+        conv_initializer=conv_initializer,
+        adv_bn_init=adv_bn_init,
+    )
     if nlayer == 18:
         return inference_resnet_v1_impl(builder, inputs, [2, 2, 2, 2], basic=True)
     elif nlayer == 34:
@@ -256,83 +281,95 @@ def inference_resnet_v1(inputs, nlayer, data_format='channels_last',
     elif nlayer == 152:
         return inference_resnet_v1_impl(builder, inputs, [3, 8, 36, 3])
     else:
-        raise ValueError("Invalid nlayer (%i); must be one of: 18,34,50,101,152" %
-                         nlayer)
+        raise ValueError("Invalid nlayer (%i); must be one of: 18,34,50,101,152" % nlayer)
 
 
 def get_model_func(model_name):
-    if model_name.startswith('resnet'):
-        nlayer = int(model_name[len('resnet'):])
-        return lambda images, *args, **kwargs: \
-            inference_resnet_v1(images, nlayer, *args, **kwargs)
+    if model_name.startswith("resnet"):
+        nlayer = int(model_name[len("resnet") :])
+        return lambda images, *args, **kwargs: inference_resnet_v1(images, nlayer, *args, **kwargs)
     else:
         raise ValueError("Invalid model type: %s" % model_name)
 
 
 def deserialize_image_record(record):
     feature_map = {
-        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
-        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
-        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
-        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
-        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
-        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
-        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
+        "image/encoded": tf.FixedLenFeature([], tf.string, ""),
+        "image/class/label": tf.FixedLenFeature([1], tf.int64, -1),
+        "image/class/text": tf.FixedLenFeature([], tf.string, ""),
+        "image/object/bbox/xmin": tf.VarLenFeature(dtype=tf.float32),
+        "image/object/bbox/ymin": tf.VarLenFeature(dtype=tf.float32),
+        "image/object/bbox/xmax": tf.VarLenFeature(dtype=tf.float32),
+        "image/object/bbox/ymax": tf.VarLenFeature(dtype=tf.float32),
     }
-    with tf.name_scope('deserialize_image_record'):
+    with tf.name_scope("deserialize_image_record"):
         obj = tf.parse_single_example(record, feature_map)
-        imgdata = obj['image/encoded']
-        label = tf.cast(obj['image/class/label'], tf.int32)
-        bbox = tf.stack([obj['image/object/bbox/%s' % x].values
-                         for x in ['ymin', 'xmin', 'ymax', 'xmax']])
+        imgdata = obj["image/encoded"]
+        label = tf.cast(obj["image/class/label"], tf.int32)
+        bbox = tf.stack(
+            [obj["image/object/bbox/%s" % x].values for x in ["ymin", "xmin", "ymax", "xmax"]]
+        )
         bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
-        text = obj['image/class/text']
+        text = obj["image/class/text"]
         return imgdata, label, bbox, text
 
 
 def decode_jpeg(imgdata, channels=3):
-    return tf.image.decode_jpeg(imgdata, channels=channels,
-                                fancy_upscaling=False,
-                                dct_method='INTEGER_FAST')
+    return tf.image.decode_jpeg(
+        imgdata, channels=channels, fancy_upscaling=False, dct_method="INTEGER_FAST"
+    )
 
 
-def crop_and_resize_image(image, original_bbox, height, width, 
-                          distort=False, nsummary=10):
-    with tf.name_scope('crop_and_resize'):
+def crop_and_resize_image(image, original_bbox, height, width, distort=False, nsummary=10):
+    with tf.name_scope("crop_and_resize"):
         # Evaluation is done on a center-crop of this ratio
         eval_crop_ratio = 0.8
         if distort:
-            initial_shape = [int(round(height / eval_crop_ratio)),
-                             int(round(width / eval_crop_ratio)),
-                             3]
-            bbox_begin, bbox_size, bbox = \
-                tf.image.sample_distorted_bounding_box(
-                    initial_shape,
-                    bounding_boxes=tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
-                    # tf.zeros(shape=[1,0,4]), # No bounding boxes
-                    min_object_covered=0.1,
-                    aspect_ratio_range=[3. / 4., 4. / 3.],
-                    area_range=[0.08, 1.0],
-                    max_attempts=100,
-                    seed=11 * hvd.rank(),  # Need to set for deterministic results
-                    use_image_if_no_bounding_boxes=True)
+            initial_shape = [
+                int(round(height / eval_crop_ratio)),
+                int(round(width / eval_crop_ratio)),
+                3,
+            ]
+            bbox_begin, bbox_size, bbox = tf.image.sample_distorted_bounding_box(
+                initial_shape,
+                bounding_boxes=tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
+                # tf.zeros(shape=[1,0,4]), # No bounding boxes
+                min_object_covered=0.1,
+                aspect_ratio_range=[3.0 / 4.0, 4.0 / 3.0],
+                area_range=[0.08, 1.0],
+                max_attempts=100,
+                seed=11 * hvd.rank(),  # Need to set for deterministic results
+                use_image_if_no_bounding_boxes=True,
+            )
             bbox = bbox[0, 0]  # Remove batch, box_idx dims
         else:
             # Central crop
             ratio_y = ratio_x = eval_crop_ratio
-            bbox = tf.constant([0.5 * (1 - ratio_y), 0.5 * (1 - ratio_x),
-                                0.5 * (1 + ratio_y), 0.5 * (1 + ratio_x)])
-        image = tf.image.crop_and_resize(
-            image[None, :, :, :], bbox[None, :], [0], [height, width])[0]
+            bbox = tf.constant(
+                [0.5 * (1 - ratio_y), 0.5 * (1 - ratio_x), 0.5 * (1 + ratio_y), 0.5 * (1 + ratio_x)]
+            )
+        image = tf.image.crop_and_resize(image[None, :, :, :], bbox[None, :], [0], [height, width])[
+            0
+        ]
         return image
 
 
-def parse_and_preprocess_image_record(record, counter, height, width,
-                                      brightness, contrast, saturation, hue,
-                                      distort=False, nsummary=10, increased_aug=False):
+def parse_and_preprocess_image_record(
+    record,
+    counter,
+    height,
+    width,
+    brightness,
+    contrast,
+    saturation,
+    hue,
+    distort=False,
+    nsummary=10,
+    increased_aug=False,
+):
     imgdata, label, bbox, text = deserialize_image_record(record)
     label -= 1  # Change to 0-based (don't use background class)
-    with tf.name_scope('preprocess_train'):
+    with tf.name_scope("preprocess_train"):
         try:
             image = decode_jpeg(imgdata, channels=3)
         except:
@@ -342,24 +379,44 @@ def parse_and_preprocess_image_record(record, counter, height, width,
             image = tf.image.random_flip_left_right(image)
             if increased_aug:
                 image = tf.image.random_brightness(image, max_delta=brightness)
-                image = distort_image_ops.random_hsv_in_yiq(image, 
-                                                            lower_saturation=saturation, 
-                                                            upper_saturation=2.0 - saturation, 
-                                                            max_delta_hue=hue * math.pi)
+                image = distort_image_ops.random_hsv_in_yiq(
+                    image,
+                    lower_saturation=saturation,
+                    upper_saturation=2.0 - saturation,
+                    max_delta_hue=hue * math.pi,
+                )
                 image = tf.image.random_contrast(image, lower=contrast, upper=2.0 - contrast)
-                tf.summary.image('distorted_color_image', tf.expand_dims(image, 0))
-        image = tf.clip_by_value(image, 0., 255.)
+                tf.summary.image("distorted_color_image", tf.expand_dims(image, 0))
+        image = tf.clip_by_value(image, 0.0, 255.0)
         image = tf.cast(image, tf.uint8)
         return image, label
 
-def make_dataset(filenames, take_count, batch_size, height, width,
-                 brightness, contrast, saturation, hue,
-                 training=False, num_threads=10, nsummary=10, shard=False, synthetic=False,
-                 increased_aug=False):
+
+def make_dataset(
+    filenames,
+    take_count,
+    batch_size,
+    height,
+    width,
+    brightness,
+    contrast,
+    saturation,
+    hue,
+    training=False,
+    num_threads=10,
+    nsummary=10,
+    shard=False,
+    synthetic=False,
+    increased_aug=False,
+):
     if synthetic and training:
         input_shape = [height, width, 3]
-        input_element = nest.map_structure(lambda s: tf.constant(0.5, tf.float32, s), tf.TensorShape(input_shape))
-        label_element = nest.map_structure(lambda s: tf.constant(1, tf.int32, s), tf.TensorShape([1]))
+        input_element = nest.map_structure(
+            lambda s: tf.constant(0.5, tf.float32, s), tf.TensorShape(input_shape)
+        )
+        label_element = nest.map_structure(
+            lambda s: tf.constant(1, tf.int32, s), tf.TensorShape([1])
+        )
         element = (input_element, label_element)
         ds = tf.data.Dataset.from_tensors(element).repeat()
     else:
@@ -380,16 +437,29 @@ def make_dataset(filenames, take_count, batch_size, height, width,
         if training:
             ds = ds.shuffle(1000, seed=7 * (1 + hvd.rank()))
 
-        ds = ds.interleave(
-            tf.data.TFRecordDataset, cycle_length=num_readers, block_length=1)
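+        # Interleave records across num_readers TFRecord files (one record from each in turn) to mix shards.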
+        ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=num_readers, block_length=1)
         counter = tf.data.Dataset.range(sys.maxsize)
         ds = tf.data.Dataset.zip((ds, counter))
         preproc_func = lambda record, counter_: parse_and_preprocess_image_record(
-            record, counter_, height, width, brightness, contrast, saturation, hue,
-            distort=training, nsummary=nsummary if training else 0, increased_aug=increased_aug)
+            record,
+            counter_,
+            height,
+            width,
+            brightness,
+            contrast,
+            saturation,
+            hue,
+            distort=training,
+            nsummary=nsummary if training else 0,
+            increased_aug=increased_aug,
+        )
         ds = ds.map(preproc_func, num_parallel_calls=num_threads)
         if training:
-            ds = ds.apply(tf.data.experimental.shuffle_and_repeat(shuffle_buffer_size, seed=5*(1+hvd.rank())))
+            ds = ds.apply(
+                tf.data.experimental.shuffle_and_repeat(
+                    shuffle_buffer_size, seed=5 * (1 + hvd.rank())
+                )
+            )
     ds = ds.batch(batch_size)
     return ds
 
@@ -399,18 +469,19 @@ def stage(tensors):
     """
     stage_area = data_flow_ops.StagingArea(
         dtypes=[tensor.dtype for tensor in tensors],
-        shapes=[tensor.get_shape() for tensor in tensors])
+        shapes=[tensor.get_shape() for tensor in tensors],
+    )
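+    # put() enqueues the current step's tensors while get() returns previously staged ones, overlapping the copy with compute.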
     put_op = stage_area.put(tensors)
     get_tensors = stage_area.get()
-    tf.add_to_collection('STAGING_AREA_PUTS', put_op)
+    tf.add_to_collection("STAGING_AREA_PUTS", put_op)
     return put_op, get_tensors
 
 
 class PrefillStagingAreasHook(tf.train.SessionRunHook):
     def after_create_session(self, session, coord):
-        enqueue_ops = tf.get_collection('STAGING_AREA_PUTS')
+        enqueue_ops = tf.get_collection("STAGING_AREA_PUTS")
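+        # Run the put ops cumulatively so every staging area holds one element before training starts.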
         for i in range(len(enqueue_ops)):
-            session.run(enqueue_ops[:i + 1])
+            session.run(enqueue_ops[: i + 1])
 
 
 class LogSessionRunHook(tf.train.SessionRunHook):
@@ -421,15 +492,15 @@ def __init__(self, global_batch_size, num_records, display_every=10, logger=None
         self.logger = logger
 
     def after_create_session(self, session, coord):
-        rank0log(self.logger, '  Step Epoch Speed   Loss  FinLoss   LR')
-        self.elapsed_secs = 0.
+        rank0log(self.logger, "  Step Epoch Speed   Loss  FinLoss   LR")
+        self.elapsed_secs = 0.0
         self.count = 0
 
     def before_run(self, run_context):
         self.t0 = time.time()
         return tf.train.SessionRunArgs(
-            fetches=[tf.train.get_global_step(),
-                     'loss:0', 'total_loss:0', 'learning_rate:0'])
+            fetches=[tf.train.get_global_step(), "loss:0", "total_loss:0", "learning_rate:0"]
+        )
 
     def after_run(self, run_context, run_values):
         self.elapsed_secs += time.time() - self.t0
@@ -439,25 +510,37 @@ def after_run(self, run_context, run_values):
             dt = self.elapsed_secs / self.count
             img_per_sec = self.global_batch_size / dt
             epoch = global_step * self.global_batch_size / self.num_records
-            self.logger.info('%6i %5.1f %7.1f %6.3f %6.3f %7.5f' %
-                             (global_step, epoch, img_per_sec, loss, total_loss, lr))
-            self.elapsed_secs = 0.
+            self.logger.info(
+                "%6i %5.1f %7.1f %6.3f %6.3f %7.5f"
+                % (global_step, epoch, img_per_sec, loss, total_loss, lr)
+            )
+            self.elapsed_secs = 0.0
             self.count = 0
 
 
-def _fp32_trainvar_getter(getter, name, shape=None, dtype=None,
-                          trainable=True, regularizer=None,
-                          *args, **kwargs):
+def _fp32_trainvar_getter(
+    getter, name, shape=None, dtype=None, trainable=True, regularizer=None, *args, **kwargs
+):
     storage_dtype = tf.float32 if trainable else dtype
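+    # Keep trainable variables in fp32 for numerically stable updates; skip weight decay for batch-norm parameters.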
-    variable = getter(name, shape, dtype=storage_dtype,
-                      trainable=trainable,
-                      regularizer=regularizer if trainable and 'BatchNorm' not in name and 'batchnorm' not in name and 'batch_norm' not in name and 'Batch_Norm' not in name else None,
-                      *args, **kwargs)
+    variable = getter(
+        name,
+        shape,
+        dtype=storage_dtype,
+        trainable=trainable,
+        regularizer=regularizer
+        if trainable
+        and "BatchNorm" not in name
+        and "batchnorm" not in name
+        and "batch_norm" not in name
+        and "Batch_Norm" not in name
+        else None,
+        *args,
+        **kwargs
+    )
     if trainable and dtype != tf.float32:
-        cast_name = name + '/fp16_cast'
+        cast_name = name + "/fp16_cast"
         try:
-            cast_variable = tf.get_default_graph().get_tensor_by_name(
-                cast_name + ':0')
+            cast_variable = tf.get_default_graph().get_tensor_by_name(cast_name + ":0")
         except KeyError:
             cast_variable = tf.cast(variable, dtype, name=cast_name)
         cast_variable._ref = variable._ref
@@ -465,31 +548,26 @@ def _fp32_trainvar_getter(getter, name, shape=None, dtype=None,
     return variable
 
 
-def fp32_trainable_vars(name='fp32_vars', *args, **kwargs):
+def fp32_trainable_vars(name="fp32_vars", *args, **kwargs):
     """A varible scope with custom variable getter to convert fp16 trainable
     variables with fp32 storage followed by fp16 cast.
     """
-    return tf.variable_scope(
-        name, custom_getter=_fp32_trainvar_getter, *args, **kwargs)
+    return tf.variable_scope(name, custom_getter=_fp32_trainvar_getter, *args, **kwargs)
 
 
 class MixedPrecisionOptimizer(tf.train.Optimizer):
     """An optimizer that updates trainable variables in fp32."""
 
-    def __init__(self, optimizer,
-                 scale=None,
-                 name="MixedPrecisionOptimizer",
-                 use_locking=False):
-        super(MixedPrecisionOptimizer, self).__init__(
-            name=name, use_locking=use_locking)
+    def __init__(self, optimizer, scale=None, name="MixedPrecisionOptimizer", use_locking=False):
+        super(MixedPrecisionOptimizer, self).__init__(name=name, use_locking=use_locking)
         self._optimizer = optimizer
         self._scale = float(scale) if scale is not None else 1.0
 
     def compute_gradients(self, loss, var_list=None, *args, **kwargs):
         if var_list is None:
-            var_list = (
-                    tf.trainable_variables() +
-                    tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
+            var_list = tf.trainable_variables() + tf.get_collection(
+                tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES
+            )
 
         replaced_list = var_list
 
@@ -503,7 +581,7 @@ def compute_gradients(self, loss, var_list=None, *args, **kwargs):
             if var is not orig_var:
                 grad = tf.cast(grad, orig_var.dtype)
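+            # Undo the loss-scale factor so gradients keep their true magnitude when applied to the fp32 master weights.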
             if self._scale != 1.0:
-                grad = tf.scalar_mul(1. / self._scale, grad)
+                grad = tf.scalar_mul(1.0 / self._scale, grad)
             final_gradvar.append((grad, orig_var))
 
         return final_gradvar
@@ -511,6 +589,7 @@ def compute_gradients(self, loss, var_list=None, *args, **kwargs):
     def apply_gradients(self, *args, **kwargs):
         return self._optimizer.apply_gradients(*args, **kwargs)
 
+
 class LarcOptimizer(tf.train.Optimizer):
     """ LARC implementation
         -------------------
@@ -524,10 +603,17 @@ class LarcOptimizer(tf.train.Optimizer):
           - use_locking
     """
 
-    def __init__(self, optimizer, learning_rate, eta, clip=True, epsilon=1.,
-                 name="LarcOptimizer", use_locking=False):
-        super(LarcOptimizer, self).__init__(
-            name=name, use_locking=use_locking)
+    def __init__(
+        self,
+        optimizer,
+        learning_rate,
+        eta,
+        clip=True,
+        epsilon=1.0,
+        name="LarcOptimizer",
+        use_locking=False,
+    ):
+        super(LarcOptimizer, self).__init__(name=name, use_locking=use_locking)
         self._optimizer = optimizer
         self._learning_rate = learning_rate
         self._eta = float(eta)
@@ -539,16 +625,13 @@ def compute_gradients(self, *args, **kwargs):
 
     def apply_gradients(self, gradvars, *args, **kwargs):
         v_list = [tf.norm(tensor=v, ord=2) for _, v in gradvars]
-        g_list = [tf.norm(tensor=g, ord=2) if g is not None else 0.0
-                  for g, _ in gradvars]
+        g_list = [tf.norm(tensor=g, ord=2) if g is not None else 0.0 for g, _ in gradvars]
         v_norms = tf.stack(v_list)
         g_norms = tf.stack(g_list)
         zeds = tf.zeros_like(v_norms)
         # assign epsilon if weights or grads = 0, to avoid division by zero
         # also prevent biases from getting stuck at initialization (0.)
-        cond = tf.logical_and(
-            tf.not_equal(v_norms, zeds),
-            tf.not_equal(g_norms, zeds))
+        cond = tf.logical_and(tf.not_equal(v_norms, zeds), tf.not_equal(g_norms, zeds))
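+        # LARC local learning rate per variable: eta * ||w|| / ||g||; epsilon is substituted where either norm is zero.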
         true_vals = tf.scalar_mul(self._eta, tf.div(v_norms, g_norms))
         # true_vals = tf.scalar_mul(tf.cast(self._eta, tf.float32), tf.div(tf.cast(v_norms, tf.float32), tf.cast(g_norms, tf.float32)))
         false_vals = tf.fill(tf.shape(v_norms), self._epsilon)
@@ -561,9 +644,10 @@ def apply_gradients(self, gradvars, *args, **kwargs):
             # for which learning rate is already fixed
             # We then have to scale the gradients instead of the learning rate.
             larc_local_lr = tf.minimum(tf.div(larc_local_lr, lr), ones)
-        gradvars = [(tf.multiply(larc_local_lr[i], g), v)
-                    if g is not None else (None, v)
-                    for i, (g, v) in enumerate(gradvars)]
+        gradvars = [
+            (tf.multiply(larc_local_lr[i], g), v) if g is not None else (None, v)
+            for i, (g, v) in enumerate(gradvars)
+        ]
         return self._optimizer.apply_gradients(gradvars, *args, **kwargs)
 
 
@@ -571,45 +655,64 @@ def get_with_default(obj, key, default_value):
     return obj[key] if key in obj and obj[key] is not None else default_value
 
 
-def get_lr(lr, steps, lr_steps, warmup_it, decay_steps, global_step, lr_decay_mode,
-           cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha, lc_periods, lc_alpha, lc_beta):
-    if lr_decay_mode == 'steps':
-        learning_rate = tf.train.piecewise_constant(global_step,
-                                                    steps, lr_steps)
-    elif lr_decay_mode == 'poly' or lr_decay_mode == 'poly_cycle':
-        cycle = lr_decay_mode == 'poly_cycle'
-        learning_rate = tf.train.polynomial_decay(lr,
-                                                  global_step - warmup_it,
-                                                  decay_steps=decay_steps - warmup_it,
-                                                  end_learning_rate=0.00001,
-                                                  power=2,
-                                                  cycle=cycle)
-    elif lr_decay_mode == 'cosine_decay_restarts':
-        learning_rate = tf.train.cosine_decay_restarts(lr, 
-                                                       global_step - warmup_it,
-                                                       (decay_steps - warmup_it) * cdr_first_decay_ratio,
-                                                       t_mul=cdr_t_mul, 
-                                                       m_mul=cdr_m_mul,
-                                                       alpha=cdr_alpha)
-    elif lr_decay_mode == 'cosine':
-        learning_rate = tf.train.cosine_decay(lr,
-                                              global_step - warmup_it,
-                                              decay_steps=decay_steps - warmup_it,
-                                              alpha=0.0)
-    elif lr_decay_mode == 'linear_cosine':
-        learning_rate = tf.train.linear_cosine_decay(lr,
-                                                     global_step - warmup_it,
-                                                     decay_steps=decay_steps - warmup_it,
-                                                     num_periods=lc_periods,#0.47,
-                                                     alpha=lc_alpha,#0.0,
-                                                     beta=lc_beta)#0.00001)
+def get_lr(
+    lr,
+    steps,
+    lr_steps,
+    warmup_it,
+    decay_steps,
+    global_step,
+    lr_decay_mode,
+    cdr_first_decay_ratio,
+    cdr_t_mul,
+    cdr_m_mul,
+    cdr_alpha,
+    lc_periods,
+    lc_alpha,
+    lc_beta,
+):
+    if lr_decay_mode == "steps":
+        learning_rate = tf.train.piecewise_constant(global_step, steps, lr_steps)
+    elif lr_decay_mode == "poly" or lr_decay_mode == "poly_cycle":
+        cycle = lr_decay_mode == "poly_cycle"
+        learning_rate = tf.train.polynomial_decay(
+            lr,
+            global_step - warmup_it,
+            decay_steps=decay_steps - warmup_it,
+            end_learning_rate=0.00001,
+            power=2,
+            cycle=cycle,
+        )
+    elif lr_decay_mode == "cosine_decay_restarts":
+        learning_rate = tf.train.cosine_decay_restarts(
+            lr,
+            global_step - warmup_it,
+            (decay_steps - warmup_it) * cdr_first_decay_ratio,
+            t_mul=cdr_t_mul,
+            m_mul=cdr_m_mul,
+            alpha=cdr_alpha,
+        )
+    elif lr_decay_mode == "cosine":
+        learning_rate = tf.train.cosine_decay(
+            lr, global_step - warmup_it, decay_steps=decay_steps - warmup_it, alpha=0.0
+        )
+    elif lr_decay_mode == "linear_cosine":
+        learning_rate = tf.train.linear_cosine_decay(
+            lr,
+            global_step - warmup_it,
+            decay_steps=decay_steps - warmup_it,
+            num_periods=lc_periods,  # 0.47,
+            alpha=lc_alpha,  # 0.0,
+            beta=lc_beta,
+        )  # 0.00001)
     else:
-        raise ValueError('Invalid type of lr_decay_mode')
+        raise ValueError("Invalid type of lr_decay_mode")
     return learning_rate
 
 
 def warmup_decay(warmup_lr, global_step, warmup_steps, warmup_end_lr):
     from tensorflow.python.ops import math_ops
+
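+    # Linear warmup: interpolate from warmup_lr to warmup_end_lr as global_step goes from 0 to warmup_steps.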
     p = tf.cast(global_step, tf.float32) / tf.cast(warmup_steps, tf.float32)
     diff = math_ops.subtract(warmup_end_lr, warmup_lr)
     res = math_ops.add(warmup_lr, math_ops.multiply(diff, p))
@@ -618,40 +721,40 @@ def warmup_decay(warmup_lr, global_step, warmup_steps, warmup_end_lr):
 
 def cnn_model_function(features, labels, mode, params):
     labels = tf.reshape(labels, (-1,))  # Squash unnecessary unary dim
-    lr = params['lr']
-    lr_steps = params['lr_steps']
-    steps = params['steps']
-    use_larc = params['use_larc']
-    leta = params['leta']
-    lr_decay_mode = params['lr_decay_mode']
-    decay_steps = params['decay_steps']
-    cdr_first_decay_ratio = params['cdr_first_decay_ratio']
-    cdr_t_mul = params['cdr_t_mul']
-    cdr_m_mul = params['cdr_m_mul']
-    cdr_alpha = params['cdr_alpha']
-    lc_periods = params['lc_periods']
-    lc_alpha = params['lc_alpha']
-    lc_beta = params['lc_beta']
-
-    model_name = params['model']
-    num_classes = params['n_classes']
-    model_dtype = get_with_default(params, 'dtype', tf.float32)
-    model_format = get_with_default(params, 'format', 'channels_first')
-    device = get_with_default(params, 'device', '/gpu:0')
+    lr = params["lr"]
+    lr_steps = params["lr_steps"]
+    steps = params["steps"]
+    use_larc = params["use_larc"]
+    leta = params["leta"]
+    lr_decay_mode = params["lr_decay_mode"]
+    decay_steps = params["decay_steps"]
+    cdr_first_decay_ratio = params["cdr_first_decay_ratio"]
+    cdr_t_mul = params["cdr_t_mul"]
+    cdr_m_mul = params["cdr_m_mul"]
+    cdr_alpha = params["cdr_alpha"]
+    lc_periods = params["lc_periods"]
+    lc_alpha = params["lc_alpha"]
+    lc_beta = params["lc_beta"]
+
+    model_name = params["model"]
+    num_classes = params["n_classes"]
+    model_dtype = get_with_default(params, "dtype", tf.float32)
+    model_format = get_with_default(params, "format", "channels_first")
+    device = get_with_default(params, "device", "/gpu:0")
     model_func = get_model_func(model_name)
     inputs = features  # TODO: Should be using feature columns?
-    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
-    momentum = params['mom']
-    weight_decay = params['wdecay']
-    warmup_lr = params['warmup_lr']
-    warmup_it = params['warmup_it']
-    loss_scale = params['loss_scale']
+    is_training = mode == tf.estimator.ModeKeys.TRAIN
+    momentum = params["mom"]
+    weight_decay = params["wdecay"]
+    warmup_lr = params["warmup_lr"]
+    warmup_it = params["warmup_it"]
+    loss_scale = params["loss_scale"]
 
-    adv_bn_init = params['adv_bn_init']
-    conv_init = params['conv_init']
+    adv_bn_init = params["adv_bn_init"]
+    conv_init = params["conv_init"]
 
     if mode == tf.estimator.ModeKeys.TRAIN:
-        with tf.device('/cpu:0'):
+        with tf.device("/cpu:0"):
             preload_op, (inputs, labels) = stage([inputs, labels])
 
     with tf.device(device):
@@ -661,73 +764,87 @@ def cnn_model_function(features, labels, mode, params):
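+        # Per-channel ImageNet RGB mean/std (approximate) used to normalize the input images.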
         imagenet_mean = np.array([121, 115, 100], dtype=np.float32)
         imagenet_std = np.array([70, 68, 71], dtype=np.float32)
         inputs = tf.subtract(inputs, imagenet_mean)
-        inputs = tf.multiply(inputs, 1. / imagenet_std)
-        if model_format == 'channels_first':
+        inputs = tf.multiply(inputs, 1.0 / imagenet_std)
+        if model_format == "channels_first":
             inputs = tf.transpose(inputs, [0, 3, 1, 2])
-        with fp32_trainable_vars(
-                regularizer=tf.contrib.layers.l2_regularizer(weight_decay)):
+        with fp32_trainable_vars(regularizer=tf.contrib.layers.l2_regularizer(weight_decay)):
             top_layer = model_func(
-                inputs, data_format=model_format, training=is_training,
-                conv_initializer=conv_init, adv_bn_init=adv_bn_init)
-            logits = tf.layers.dense(top_layer, num_classes,
-                                     kernel_initializer=tf.random_normal_initializer(stddev=0.01))
+                inputs,
+                data_format=model_format,
+                training=is_training,
+                conv_initializer=conv_init,
+                adv_bn_init=adv_bn_init,
+            )
+            logits = tf.layers.dense(
+                top_layer, num_classes, kernel_initializer=tf.random_normal_initializer(stddev=0.01)
+            )
         predicted_classes = tf.argmax(logits, axis=1, output_type=tf.int32)
         logits = tf.cast(logits, tf.float32)
         if mode == tf.estimator.ModeKeys.PREDICT:
             probabilities = tf.nn.softmax(logits)  # tf.softmax does not exist; use tf.nn.softmax
             predictions = {
-                'class_ids': predicted_classes[:, None],
-                'probabilities': probabilities,
-                'logits': logits
+                "class_ids": predicted_classes[:, None],
+                "probabilities": probabilities,
+                "logits": logits,
             }
             return tf.estimator.EstimatorSpec(mode, predictions=predictions)
-        loss = tf.losses.sparse_softmax_cross_entropy(
-            logits=logits, labels=labels)
-        loss = tf.identity(loss, name='loss')  # For access by logger (TODO: Better way to access it?)
+        loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
+        loss = tf.identity(
+            loss, name="loss"
+        )  # For access by logger (TODO: Better way to access it?)
 
         if mode == tf.estimator.ModeKeys.EVAL:
             with tf.device(None):  # Allow fallback to CPU if no GPU support for these ops
-                accuracy = tf.metrics.accuracy(
-                    labels=labels, predictions=predicted_classes)
-                top5acc = tf.metrics.mean(
-                    tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32))
+                accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes)
+                top5acc = tf.metrics.mean(tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32))
                 newaccuracy = (hvd.allreduce(accuracy[0]), accuracy[1])
                 newtop5acc = (hvd.allreduce(top5acc[0]), top5acc[1])
-                metrics = {'val-top1acc': newaccuracy, 'val-top5acc': newtop5acc}
-            return tf.estimator.EstimatorSpec(
-                mode, loss=loss, eval_metric_ops=metrics)
+                metrics = {"val-top1acc": newaccuracy, "val-top5acc": newtop5acc}
+            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
 
-        assert (mode == tf.estimator.ModeKeys.TRAIN)
+        assert mode == tf.estimator.ModeKeys.TRAIN
         reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
-        total_loss = tf.add_n([loss] + reg_losses, name='total_loss')
+        total_loss = tf.add_n([loss] + reg_losses, name="total_loss")
 
         batch_size = tf.shape(inputs)[0]
 
         global_step = tf.train.get_global_step()
 
-        with tf.device('/cpu:0'):  # Allow fallback to CPU if no GPU support for these ops
-            learning_rate = tf.cond(global_step < warmup_it,
-                                    lambda: warmup_decay(warmup_lr, global_step, warmup_it,
-                                                         lr),
-                                    lambda: get_lr(lr, steps, lr_steps, warmup_it, decay_steps, global_step,
-                                                   lr_decay_mode, 
-                                                   cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha, 
-                                                   lc_periods, lc_alpha, lc_beta))
-            learning_rate = tf.identity(learning_rate, 'learning_rate')
-            tf.summary.scalar('learning_rate', learning_rate)
-
-        opt = tf.train.MomentumOptimizer(
-            learning_rate, momentum, use_nesterov=True)
+        with tf.device("/cpu:0"):  # Allow fallback to CPU if no GPU support for these ops
+            learning_rate = tf.cond(
+                global_step < warmup_it,
+                lambda: warmup_decay(warmup_lr, global_step, warmup_it, lr),
+                lambda: get_lr(
+                    lr,
+                    steps,
+                    lr_steps,
+                    warmup_it,
+                    decay_steps,
+                    global_step,
+                    lr_decay_mode,
+                    cdr_first_decay_ratio,
+                    cdr_t_mul,
+                    cdr_m_mul,
+                    cdr_alpha,
+                    lc_periods,
+                    lc_alpha,
+                    lc_beta,
+                ),
+            )
+            learning_rate = tf.identity(learning_rate, "learning_rate")
+            tf.summary.scalar("learning_rate", learning_rate)
+
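+        # Optimizer stack: Nesterov momentum, wrapped for Horovod allreduce, optional LARC, and fp32 master weights with loss scaling.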
+        opt = tf.train.MomentumOptimizer(learning_rate, momentum, use_nesterov=True)
         opt = hvd.DistributedOptimizer(opt)
         if use_larc:
             opt = LarcOptimizer(opt, learning_rate, leta, clip=True)
         opt = MixedPrecisionOptimizer(opt, scale=loss_scale)
         update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
         with tf.control_dependencies(update_ops):
-            gate_gradients = (tf.train.Optimizer.GATE_NONE)
+            gate_gradients = tf.train.Optimizer.GATE_NONE
             train_op = opt.minimize(
-                total_loss, global_step=tf.train.get_global_step(),
-                gate_gradients=gate_gradients)
+                total_loss, global_step=tf.train.get_global_step(), gate_gradients=gate_gradients
+            )
         train_op = tf.group(preload_op, gpucopy_op, train_op)  # , update_ops)
 
         return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
@@ -741,158 +858,234 @@ def count_records(tf_record_filename):
         return count
 
     nfile = len(filenames)
-    return (count_records(filenames[0]) * (nfile - 1) +
-            count_records(filenames[-1]))
+    return count_records(filenames[0]) * (nfile - 1) + count_records(filenames[-1])
 
 
 def add_bool_argument(cmdline, shortname, longname=None, default=False, help=None):
     if longname is None:
         shortname, longname = None, shortname
     elif default == True:
-        raise ValueError("""Boolean arguments that are True by default should not have short names.""")
+        raise ValueError(
+            """Boolean arguments that are True by default should not have short names."""
+        )
     name = longname[2:]
     feature_parser = cmdline.add_mutually_exclusive_group(required=False)
     if shortname is not None:
-        feature_parser.add_argument(shortname, '--' + name, dest=name, action='store_true', help=help, default=default)
+        feature_parser.add_argument(
+            shortname, "--" + name, dest=name, action="store_true", help=help, default=default
+        )
     else:
-        feature_parser.add_argument('--' + name, dest=name, action='store_true', help=help, default=default)
-    feature_parser.add_argument('--no' + name, dest=name, action='store_false')
+        feature_parser.add_argument(
+            "--" + name, dest=name, action="store_true", help=help, default=default
+        )
+    feature_parser.add_argument("--no" + name, dest=name, action="store_false")
     return cmdline
 
 
 def add_cli_args():
-    cmdline = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    cmdline = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     # Basic options
-    cmdline.add_argument('-m', '--model', default='resnet50',
-                         help="""Name of model to run: resnet[18,34,50,101,152]""")
-    cmdline.add_argument('--data_dir',
-                         help="""Path to dataset in TFRecord format
+    cmdline.add_argument(
+        "-m",
+        "--model",
+        default="resnet50",
+        help="""Name of model to run: resnet[18,34,50,101,152]""",
+    )
+    cmdline.add_argument(
+        "--data_dir",
+        help="""Path to dataset in TFRecord format
                          (aka Example protobufs). Files should be
-                         named 'train-*' and 'validation-*'.""")
-    add_bool_argument(cmdline, '--synthetic', help="""Whether to use synthetic data for training""")
-    cmdline.add_argument('-b', '--batch_size', default=256, type=int,
-                         help="""Size of each minibatch per GPU""")
-    cmdline.add_argument('--num_batches', type=int,
-                         help="""Number of batches to run.
-                         Ignored during eval or if num epochs given""")
-    cmdline.add_argument('--num_epochs', type=int,
-                         help="""Number of epochs to run.
-                         Overrides --num_batches. Ignored during eval.""")
-    cmdline.add_argument('--log_dir', default='imagenet_resnet',
-                         help="""Directory in which to write training
+                         named 'train-*' and 'validation-*'.""",
+    )
+    add_bool_argument(cmdline, "--synthetic", help="""Whether to use synthetic data for training""")
+    cmdline.add_argument(
+        "-b", "--batch_size", default=256, type=int, help="""Size of each minibatch per GPU"""
+    )
+    cmdline.add_argument(
+        "--num_batches",
+        type=int,
+        help="""Number of batches to run.
+                         Ignored during eval or if --num_epochs is given""",
+    )
+    cmdline.add_argument(
+        "--num_epochs",
+        type=int,
+        help="""Number of epochs to run.
+                         Overrides --num_batches. Ignored during eval.""",
+    )
+    cmdline.add_argument(
+        "--log_dir",
+        default="imagenet_resnet",
+        help="""Directory in which to write training
                          summaries and checkpoints. If the log directory already 
                          contains some checkpoints, it tries to resume training
                          from the last saved checkpoint. Pass --clear_log if you
-                         want to clear all checkpoints and start a fresh run""")
-    add_bool_argument(cmdline, '--clear_log', default=False,
-                      help="""Clear the log folder passed so a fresh run can be started""")
-    cmdline.add_argument('--log_name', type=str, default='hvd_train.log')
-    add_bool_argument(cmdline, '--local_ckpt',
-                      help="""Performs local checkpoints (i.e. one per node)""")
-    cmdline.add_argument('--display_every', default=50, type=int,
-                         help="""How often (in iterations) to print out
-                         running information.""")
-    add_bool_argument(cmdline, '--eval',
-                      help="""Evaluate the top-1 and top-5 accuracy of
+                         want to clear all checkpoints and start a fresh run""",
+    )
+    add_bool_argument(
+        cmdline,
+        "--clear_log",
+        default=False,
+        help="""Clear the log folder passed so a fresh run can be started""",
+    )
+    cmdline.add_argument("--log_name", type=str, default="hvd_train.log")
+    add_bool_argument(
+        cmdline, "--local_ckpt", help="""Performs local checkpoints (i.e. one per node)"""
+    )
+    cmdline.add_argument(
+        "--display_every",
+        default=50,
+        type=int,
+        help="""How often (in iterations) to print out
+                         running information.""",
+    )
+    add_bool_argument(
+        cmdline,
+        "--eval",
+        help="""Evaluate the top-1 and top-5 accuracy of
                      the latest checkpointed model. To evaluate with multiple GPUs, ensure that
                      all processes can access all checkpoints: either save them with --local_ckpt,
                      or save them to a shared directory that all processes
-                      can access.""")
-    cmdline.add_argument('--eval_interval', type=int,
-                         help="""Evaluate accuracy per eval_interval number of epochs""")
-    add_bool_argument(cmdline, '--fp16', default=True,
-                      help="""Train using float16 (half) precision instead
-                      of float32.""")
-    cmdline.add_argument('--num_gpus', default=1, type=int,
-                         help="""Specify total number of GPUS used to train a checkpointed model during eval.
-                                Used only to calculate epoch number to print during evaluation""")
-
-    cmdline.add_argument('--save_checkpoints_steps', type=int, default=1000)
-    cmdline.add_argument('--save_summary_steps', type=int, default=0)
-    add_bool_argument(cmdline, '--adv_bn_init', default=True,
-                      help="""init gamme of the last BN of each ResMod at 0.""")
-    add_bool_argument(cmdline, '--adv_conv_init', default=True,
-                      help="""init conv with MSRA initializer""")
-
-    cmdline.add_argument('--lr', type=float,
-                         help="""Start learning rate""")
-    cmdline.add_argument('--mom', default=0.90, type=float,
-                         help="""Momentum""")
-    cmdline.add_argument('--wdecay', default=0.0001, type=float,
-                         help="""Weight decay""")
-    cmdline.add_argument('--loss_scale', default=1024., type=float,
-                         help="""loss scale""")
-    cmdline.add_argument('--warmup_lr', default=0.001, type=float,
-                         help="""Warmup starting from this learning rate""")
-    cmdline.add_argument('--warmup_epochs', default=0, type=int,
-                         help="""Number of epochs in which to warmup to given lr""")
-    cmdline.add_argument('--lr_decay_steps', default='30,60,80', type=str,
-                         help="""epoch numbers at which lr is decayed by lr_decay_lrs. 
-                         Used when lr_decay_mode is steps""")
-    cmdline.add_argument('--lr_decay_lrs', default='', type=str,
-                         help="""learning rates at specific epochs""")
-    cmdline.add_argument('--lr_decay_mode', default='poly',
-                         help="""Takes either `steps` (decay by a factor at specified steps) 
-                         or `poly`(polynomial_decay with degree 2)""")
-    
-    add_bool_argument(cmdline, '--use_larc', default=False, 
-                        help="""Use Layer wise Adaptive Rate Control which helps convergence at really large batch sizes""")
-    cmdline.add_argument('--leta', default=0.013, type=float,
-                         help="""The trust coefficient for LARC optimization, LARC Eta""")
-    
-    cmdline.add_argument('--cdr_first_decay_ratio', default=0.33, type=float,
-                         help="""Cosine Decay Restart First Deacy Steps ratio""")
-    cmdline.add_argument('--cdr_t_mul', default=2.0, type=float,
-                         help="""Cosine Decay Restart t_mul""")
-    cmdline.add_argument('--cdr_m_mul', default=0.1, type=float,
-                         help="""Cosine Decay Restart m_mul""")
-    cmdline.add_argument('--cdr_alpha', default=0.0, type=float,
-                         help="""Cosine Decay Restart alpha""")
-    cmdline.add_argument('--lc_periods', default=0.47, type=float,
-                         help="""Linear Cosine num of periods""")
-    cmdline.add_argument('--lc_alpha', default=0.0, type=float,
-                         help="""linear Cosine alpha""")
-    cmdline.add_argument('--lc_beta', default=0.00001, type=float,
-                         help="""Liner Cosine Beta""")
-
-    add_bool_argument(cmdline, '--increased_aug', default=False, 
-                         help="""Increase augmentations helpful when training with large number of GPUs such as 128 or 256""")
-    cmdline.add_argument('--contrast', default=0.6, type=float,
-                         help="""contrast factor""")
-    cmdline.add_argument('--saturation', default=0.6, type=float,
-                         help="""saturation factor""")
-    cmdline.add_argument('--hue', default=0.13, type=float,
-                         help="""hue max delta factor, hue delta = hue * math.pi""")
-    cmdline.add_argument('--brightness', default=0.3, type=float,
-                         help="""Brightness factor""")
+                      can access.""",
+    )
+    cmdline.add_argument(
+        "--eval_interval", type=int, help="""Evaluate accuracy per eval_interval number of epochs"""
+    )
+    add_bool_argument(
+        cmdline,
+        "--fp16",
+        default=True,
+        help="""Train using float16 (half) precision instead
+                      of float32.""",
+    )
+    cmdline.add_argument(
+        "--num_gpus",
+        default=1,
+        type=int,
+        help="""Specify total number of GPUS used to train a checkpointed model during eval.
+                                Used only to calculate epoch number to print during evaluation""",
+    )
+
+    cmdline.add_argument("--save_checkpoints_steps", type=int, default=1000)
+    cmdline.add_argument("--save_summary_steps", type=int, default=0)
+    add_bool_argument(
+        cmdline,
+        "--adv_bn_init",
+        default=True,
+        help="""init gamme of the last BN of each ResMod at 0.""",
+    )
+    add_bool_argument(
+        cmdline, "--adv_conv_init", default=True, help="""init conv with MSRA initializer"""
+    )
+
+    cmdline.add_argument("--lr", type=float, help="""Start learning rate""")
+    cmdline.add_argument("--mom", default=0.90, type=float, help="""Momentum""")
+    cmdline.add_argument("--wdecay", default=0.0001, type=float, help="""Weight decay""")
+    cmdline.add_argument("--loss_scale", default=1024.0, type=float, help="""loss scale""")
+    cmdline.add_argument(
+        "--warmup_lr", default=0.001, type=float, help="""Warmup starting from this learning rate"""
+    )
+    cmdline.add_argument(
+        "--warmup_epochs",
+        default=0,
+        type=int,
+        help="""Number of epochs in which to warmup to given lr""",
+    )
+    cmdline.add_argument(
+        "--lr_decay_steps",
+        default="30,60,80",
+        type=str,
+        help="""epoch numbers at which lr is decayed by lr_decay_lrs. 
+                         Used when lr_decay_mode is steps""",
+    )
+    cmdline.add_argument(
+        "--lr_decay_lrs", default="", type=str, help="""learning rates at specific epochs"""
+    )
+    cmdline.add_argument(
+        "--lr_decay_mode",
+        default="poly",
+        help="""Takes either `steps` (decay by a factor at specified steps) 
+                         or `poly`(polynomial_decay with degree 2)""",
+    )
+
+    add_bool_argument(
+        cmdline,
+        "--use_larc",
+        default=False,
+        help="""Use Layer wise Adaptive Rate Control which helps convergence at really large batch sizes""",
+    )
+    cmdline.add_argument(
+        "--leta",
+        default=0.013,
+        type=float,
+        help="""The trust coefficient for LARC optimization, LARC Eta""",
+    )
+
+    cmdline.add_argument(
+        "--cdr_first_decay_ratio",
+        default=0.33,
+        type=float,
+        help="""Cosine Decay Restart First Deacy Steps ratio""",
+    )
+    cmdline.add_argument(
+        "--cdr_t_mul", default=2.0, type=float, help="""Cosine Decay Restart t_mul"""
+    )
+    cmdline.add_argument(
+        "--cdr_m_mul", default=0.1, type=float, help="""Cosine Decay Restart m_mul"""
+    )
+    cmdline.add_argument(
+        "--cdr_alpha", default=0.0, type=float, help="""Cosine Decay Restart alpha"""
+    )
+    cmdline.add_argument(
+        "--lc_periods", default=0.47, type=float, help="""Linear Cosine num of periods"""
+    )
+    cmdline.add_argument("--lc_alpha", default=0.0, type=float, help="""linear Cosine alpha""")
+    cmdline.add_argument("--lc_beta", default=0.00001, type=float, help="""Liner Cosine Beta""")
+
+    add_bool_argument(
+        cmdline,
+        "--increased_aug",
+        default=False,
+        help="""Increase augmentations helpful when training with large number of GPUs such as 128 or 256""",
+    )
+    cmdline.add_argument("--contrast", default=0.6, type=float, help="""contrast factor""")
+    cmdline.add_argument("--saturation", default=0.6, type=float, help="""saturation factor""")
+    cmdline.add_argument(
+        "--hue",
+        default=0.13,
+        type=float,
+        help="""hue max delta factor, hue delta = hue * math.pi""",
+    )
+    cmdline.add_argument("--brightness", default=0.3, type=float, help="""Brightness factor""")
     return cmdline
 
 
 def sort_and_load_ckpts(log_dir):
     ckpts = []
     for f in os.listdir(log_dir):
-        m = re.match(r'model.ckpt-([0-9]+).index', f)
+        m = re.match(r"model\.ckpt-([0-9]+)\.index", f)  # escape the dots so they match literally
         if m is None:
             continue
         fullpath = os.path.join(log_dir, f)
-        ckpts.append({'step': int(m.group(1)),
-                      'path': os.path.splitext(fullpath)[0],
-                      'mtime': os.stat(fullpath).st_mtime,
-                      })
-    ckpts.sort(key=itemgetter('step'))
+        ckpts.append(
+            {
+                "step": int(m.group(1)),
+                "path": os.path.splitext(fullpath)[0],
+                "mtime": os.stat(fullpath).st_mtime,
+            }
+        )
+    ckpts.sort(key=itemgetter("step"))
     return ckpts
 
 
 def main():
     gpu_thread_count = 2
-    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
-    os.environ['TF_GPU_THREAD_COUNT'] = str(gpu_thread_count)
-    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
-    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
+    os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"
+    os.environ["TF_GPU_THREAD_COUNT"] = str(gpu_thread_count)
+    os.environ["TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT"] = "1"
+    os.environ["TF_ENABLE_WINOGRAD_NONFUSED"] = "1"
     hvd.init()
 
-
     config = tf.ConfigProto()
     config.gpu_options.visible_device_list = str(hvd.local_rank())
     config.gpu_options.force_gpu_compatible = True  # Force pinned memory
@@ -914,7 +1107,7 @@ def main():
     FLAGS.log_dir = None if FLAGS.log_dir == "" else FLAGS.log_dir
 
     if FLAGS.eval:
-        FLAGS.log_name = 'eval_' + FLAGS.log_name
+        FLAGS.log_name = "eval_" + FLAGS.log_name
         if hvd.rank() != 0:
             return
     if FLAGS.local_ckpt:
@@ -930,7 +1123,7 @@ def main():
         os.makedirs(FLAGS.log_dir)
     barrier = hvd.allreduce(tf.constant(0, dtype=tf.float32))
     tf.Session(config=config).run(barrier)
-    
+
     logger = logging.getLogger(FLAGS.log_name)
     logger.setLevel(logging.INFO)  # INFO, ERROR
     # file handler which logs debug messages
@@ -939,7 +1132,7 @@ def main():
     ch.setLevel(logging.INFO)
     # add formatter to the handlers
     # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-    formatter = logging.Formatter('%(message)s')
+    formatter = logging.Formatter("%(message)s")
     ch.setFormatter(formatter)
     logger.addHandler(ch)
     if not hvd.rank():
@@ -948,23 +1141,25 @@ def main():
         fh.setFormatter(formatter)
         # add handlers to logger
         logger.addHandler(fh)
-    
+
     height, width = 224, 224
     global_batch_size = FLAGS.batch_size * hvd.size()
-    rank0log(logger, 'PY' + str(sys.version) + 'TF' + str(tf.__version__))
+    rank0log(logger, "PY" + str(sys.version) + "TF" + str(tf.__version__))
     rank0log(logger, "Horovod size: ", hvd.size())
 
     if FLAGS.data_dir:
-        filename_pattern = os.path.join(FLAGS.data_dir, '%s-*')
-        train_filenames = sorted(tf.gfile.Glob(filename_pattern % 'train'))
-        eval_filenames = sorted(tf.gfile.Glob(filename_pattern % 'validation'))
+        filename_pattern = os.path.join(FLAGS.data_dir, "%s-*")
+        train_filenames = sorted(tf.gfile.Glob(filename_pattern % "train"))
+        eval_filenames = sorted(tf.gfile.Glob(filename_pattern % "validation"))
         num_training_samples = get_num_records(train_filenames)
         rank0log(logger, "Using data from: ", FLAGS.data_dir)
         if not FLAGS.eval:
-            rank0log(logger, 'Found ', num_training_samples, ' training samples')
+            rank0log(logger, "Found ", num_training_samples, " training samples")
     else:
         if not FLAGS.synthetic:
-            raise ValueError('data_dir missing. Please pass --synthetic if you want to run on synthetic data. Else please pass --data_dir')
+            raise ValueError(
+                "data_dir missing. Please pass --synthetic to run on synthetic data; otherwise, please pass --data_dir"
+            )
         train_filenames = eval_filenames = []
         num_training_samples = 1281167
     training_samples_per_rank = num_training_samples // hvd.size()
@@ -979,9 +1174,9 @@ def main():
     nstep_per_epoch = num_training_samples // global_batch_size
     decay_steps = nstep
 
-    if FLAGS.lr_decay_mode == 'steps':
-        steps = [int(x) * nstep_per_epoch for x in FLAGS.lr_decay_steps.split(',')]
-        lr_steps = [float(x) for x in FLAGS.lr_decay_lrs.split(',')]
+    if FLAGS.lr_decay_mode == "steps":
+        steps = [int(x) * nstep_per_epoch for x in FLAGS.lr_decay_steps.split(",")]
+        lr_steps = [float(x) for x in FLAGS.lr_decay_lrs.split(",")]
     else:
         steps = []
         lr_steps = []
@@ -997,11 +1192,11 @@ def main():
     if not FLAGS.save_summary_steps:
         # default to save one checkpoint per epoch
         FLAGS.save_summary_steps = nstep_per_epoch
-    
+
     if not FLAGS.eval:
-        rank0log(logger, 'Using a learning rate of ', FLAGS.lr)
-        rank0log(logger, 'Checkpointing every ' + str(FLAGS.save_checkpoints_steps) + ' steps')
-        rank0log(logger, 'Saving summary every ' + str(FLAGS.save_summary_steps) + ' steps')
+        rank0log(logger, "Using a learning rate of ", FLAGS.lr)
+        rank0log(logger, "Checkpointing every " + str(FLAGS.save_checkpoints_steps) + " steps")
+        rank0log(logger, "Saving summary every " + str(FLAGS.save_summary_steps) + " steps")
 
     warmup_it = nstep_per_epoch * FLAGS.warmup_epochs
 
@@ -1009,62 +1204,74 @@ def main():
         model_fn=cnn_model_function,
         model_dir=FLAGS.log_dir,
         params={
-            'model': FLAGS.model,
-            'decay_steps': decay_steps,
-            'n_classes': 1000,
-            'dtype': tf.float16 if FLAGS.fp16 else tf.float32,
-            'format': 'channels_first',
-            'device': '/gpu:0',
-            'lr': FLAGS.lr,
-            'mom': FLAGS.mom,
-            'wdecay': FLAGS.wdecay,
-            'use_larc': FLAGS.use_larc,
-            'leta': FLAGS.leta,
-            'steps': steps,
-            'lr_steps': lr_steps,
-            'lr_decay_mode': FLAGS.lr_decay_mode,
-            'warmup_it': warmup_it,
-            'warmup_lr': FLAGS.warmup_lr,
-            'cdr_first_decay_ratio': FLAGS.cdr_first_decay_ratio,
-            'cdr_t_mul': FLAGS.cdr_t_mul,
-            'cdr_m_mul': FLAGS.cdr_m_mul,
-            'cdr_alpha': FLAGS.cdr_alpha,
-            'lc_periods': FLAGS.lc_periods,
-            'lc_alpha': FLAGS.lc_alpha,
-            'lc_beta': FLAGS.lc_beta,
-            'loss_scale': FLAGS.loss_scale,
-            'adv_bn_init': FLAGS.adv_bn_init,
-            'conv_init': tf.variance_scaling_initializer() if FLAGS.adv_conv_init else None
+            "model": FLAGS.model,
+            "decay_steps": decay_steps,
+            "n_classes": 1000,
+            "dtype": tf.float16 if FLAGS.fp16 else tf.float32,
+            "format": "channels_first",
+            "device": "/gpu:0",
+            "lr": FLAGS.lr,
+            "mom": FLAGS.mom,
+            "wdecay": FLAGS.wdecay,
+            "use_larc": FLAGS.use_larc,
+            "leta": FLAGS.leta,
+            "steps": steps,
+            "lr_steps": lr_steps,
+            "lr_decay_mode": FLAGS.lr_decay_mode,
+            "warmup_it": warmup_it,
+            "warmup_lr": FLAGS.warmup_lr,
+            "cdr_first_decay_ratio": FLAGS.cdr_first_decay_ratio,
+            "cdr_t_mul": FLAGS.cdr_t_mul,
+            "cdr_m_mul": FLAGS.cdr_m_mul,
+            "cdr_alpha": FLAGS.cdr_alpha,
+            "lc_periods": FLAGS.lc_periods,
+            "lc_alpha": FLAGS.lc_alpha,
+            "lc_beta": FLAGS.lc_beta,
+            "loss_scale": FLAGS.loss_scale,
+            "adv_bn_init": FLAGS.adv_bn_init,
+            "conv_init": tf.variance_scaling_initializer() if FLAGS.adv_conv_init else None,
         },
         config=tf.estimator.RunConfig(
             # tf_random_seed=31 * (1 + hvd.rank()),
             session_config=config,
             save_summary_steps=FLAGS.save_summary_steps if do_checkpoint else None,
             save_checkpoints_steps=FLAGS.save_checkpoints_steps if do_checkpoint else None,
-            keep_checkpoint_max=None))
+            keep_checkpoint_max=None,
+        ),
+    )
 
     if not FLAGS.eval:
         num_preproc_threads = 5
         rank0log(logger, "Using preprocessing threads per GPU: ", num_preproc_threads)
-        training_hooks = [hvd.BroadcastGlobalVariablesHook(0),
-                          PrefillStagingAreasHook()]
+        training_hooks = [hvd.BroadcastGlobalVariablesHook(0), PrefillStagingAreasHook()]
         if hvd.rank() == 0:
             training_hooks.append(
-                LogSessionRunHook(global_batch_size,
-                                  num_training_samples,
-                                  FLAGS.display_every, logger))
+                LogSessionRunHook(
+                    global_batch_size, num_training_samples, FLAGS.display_every, logger
+                )
+            )
         try:
             start_time = time.time()
             classifier.train(
                 input_fn=lambda: make_dataset(
                     train_filenames,
                     training_samples_per_rank,
-                    FLAGS.batch_size, height, width, 
-                    FLAGS.brightness, FLAGS.contrast, FLAGS.saturation, FLAGS.hue, 
-                    training=True, num_threads=num_preproc_threads, 
-                    shard=True, synthetic=FLAGS.synthetic, increased_aug=FLAGS.increased_aug),
+                    FLAGS.batch_size,
+                    height,
+                    width,
+                    FLAGS.brightness,
+                    FLAGS.contrast,
+                    FLAGS.saturation,
+                    FLAGS.hue,
+                    training=True,
+                    num_threads=num_preproc_threads,
+                    shard=True,
+                    synthetic=FLAGS.synthetic,
+                    increased_aug=FLAGS.increased_aug,
+                ),
                 max_steps=nstep,
-                hooks=training_hooks)
+                hooks=training_hooks,
+            )
             rank0log(logger, "Finished in ", time.time() - start_time)
         except KeyboardInterrupt:
             print("Keyboard interrupt")
@@ -1075,45 +1282,62 @@ def main():
         tf.Session(config=config).run(barrier)
         time.sleep(5)  # a little extra margin...
         if FLAGS.num_gpus == 1:
-            rank0log(logger, """If you are evaluating checkpoints of a multi-GPU run on a single GPU,
+            rank0log(
+                logger,
+                """If you are evaluating checkpoints of a multi-GPU run on a single GPU,
              ensure you set --num_gpus to the number of GPUs it was trained on.
-             This will ensure that the epoch number is accurately displayed in the below logs.""")
+             This ensures that the epoch number is displayed accurately in the logs below.""",
+            )
         try:
             ckpts = sort_and_load_ckpts(FLAGS.log_dir)
             for i, c in enumerate(ckpts):
                 if i < len(ckpts) - 1:
-                    if (not FLAGS.eval_interval) or \
-                            (i % FLAGS.eval_interval != 0):
+                    if (not FLAGS.eval_interval) or (i % FLAGS.eval_interval != 0):
                         continue
                 eval_result = classifier.evaluate(
                     input_fn=lambda: make_dataset(
                         eval_filenames,
-                        get_num_records(eval_filenames), FLAGS.batch_size,
-                        height, width, 
-                        FLAGS.brightness, FLAGS.contrast, FLAGS.saturation, FLAGS.hue,
-                        training=False, shard=True, increased_aug=False),
-                    checkpoint_path=c['path'])
-                c['epoch'] = math.ceil(c['step'] / (num_training_samples / (FLAGS.batch_size * FLAGS.num_gpus)))
-                c['top1'] = eval_result['val-top1acc']
-                c['top5'] = eval_result['val-top5acc']
-                c['loss'] = eval_result['loss']
-            rank0log(logger, ' step  epoch  top1    top5     loss   checkpoint_time(UTC)')
+                        get_num_records(eval_filenames),
+                        FLAGS.batch_size,
+                        height,
+                        width,
+                        FLAGS.brightness,
+                        FLAGS.contrast,
+                        FLAGS.saturation,
+                        FLAGS.hue,
+                        training=False,
+                        shard=True,
+                        increased_aug=False,
+                    ),
+                    checkpoint_path=c["path"],
+                )
+                c["epoch"] = math.ceil(
+                    c["step"] / (num_training_samples / (FLAGS.batch_size * FLAGS.num_gpus))
+                )
+                c["top1"] = eval_result["val-top1acc"]
+                c["top5"] = eval_result["val-top5acc"]
+                c["loss"] = eval_result["loss"]
+            rank0log(logger, " step  epoch  top1    top5     loss   checkpoint_time(UTC)")
             barrier = hvd.allreduce(tf.constant(0, dtype=tf.float32))
             for i, c in enumerate(ckpts):
                 tf.Session(config=config).run(barrier)
-                if 'top1' not in c:
+                if "top1" not in c:
                     continue
-                rank0log(logger,'{:5d}  {:5.1f}  {:5.3f}  {:6.2f}  {:6.2f}  {time}'
-                         .format(c['step'],
-                                 c['epoch'],
-                                 c['top1'] * 100,
-                                 c['top5'] * 100,
-                                 c['loss'],
-                                 time=time.strftime('%Y-%m-%d %H:%M:%S', 
-                                    time.localtime(c['mtime']))))
+                rank0log(
+                    logger,
+                    "{:5d}  {:5.1f}  {:5.3f}  {:6.2f}  {:6.2f}  {time}".format(
+                        c["step"],
+                        c["epoch"],
+                        c["top1"] * 100,
+                        c["top5"] * 100,
+                        c["loss"],
+                        time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(c["mtime"])),
+                    ),
+                )
             rank0log(logger, "Finished evaluation")
         except KeyboardInterrupt:
             logger.error("Keyboard interrupt")
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/benchmarks/tf_benchmarks/README.md b/benchmarks/tf_benchmarks/README.md
index e1aecba4..badee3ba 100644
--- a/benchmarks/tf_benchmarks/README.md
+++ b/benchmarks/tf_benchmarks/README.md
@@ -1,9 +1,9 @@
 # TensorFlow benchmarking scripts
 
-This folder contains the TF training scripts https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks.
+This folder contains a copy of [TensorFlow's `tf_cnn_benchmarks.py` script](https://github.com/tensorflow/benchmarks/blob/e3bd1370ba21b02c4d34340934ffb4941977d96f/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py).
 
 ## Basic usage
-**execute_tensorflow_training.py train** uses SageMaker python sdk to start a training job. 
+**execute_tensorflow_training.py train** uses the SageMaker Python SDK to start a training job.
 
 ```bash
 ./execute_tensorflow_training.py train --help
@@ -26,7 +26,7 @@ Options:
   --help                          Show this message and exit.
 
 ```
-**execute_tensorflow_training.py generate_reports** generate benchmark reports. 
+**execute_tensorflow_training.py generate_reports** generates benchmark reports.
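+
+To see the available report-generation options (assuming the same CLI conventions as the `train` command above):
+
+```bash
+./execute_tensorflow_training.py generate_reports --help
+```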
 
 ## Examples:
 
diff --git a/benchmarks/tf_benchmarks/benchmarks b/benchmarks/tf_benchmarks/benchmarks
deleted file mode 160000
index ec056be5..00000000
--- a/benchmarks/tf_benchmarks/benchmarks
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit ec056be57f189ec96611a58e8dc5562a6d620139
diff --git a/benchmarks/tf_benchmarks/execute_tensorflow_training.py b/benchmarks/tf_benchmarks/execute_tensorflow_training.py
index b4f15304..e424638c 100755
--- a/benchmarks/tf_benchmarks/execute_tensorflow_training.py
+++ b/benchmarks/tf_benchmarks/execute_tensorflow_training.py
@@ -11,7 +11,6 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-
 from __future__ import absolute_import
 
 import argparse
@@ -26,13 +25,13 @@
 dir_path = os.path.dirname(os.path.realpath(__file__))
 
 _DEFAULT_HYPERPARAMETERS = {
-    'batch_size':           32,
-    'model':                'resnet32',
-    'num_epochs':           10,
-    'data_format':          'NHWC',
-    'summary_verbosity':    1,
-    'save_summaries_steps': 10,
-    'data_name':            'cifar10'
+    "batch_size": 32,
+    "model": "resnet32",
+    "num_epochs": 10,
+    "data_format": "NHWC",
+    "summary_verbosity": 1,
+    "save_summaries_steps": 10,
+    "data_name": "cifar10",
 }
 
 
@@ -44,67 +43,73 @@ class ScriptModeTensorFlow(Framework):
 
     create_model = TensorFlow.create_model
 
-    def __init__(self, py_version='py3', **kwargs):
+    def __init__(self, py_version="py3", **kwargs):
         super(ScriptModeTensorFlow, self).__init__(**kwargs)
         self.py_version = py_version
         self.image_name = None
-        self.framework_version = '1.10.0'
+        self.framework_version = "1.10.0"
 
 
 def get_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-t', '--instance-types', nargs='+', help=' Set flag', required=True)
-    parser.add_argument('-r', '--role', required=True)
-    parser.add_argument('-w', '--wait', action='store_true')
-    parser.add_argument('--region', default='us-west-2')
-    parser.add_argument('--py-versions', nargs='+', help=' Set flag', default=['py3'])
-    parser.add_argument('--checkpoint-path',
-                        default=os.path.join(default_bucket(), 'benchmarks', 'checkpoints'),
-                        help='The S3 location where the model checkpoints and tensorboard events are saved after training')
+    parser.add_argument(
+        "-t", "--instance-types", nargs="+", help="instance types to benchmark", required=True
+    )
+    parser.add_argument("-r", "--role", required=True)
+    parser.add_argument("-w", "--wait", action="store_true")
+    parser.add_argument("--region", default="us-west-2")
+    parser.add_argument("--py-versions", nargs="+", help=" Set flag", default=["py3"])
+    parser.add_argument(
+        "--checkpoint-path",
+        default=os.path.join(default_bucket(), "benchmarks", "checkpoints"),
+        help="The S3 location where the model checkpoints and tensorboard events are saved after training",
+    )
 
     return parser.parse_known_args()
 
 
 def main(args, script_args):
     for instance_type, py_version in itertools.product(args.instance_types, args.py_versions):
-        base_name = '%s-%s-%s' % (py_version, instance_type[3:5], instance_type[6:])
+        base_name = "%s-%s-%s" % (py_version, instance_type[3:5], instance_type[6:])
         model_dir = os.path.join(args.checkpoint_path, base_name)
 
         job_hps = create_hyperparameters(model_dir, script_args)
 
-        print('hyperparameters:')
+        print("hyperparameters:")
         print(job_hps)
 
         estimator = ScriptModeTensorFlow(
-            entry_point='tf_cnn_benchmarks.py',
-            role='SageMakerRole',
-            source_dir=os.path.join(dir_path, 'tf_cnn_benchmarks'),
+            entry_point="tf_cnn_benchmarks.py",
+            role="SageMakerRole",
+            source_dir=os.path.join(dir_path, "tf_cnn_benchmarks"),
             base_job_name=base_name,
             train_instance_count=1,
             hyperparameters=job_hps,
             train_instance_type=instance_type,
         )
 
-        input_dir = 's3://sagemaker-sample-data-%s/spark/mnist/train/' % args.region
-        estimator.fit({'train': input_dir}, wait=args.wait)
+        input_dir = "s3://sagemaker-sample-data-%s/spark/mnist/train/" % args.region
+        estimator.fit({"train": input_dir}, wait=args.wait)
 
     print("To use TensorBoard, execute the following command:")
-    cmd = 'S3_USE_HTTPS=0 S3_VERIFY_SSL=0  AWS_REGION=%s tensorboard --host localhost --port 6006 --logdir %s'
+    cmd = "S3_USE_HTTPS=0 S3_VERIFY_SSL=0  AWS_REGION=%s tensorboard --host localhost --port 6006 --logdir %s"
     print(cmd % (args.region, args.checkpoint_path))
 
 
 def create_hyperparameters(model_dir, script_args):
     job_hps = _DEFAULT_HYPERPARAMETERS.copy()
 
-    job_hps.update({'train_dir': model_dir, 'eval_dir': model_dir})
+    job_hps.update({"train_dir": model_dir, "eval_dir": model_dir})
 
-    script_arg_keys_without_dashes = [key[2:] if key.startswith('--') else key[1:] for key in script_args[::2]]
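+    # script_args is a flat list of alternating "--flag value" tokens; strip the leading dashes from the flag names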
+    script_arg_keys_without_dashes = [
+        key[2:] if key.startswith("--") else key[1:] for key in script_args[::2]
+    ]
     script_arg_values = script_args[1::2]
     job_hps.update(dict(zip(script_arg_keys_without_dashes, script_arg_values)))
 
     return job_hps
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     args, script_args = get_args()
-    main(args, script_args)
\ No newline at end of file
+    main(args, script_args)
diff --git a/benchmarks/tf_benchmarks/models b/benchmarks/tf_benchmarks/models
deleted file mode 160000
index bd835e57..00000000
--- a/benchmarks/tf_benchmarks/models
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit bd835e5794e0833705a645ce74d4fdf8fbac6214
diff --git a/benchmarks/tf_benchmarks/tf_cnn_benchmarks/tf_cnn_benchmarks.py b/benchmarks/tf_benchmarks/tf_cnn_benchmarks/tf_cnn_benchmarks.py
new file mode 100644
index 00000000..c24f5e77
--- /dev/null
+++ b/benchmarks/tf_benchmarks/tf_cnn_benchmarks/tf_cnn_benchmarks.py
@@ -0,0 +1,68 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Benchmark script for TensorFlow.
+
+Originally copied from:
+https://github.com/tensorflow/benchmarks/blob/e3bd1370ba21b02c4d34340934ffb4941977d96f/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
+"""
+from __future__ import absolute_import, division, print_function
+
+from absl import app
+from absl import flags as absl_flags
+import tensorflow.compat.v1 as tf
+
+import benchmark_cnn
+import cnn_util
+import flags
+import mlperf
+from cnn_util import log_fn
+
+
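+# Define the benchmark's parameters as absl flags and mark them as key flags so they appear in this module's --help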
+flags.define_flags()
+for name in flags.param_specs.keys():
+    absl_flags.declare_key_flag(name)
+
+absl_flags.DEFINE_boolean(
+    "ml_perf_compliance_logging",
+    False,
+    "Print logs required to be compliant with MLPerf. If set, must clone the "
+    "MLPerf training repo https://github.com/mlperf/training and add "
+    "https://github.com/mlperf/training/tree/master/compliance to the "
+    "PYTHONPATH",
+)
+
+
+def main(positional_arguments):
+    # Command-line arguments like '--distortions False' are equivalent to
+    # '--distortions=True False', where False is a positional argument. To prevent
+    # this from silently running with distortions, we do not allow positional
+    # arguments.
+    assert len(positional_arguments) >= 1
+    if len(positional_arguments) > 1:
+        raise ValueError("Received unknown positional arguments: %s" % positional_arguments[1:])
+
+    params = benchmark_cnn.make_params_from_flags()
+    with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging, params.model):
+        params = benchmark_cnn.setup(params)
+        bench = benchmark_cnn.BenchmarkCNN(params)
+
+    tfversion = cnn_util.tensorflow_version_tuple()
+    log_fn("TensorFlow:  %i.%i" % (tfversion[0], tfversion[1]))
+
+    bench.print_info()
+    bench.run()
+
+
+if __name__ == "__main__":
+    tf.disable_v2_behavior()
+    app.run(main)  # Raises error on invalid flags, unlike tf.app.run()
diff --git a/buildspec-container-pr.yml b/buildspec-container-pr.yml
new file mode 100644
index 00000000..c43cb34f
--- /dev/null
+++ b/buildspec-container-pr.yml
@@ -0,0 +1,13 @@
+version: 0.2
+
+phases:
+  pre_build:
+    commands:
+      - PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+')
+      - echo 'Pull request number:' $PR_NUM '. No value means this build is not from a pull request.'
+
+  build:
+    commands:
+
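+      # Fail the build if the PR modifies anything under docker/; Dockerfile changes belong in the deep-learning-containers repository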
+      - error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1"
+      - execute-command-if-has-matching-changes "$error_cmd" "docker/"
diff --git a/buildspec-release.yml b/buildspec-release.yml
index e2ff7068..f2bd20c6 100644
--- a/buildspec-release.yml
+++ b/buildspec-release.yml
@@ -12,14 +12,14 @@ phases:
       # run unit tests
       - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
         AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
-        tox -e py27,py36 -- test/unit
+        tox -e py27,py36,py37 --parallel all -- test/unit
 
       # run local integ tests
       #- $(aws ecr get-login --no-include-email --region us-west-2)
-      #- IGNORE_COVERAGE=- tox -e py27,py36 -- test/integ/local
+      #- IGNORE_COVERAGE=- tox -e py27,py37 -- test/integ/local
 
       # run sagemaker integ tests
-      #- IGNORE_COVERAGE=- tox -e py27,py36 -- test/integ/sagemaker
+      #- IGNORE_COVERAGE=- tox -e py27,py37 -- test/integ/sagemaker
 
       # generate the distribution package
       - python3 setup.py sdist
diff --git a/buildspec-unit.yml b/buildspec-unit.yml
deleted file mode 100644
index c3412df7..00000000
--- a/buildspec-unit.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-version: 0.2
-
-phases:
-  build:
-    commands:
-    - pip install --upgrade pip --quiet
-    - pip install tox --quiet
-    - tox -e ${TOX_ENVLIST} -- test/unit
\ No newline at end of file
diff --git a/buildspec.yml b/buildspec.yml
index cf0e3e16..f4c4da8a 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -2,9 +2,12 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.15.0'
+    FRAMEWORK_VERSION: '1.15.2'
+    CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
+    GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-tensorflow-container'
+    DLC_ACCOUNT: '763104351884'
     SETUP_FILE: 'setup_cmds.sh'
     SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]'
 
@@ -15,110 +18,76 @@ phases:
       - ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text)
       - PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO"
       - PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+')
+      - BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
       - echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.'
 
   build:
     commands:
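+      # TOX_PARALLEL_NO_SPINNER=1 disables the tox parallel-mode spinner; PY_COLORS=0 turns off ANSI color codes in test output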
+      - TOX_PARALLEL_NO_SPINNER=1
+      - PY_COLORS=0
+
       # install
-      - pip3 install -U -e .
       - pip3 install -U -e .[test]
 
-      # run flake8
+      # run linters
       - tox -e flake8,twine
 
       # run unit tests
-      - tox -e py36,py27 test/unit
+      - tox -e py27,py36,py37 --parallel all test/unit
+
+      # define tags
+      - GENERIC_TAG="$FRAMEWORK_VERSION-tensorflow-$BUILD_ID"
+      - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
+      - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
+
+      # run local CPU integration tests (build and push the image to ECR repo)
+      - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type tf --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+      - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+
+      # launch remote GPU instance
+      - prefix='ml.'
+      - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
+      - create-key-pair
+      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
 
-      # Create pip archive
-      - root_dir=$(pwd)
-      - build_id="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
+      # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
       - python3 setup.py sdist
-      - tar_name=$(ls dist)
-
-      # Find build artifacts
-      - build_artifacts=$root_dir/docker/artifacts
-
-      # build py2 images
-
-      # prepare build context
-      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py2"
-      - cp $root_dir/dist/$tar_name $build_dir
-      - cp $build_artifacts/* $build_dir/
-      - cd $build_dir
-
-      # build cpu image
-      - cpu_dockerfile="Dockerfile.cpu"
-      - CPU_TAG_PY2="$FRAMEWORK_VERSION-cpu-py2-$build_id"
-      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY2 .
-
-      # build gpu image
-      - gpu_dockerfile="Dockerfile.gpu"
-      - GPU_TAG_PY2="$FRAMEWORK_VERSION-gpu-py2-$build_id"
-      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY2 .
-
-      # build py3 images
-
-      # prepare build context
-      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py3"
-      - cp $root_dir/dist/$tar_name $build_dir
-      - cp $build_artifacts/* $build_dir/
-      - cd $build_dir
-
-      # build cpu image
-      - cpu_dockerfile="Dockerfile.cpu"
-      - CPU_TAG_PY3="$FRAMEWORK_VERSION-cpu-py3-$build_id"
-      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY3 .
-
-      # build gpu image
-      - gpu_dockerfile="Dockerfile.gpu"
-      - GPU_TAG_PY3="$FRAMEWORK_VERSION-gpu-py3-$build_id"
-      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY3 .
-
-      # push images to ecr
+      - build_dir="test/container/$FRAMEWORK_VERSION"
+      - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
+      # push DLC GPU image to ECR
       - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker push $PREPROD_IMAGE:$CPU_TAG_PY2
-      - docker push $PREPROD_IMAGE:$GPU_TAG_PY2
-      - docker push $PREPROD_IMAGE:$CPU_TAG_PY3
-      - docker push $PREPROD_IMAGE:$GPU_TAG_PY3
-
-      # launch remote gpu instance
-      - instance_type='p2.xlarge'
-      - create-key-pair
-      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu
+      - docker push $PREPROD_IMAGE:$DLC_GPU_TAG
 
-      # run cpu integration tests
-      - py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu"
-      - py2_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu"
-      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-
-      # run gpu integration tests
+      # run GPU local integration tests
       - printf "$SETUP_CMDS" > $SETUP_FILE
-      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
-      - py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
-      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-
-      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu"
-      - py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-
-      # run sagemaker tests
-      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
-
+      # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
+      - generic_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
+      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+      - dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
+      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+
+      # run CPU sagemaker integration tests
+      - test_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+
+      # run GPU sagemaker integration tests
+      - test_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"
     finally:
-      # shut down remote gpu instance
+      # shut down remote GPU instance
       - cleanup-gpu-instances
       - cleanup-key-pairs
 
-      # remove ecr image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY2
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY2
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY3
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3
+      # remove ECR image
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
diff --git a/docker/1.15.2/py2/Dockerfile.cpu b/docker/1.15.2/py2/Dockerfile.cpu
new file mode 100644
index 00000000..7bb9acaa
--- /dev/null
+++ b/docker/1.15.2/py2/Dockerfile.cpu
@@ -0,0 +1,118 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow-1.15.2-cp27-cp27mu-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    ca-certificates \
+    curl \
+    git \
+    wget \
+    vim \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && mkdir -p /var/run/sshd \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN apt-get update \
+ && apt-get install -y \
+    python \
+    python-pip
+
+RUN pip --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python) /usr/local/bin/python
+
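+# Install pinned data-science libraries plus the SageMaker TensorFlow extensions and training toolkit;
+# TensorFlow itself is installed afterwards from TF_URL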
+RUN pip install --no-cache-dir -U \
+    numpy==1.16.5 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.2 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    "sagemaker-tensorflow-training>=2,<3" \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    awscli \
+ && pip install --no-cache-dir -U \
+    horovod==0.18.2
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/docker/1.15.2/py2/Dockerfile.gpu b/docker/1.15.2/py2/Dockerfile.gpu
new file mode 100644
index 00000000..35686af5
--- /dev/null
+++ b/docker/1.15.2/py2/Dockerfile.gpu
@@ -0,0 +1,160 @@
+# NVIDIA does not publish a TensorRT runtime library for Ubuntu 18.04 with CUDA 10.1 support, so we stick with CUDA 10.0.
+# https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/
+FROM nvidia/cuda:10.0-base-ubuntu18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp27-cp27mu-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated \
+    ca-certificates \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    curl \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
+    libnccl2=2.4.7-1+cuda10.0 \
+    libgomp1 \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    git \
+    wget \
+    vim \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    zlib1g-dev \
+    # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0
+    # adds a new list which contains libnvinfer library, so it needs another
+    # 'apt-get update' to retrieve that list before it can actually install the library.
+    # We don't install libnvinfer-dev since we don't need to build against TensorRT,
+    # and libnvinfer4 doesn't contain libnvinfer.a static library.
+ && apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    libnvinfer5=5.0.2-1+cuda10.0 \
+ && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /var/run/sshd
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+RUN apt-get update \
+ && apt-get install -y \
+    python \
+    python-pip
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+# Configure OpenMPI to use good defaults:
+#   --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Set default NCCL parameters
+RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+ENV PATH=/usr/local/nvidia/bin:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN mkdir -p /var/run/sshd \
+ && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN pip --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python) /usr/local/bin/python
+
+RUN pip install --no-cache-dir -U \
+    numpy==1.16.5 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.2 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    "sagemaker-tensorflow-training>=2,<3" \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    awscli
+
+# Install Horovod, temporarily using CUDA stubs
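+# (the NVIDIA driver libraries are only available at runtime, so Horovod links against the CUDA stub
+# libraries here; the final ldconfig restores the default linker cache)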
+RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs \
+ && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
+    horovod==0.18.2 \
+ && ldconfig
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+ && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+ && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/docker/1.15.2/py3/Dockerfile.cpu b/docker/1.15.2/py3/Dockerfile.cpu
new file mode 100644
index 00000000..667a3edf
--- /dev/null
+++ b/docker/1.15.2/py3/Dockerfile.cpu
@@ -0,0 +1,121 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    software-properties-common \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    ca-certificates \
+    curl \
+    git \
+    wget \
+    vim \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && mkdir -p /var/run/sshd \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN pip3 --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+RUN pip install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==7.0.0 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    keras==2.3.1 \
+    requests==2.22.0 \
+    smdebug==0.7.2 \
+    sagemaker==1.50.17 \
+    sagemaker-experiments==0.1.7 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    "sagemaker-tensorflow-training>=2,<3" \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --force-reinstall --no-cache-dir -U \
+    horovod==0.18.2 \
+ && pip install --no-cache-dir -U \
+    awscli
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/docker/1.15.2/py3/Dockerfile.gpu b/docker/1.15.2/py3/Dockerfile.gpu
new file mode 100644
index 00000000..56b5df5b
--- /dev/null
+++ b/docker/1.15.2/py3/Dockerfile.gpu
@@ -0,0 +1,167 @@
+# NVIDIA does not publish a TensorRT runtime library for Ubuntu 18.04 with CUDA 10.1 support, so we stick with CUDA 10.0.
+# https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/
+FROM nvidia/cuda:10.0-base-ubuntu18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    ca-certificates \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    curl \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
+    libnccl2=2.4.7-1+cuda10.0 \
+    libgomp1 \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    git \
+    wget \
+    vim \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    zlib1g-dev \
+    # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0
+    # adds a new list which contains libnvinfer library, so it needs another
+    # 'apt-get update' to retrieve that list before it can actually install the
+    # library.
+    # We don't install libnvinfer-dev since we don't need to build against TensorRT,
+    # and libnvinfer4 doesn't contain libnvinfer.a static library.
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    libnvinfer5=5.0.2-1+cuda10.0 \
+ && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /var/run/sshd
+
+###########################################################################
+# Horovod & its dependencies
+###########################################################################
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+# Configure OpenMPI to use good defaults:
+#   --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Set default NCCL parameters
+RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+ENV PATH=/usr/local/nvidia/bin:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN mkdir -p /var/run/sshd \
+ && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN pip3 --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+RUN pip install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==7.0.0 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    smdebug==0.7.2 \
+    sagemaker==1.50.17 \
+    sagemaker-experiments==0.1.7 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    "sagemaker-tensorflow-training>=2,<3" \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    awscli
+
+# Install Horovod, temporarily using CUDA stubs
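+# HOROVOD_GPU_ALLREDUCE=NCCL builds Horovod with NCCL-backed allreduce; HOROVOD_WITH_TENSORFLOW=1
+# fails the build if the TensorFlow extension cannot be built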
+RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs \
+ && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
+    horovod==0.18.2 \
+ && ldconfig
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+ && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+ && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/docker/1.15.2/py37/Dockerfile.cpu b/docker/1.15.2/py37/Dockerfile.cpu
new file mode 100644
index 00000000..e46ea361
--- /dev/null
+++ b/docker/1.15.2/py37/Dockerfile.cpu
@@ -0,0 +1,138 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow_cpu-1.15.2-cp37-cp37m-manylinux2010_x86_64.whl
+ARG PYTHON=python3
+ARG PYTHON_PIP=python3-pip
+ARG PIP=pip3
+ARG PYTHON_VERSION=3.7.7
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    curl \
+    git \
+    openssh-client \
+    openssh-server \
+    vim \
+    wget \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && mkdir -p /var/run/sshd \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    libbz2-dev \
+    libc6-dev \
+    libffi-dev \
+    libgdbm-dev \
+    libncursesw5-dev \
+    libreadline-gplv2-dev \
+    libsqlite3-dev \
+    libssl-dev \
+    tk-dev \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
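+# Build Python from source since the default python3 on Ubuntu 18.04 is 3.6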
+RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
+ && tar -xvf Python-$PYTHON_VERSION.tgz \
+ && cd Python-$PYTHON_VERSION \
+ && ./configure && make && make install \
+ && make && make install && rm -rf ../Python-$PYTHON_VERSION*
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+RUN ${PIP} install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==7.0.0 \
+    h5py==2.10.0 \
+    requests==2.22.0 \
+    smdebug==0.7.2 \
+    sagemaker-experiments==0.1.7 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    sagemaker-tensorflow-training==10.1.0 \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && ${PIP} install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && ${PIP} install --force-reinstall --no-cache-dir -U \
+    horovod==0.18.2 \
+ && ${PIP} install --no-cache-dir -U \
+    awscli
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/docker/1.15.2/py37/Dockerfile.gpu b/docker/1.15.2/py37/Dockerfile.gpu
new file mode 100644
index 00000000..aefc97ab
--- /dev/null
+++ b/docker/1.15.2/py37/Dockerfile.gpu
@@ -0,0 +1,184 @@
+# NVIDIA does not publish a TensorRT runtime library for Ubuntu 18.04 with CUDA 10.1 support, so we stick with CUDA 10.0.
+# https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/
+FROM nvidia/cuda:10.0-base-ubuntu18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build from being stopped by requests for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of the module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp37-cp37m-manylinux2010_x86_64.whl
+ARG PYTHON=python3
+ARG PYTHON_PIP=python3-pip
+ARG PIP=pip3
+ARG PYTHON_VERSION=3.7.7
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated \
+    build-essential \
+    ca-certificates \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    curl \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
+    libnccl2=2.4.7-1+cuda10.0 \
+    libgomp1 \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    git \
+    wget \
+    vim \
+    openssh-client \
+    openssh-server \
+    zlib1g-dev \
+    # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0
+    # adds a new list which contains libnvinfer library, so it needs another
+    # 'apt-get update' to retrieve that list before it can actually install the
+    # library.
+    # We don't install libnvinfer-dev since we don't need to build against TensorRT,
+    # and libnvinfer4 doesn't contain libnvinfer.a static library.
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    libnvinfer5=5.0.2-1+cuda10.0 \
+ && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /var/run/sshd
+
+###########################################################################
+# Horovod & its dependencies
+###########################################################################
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+# Configure OpenMPI to use good defaults:
+#   --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Set default NCCL parameters
+RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+ENV PATH=/usr/local/nvidia/bin:$PATH
+
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN mkdir -p /var/run/sshd \
+ && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    libbz2-dev \
+    libc6-dev \
+    libffi-dev \
+    libgdbm-dev \
+    libncursesw5-dev \
+    libreadline-gplv2-dev \
+    libsqlite3-dev \
+    libssl-dev \
+    tk-dev \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
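+# Build Python from source since the default python3 on Ubuntu 18.04 is 3.6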
+RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
+ && tar -xvf Python-$PYTHON_VERSION.tgz \
+ && cd Python-$PYTHON_VERSION \
+ && ./configure && make && make install \
+ && make && make install && rm -rf ../Python-$PYTHON_VERSION*
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+RUN ${PIP} install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==7.0.0 \
+    h5py==2.10.0 \
+    requests==2.22.0 \
+    smdebug==0.7.2 \
+    sagemaker-experiments==0.1.7 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    sagemaker-tensorflow-training==10.1.0 \
+    # Install TensorFlow separately at the end so that its version is not overwritten
+ && ${PIP} install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && ${PIP} install --no-cache-dir -U \
+    awscli
+
+# Install Horovod, temporarily using CUDA stubs
+RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs \
+ && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
+    horovod==0.18.2 \
+ && ldconfig
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+ && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+ && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..4c5649dc
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+markers =
+    deploy_test
+    skip_cpu
+    skip_gpu
diff --git a/scripts/build_all.py b/scripts/build_all.py
deleted file mode 100644
index 9f340d5d..00000000
--- a/scripts/build_all.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import argparse
-import os
-import subprocess
-
-VERSION = '1.13.1'
-REPO = 'sagemaker-tensorflow-scriptmode'
-PY2_CPU_BINARY = 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/cpu/latest-patch-latest-patch/tensorflow-1.13.1-cp27-cp27mu-linux_x86_64.whl' # noqa
-PY3_CPU_BINARY = 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/cpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl' # noqa
-PY2_GPU_BINARY = 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/gpu/latest-patch-latest-patch/tensorflow-1.13.1-cp27-cp27mu-linux_x86_64.whl' # noqa
-PY3_GPU_BINARY = 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/gpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl' # noqa
-DEV_ACCOUNT = '142577830533'
-REGION = 'us-west-2'
-
-
-def _parse_args():
-
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument('--account', type=str, default=DEV_ACCOUNT)
-    parser.add_argument('--region', type=str, default=REGION)
-    parser.add_argument('--version', type=str, default=VERSION)
-    parser.add_argument('--py2-cpu-binary', type=str, default=PY2_CPU_BINARY)
-    parser.add_argument('--py3-cpu-binary', type=str, default=PY3_CPU_BINARY)
-    parser.add_argument('--py2-gpu-binary', type=str, default=PY2_GPU_BINARY)
-    parser.add_argument('--py3-gpu-binary', type=str, default=PY3_GPU_BINARY)
-    parser.add_argument('--repo', type=str, default=REPO)
-
-    return parser.parse_args()
-
-
-args = _parse_args()
-binaries = {
-    'py2-cpu': args.py2_cpu_binary,
-    'py3-cpu': args.py3_cpu_binary,
-    'py2-gpu': args.py2_gpu_binary,
-    'py3-gpu': args.py3_gpu_binary
-}
-build_dir = os.path.join('docker', args.version)
-
-# Run docker-login so we can pull the cached image
-login_cmd = subprocess.check_output(
-    'aws ecr get-login --no-include-email --registry-id {}'.format(args.account).split())
-print('Executing docker login command: '.format(login_cmd))
-subprocess.check_call(login_cmd.split())
-
-for arch in ['cpu', 'gpu']:
-    for py_version in ['2', '3']:
-
-        binary_url = binaries['py{}-{}'.format(py_version, arch)]
-        binary_file = os.path.basename(binary_url)
-        cmd = 'wget -O {}/{} {}'.format(build_dir, binary_file, binary_url)
-        print('Downloading binary file: {}'.format(cmd))
-        subprocess.check_call(cmd.split())
-
-        tag = '{}-{}-py{}'.format(args.version, arch, py_version)
-        prev_image_uri = '{}.dkr.ecr.{}.amazonaws.com/{}:{}'.format(args.account, args.region, args.repo, tag)
-        dockerfile = os.path.join(build_dir, 'Dockerfile.{}'.format(arch))
-
-        tar_file_name = subprocess.check_output('ls {}/sagemaker_tensorflow_container*'.format(build_dir),
-                                                shell=True).strip().decode('ascii')
-        print('framework_support_installable is {}'.format(os.path.basename(tar_file_name)))
-
-        build_cmd = 'docker build -f {} --cache-from {} --build-arg framework_support_installable={} ' \
-                    '--build-arg py_version={} --build-arg framework_installable={} ' \
-                    '-t {}:{} {}'.format(dockerfile, prev_image_uri, os.path.basename(tar_file_name), py_version,
-                                         binary_file, args.repo, tag, build_dir)
-        print('Building docker image: {}'.format(build_cmd))
-        subprocess.check_call(build_cmd.split())
-
-        print('Deleting binary file {}'.format(binary_file))
-        subprocess.check_call('rm {}'.format(os.path.join(build_dir, binary_file)).split())
diff --git a/scripts/publish_all.py b/scripts/publish_all.py
deleted file mode 100644
index 2c78e8a7..00000000
--- a/scripts/publish_all.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import argparse
-import subprocess
-
-DEV_ACCOUNT = '142577830533'
-VERSION = '1.13.1'
-REGION = 'us-west-2'
-REPO = 'sagemaker-tensorflow-scriptmode'
-
-
-def _parse_args():
-
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument('--account', type=str, default=DEV_ACCOUNT)
-    parser.add_argument('--version', type=str, default=VERSION)
-    parser.add_argument('--repo', type=str, default=REPO)
-    parser.add_argument('--region', type=str, default=REGION)
-
-    return parser.parse_args()
-
-
-args = _parse_args()
-
-for arch in ['cpu', 'gpu']:
-    for py_version in ['2', '3']:
-        source = '{}:{}-{}-py{}'.format(args.repo, args.version, arch, py_version)
-        dest = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(args.account, args.region, source)
-        tag_cmd = 'docker tag {} {}'.format(source, dest)
-        print('Tagging image: {}'.format(tag_cmd))
-        subprocess.check_call(tag_cmd.split())
-        login_cmd = subprocess.check_output(
-            'aws ecr get-login --no-include-email --registry-id {} --region {}'
-            .format(args.account, args.region).split())
-        print('Executing docker login command: {}'.format(login_cmd))
-        subprocess.check_call(login_cmd.split())
-        push_cmd = 'docker push {}'.format(dest)
-        print('Pushing image: {}'.format(push_cmd))
-        subprocess.check_call(push_cmd.split())
diff --git a/setup.py b/setup.py
index 983ebd13..67cfbe56 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,7 @@
 import os
 from os.path import basename
 from os.path import splitext
+import sys
 
 from setuptools import find_packages, setup
 
@@ -25,41 +26,60 @@ def read(fname):
 
 
 def read_version():
-    return read('VERSION').strip()
+    return read("VERSION").strip()
 
 
-setup(
-    name='sagemaker_tensorflow_training',
-    version=read_version(),
-    description='Open source library for creating '
-                'TensorFlow containers to run on Amazon SageMaker.',
+test_dependencies = [
+    "tox",
+    "flake8",
+    "pytest",
+    "pytest-cov",
+    "pytest-xdist",
+    "mock",
+    "sagemaker==1.50.1",
+    "tensorflow<2.0",
+    "docker-compose",
+    "boto3==1.10.50",
+    "six==1.13.0",
+    "python-dateutil>=2.1,<2.8.1",
+    "botocore==1.13.50",
+    "requests-mock",
+    "awscli>=1.16.314",
+]
 
-    packages=find_packages(where='src', exclude=('test',)),
-    package_dir={'': 'src'},
-    py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
-
-    long_description=read('README.rst'),
-    author='Amazon Web Services',
-    url='https://github.com/aws/sagemaker-tensorflow-containers',
-    license='Apache License 2.0',
+if sys.version_info.major > 2:
+    test_dependencies.append("sagemaker-experiments==0.1.7")
 
+setup(
+    name="sagemaker_tensorflow_training",
+    version=read_version(),
+    description="Open source library for creating "
+    "TensorFlow containers to run on Amazon SageMaker.",
+    packages=find_packages(where="src", exclude=("test",)),
+    package_dir={"": "src"},
+    py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")],
+    long_description=read("README.rst"),
+    author="Amazon Web Services",
+    url="https://github.com/aws/sagemaker-tensorflow-containers",
+    license="Apache License 2.0",
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Developers",
         "Natural Language :: English",
         "License :: OSI Approved :: Apache Software License",
         "Programming Language :: Python",
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.6',
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
     ],
-
-    install_requires=['sagemaker-containers>=2.6.2', 'numpy', 'scipy', 'sklearn',
-                      'pandas', 'Pillow', 'h5py'],
-    extras_require={
-        'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist', 'mock',
-                 'sagemaker==1.50.1', 'tensorflow<2.0', 'docker-compose', 'boto3==1.10.50',
-                 'six==1.13.0', 'python-dateutil>=2.1,<2.8.1', 'botocore==1.13.50',
-                 'requests-mock', 'awscli==1.16.314'],
-        'benchmark': ['click']
-    },
+    install_requires=[
+        "sagemaker-training>=4.3.0,<5.2.0",
+        "numpy",
+        "scipy",
+        "sklearn",
+        "pandas",
+        "Pillow",
+        "h5py",
+    ],
+    extras_require={"test": test_dependencies, "benchmark": ["click"]},
 )
diff --git a/src/sagemaker_tensorflow_container/s3_utils.py b/src/sagemaker_tensorflow_container/s3_utils.py
index 0137ef25..15902c55 100644
--- a/src/sagemaker_tensorflow_container/s3_utils.py
+++ b/src/sagemaker_tensorflow_container/s3_utils.py
@@ -20,23 +20,23 @@
 
 def configure(model_dir, job_region):
 
-    os.environ['S3_REGION'] = _s3_region(job_region, model_dir)
+    os.environ["S3_REGION"] = _s3_region(job_region, model_dir)
 
     # setting log level to WARNING
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
-    os.environ['S3_USE_HTTPS'] = '1'
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
+    os.environ["S3_USE_HTTPS"] = "1"
 
 
 def _s3_region(job_region, model_dir):
-    if model_dir and model_dir.startswith('s3://'):
-        s3 = boto3.client('s3', region_name=job_region)
+    if model_dir and model_dir.startswith("s3://"):
+        s3 = boto3.client("s3", region_name=job_region)
 
         # We get the AWS region of the checkpoint bucket, which may be different from
         # the region this container is currently running in.
         parsed_url = urlparse(model_dir)
         bucket_name = parsed_url.netloc
 
-        bucket_location = s3.get_bucket_location(Bucket=bucket_name)['LocationConstraint']
+        bucket_location = s3.get_bucket_location(Bucket=bucket_name)["LocationConstraint"]
 
         return bucket_location or job_region
     else:
diff --git a/src/sagemaker_tensorflow_container/training.py b/src/sagemaker_tensorflow_container/training.py
index bce6a69c..f71db52a 100644
--- a/src/sagemaker_tensorflow_container/training.py
+++ b/src/sagemaker_tensorflow_container/training.py
@@ -19,15 +19,15 @@
 import subprocess
 import time
 
-import sagemaker_containers.beta.framework as framework
+from sagemaker_training import entry_point, environment, mapping, runner
 import tensorflow as tf
 
 from sagemaker_tensorflow_container import s3_utils
 
 logger = logging.getLogger(__name__)
 
-SAGEMAKER_PARAMETER_SERVER_ENABLED = 'sagemaker_parameter_server_enabled'
-MODEL_DIR = '/opt/ml/model'
+SAGEMAKER_PARAMETER_SERVER_ENABLED = "sagemaker_parameter_server_enabled"
+MODEL_DIR = "/opt/ml/model"
 
 
 def _is_host_master(hosts, current_host):
@@ -56,50 +56,46 @@ def _build_tf_config(hosts, current_host, ps_task=False):
     ps = hosts if len(hosts) > 1 else None
 
     def host_addresses(hosts, port=2222):
-        return ['{}:{}'.format(host, port) for host in hosts]
+        return ["{}:{}".format(host, port) for host in hosts]
 
-    tf_config = {
-        'cluster': {
-            'master': host_addresses(masters)
-        },
-        'environment': 'cloud'
-    }
+    tf_config = {"cluster": {"master": host_addresses(masters)}, "environment": "cloud"}
 
     if ps:
-        tf_config['cluster']['ps'] = host_addresses(ps, port='2223')
+        tf_config["cluster"]["ps"] = host_addresses(ps, port="2223")
 
     if workers:
-        tf_config['cluster']['worker'] = host_addresses(workers)
+        tf_config["cluster"]["worker"] = host_addresses(workers)
 
     if ps_task:
         if ps is None:
             raise ValueError(
-                'Cannot have a ps task if there are no parameter servers in the cluster')
-        task_type = 'ps'
+                "Cannot have a ps task if there are no parameter servers in the cluster"
+            )
+        task_type = "ps"
         task_index = ps.index(current_host)
     elif _is_host_master(hosts, current_host):
-        task_type = 'master'
+        task_type = "master"
         task_index = 0
     else:
-        task_type = 'worker'
+        task_type = "worker"
         task_index = workers.index(current_host)
 
-    tf_config['task'] = {'index': task_index, 'type': task_type}
+    tf_config["task"] = {"index": task_index, "type": task_type}
     return tf_config
 
 
 def _run_ps(env, cluster):
-    logger.info('Running distributed training job with parameter servers')
+    logger.info("Running distributed training job with parameter servers")
 
     cluster_spec = tf.train.ClusterSpec(cluster)
     task_index = env.hosts.index(env.current_host)
     # Force parameter server to run on cpu. Running multiple TensorFlow processes on the same
     # GPU is not safe:
     # https://stackoverflow.com/questions/46145100/is-it-unsafe-to-run-multiple-tensorflow-processes-on-the-same-gpu
-    no_gpu_config = tf.ConfigProto(device_count={'GPU': 0})
+    no_gpu_config = tf.ConfigProto(device_count={"GPU": 0})
 
     server = tf.train.Server(
-        cluster_spec, job_name='ps', task_index=task_index, config=no_gpu_config
+        cluster_spec, job_name="ps", task_index=task_index, config=no_gpu_config
     )
 
     multiprocessing.Process(target=lambda: server.join()).start()
@@ -107,20 +103,27 @@ def _run_ps(env, cluster):
 
 def _run_worker(env, cmd_args, tf_config):
     env_vars = env.to_env_vars()
-    env_vars['TF_CONFIG'] = json.dumps(tf_config)
-
-    framework.entry_point.run(env.module_dir, env.user_entry_point, cmd_args, env_vars)
+    env_vars["TF_CONFIG"] = json.dumps(tf_config)
+
+    entry_point.run(
+        uri=env.module_dir,
+        user_entry_point=env.user_entry_point,
+        args=cmd_args,
+        env_vars=env_vars,
+        capture_error=True,
+    )
 
 
 def _wait_until_master_is_down(master):
     while True:
         try:
             subprocess.check_call(
-                ['curl', '{}:2222'.format(master)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            logger.info('master {} is still up, waiting for it to exit'.format(master))
+                ["curl", "{}:2222".format(master)], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            )
+            logger.info("master {} is still up, waiting for it to exit".format(master))
             time.sleep(10)
         except subprocess.CalledProcessError:
-            logger.info('master {} is down, stopping parameter server'.format(master))
+            logger.info("master {} is down, stopping parameter server".format(master))
             return
 
 
@@ -128,18 +131,19 @@ def train(env, cmd_args):
     """Get training job environment from env and run the training job.
 
     Args:
-        env (sagemaker_containers.beta.framework.env.TrainingEnv): Instance of TrainingEnv class
+        env (sagemaker_training.env.TrainingEnv): Instance of TrainingEnv class
     """
     parameter_server_enabled = env.additional_framework_parameters.get(
-        SAGEMAKER_PARAMETER_SERVER_ENABLED, False)
+        SAGEMAKER_PARAMETER_SERVER_ENABLED, False
+    )
     if len(env.hosts) > 1 and parameter_server_enabled:
 
         tf_config = _build_tf_config(hosts=env.hosts, current_host=env.current_host)
 
-        logger.info('Running distributed training job with parameter servers')
-        logger.info('Launching parameter server process')
-        _run_ps(env, tf_config['cluster'])
-        logger.info('Launching worker process')
+        logger.info("Running distributed training job with parameter servers")
+        logger.info("Launching parameter server process")
+        _run_ps(env, tf_config["cluster"])
+        logger.info("Launching worker process")
         _run_worker(env, cmd_args, tf_config)
 
         if not _is_host_master(env.hosts, env.current_host):
@@ -147,15 +151,21 @@ def train(env, cmd_args):
 
     else:
 
-        mpi_enabled = env.additional_framework_parameters.get('sagemaker_mpi_enabled')
+        mpi_enabled = env.additional_framework_parameters.get("sagemaker_mpi_enabled")
 
         if mpi_enabled:
-            runner_type = framework.runner.MPIRunnerType
+            runner_type = runner.MPIRunnerType
         else:
-            runner_type = framework.runner.ProcessRunnerType
+            runner_type = runner.ProcessRunnerType
 
-        framework.entry_point.run(env.module_dir, env.user_entry_point, cmd_args, env.to_env_vars(),
-                                  runner=runner_type)
+        entry_point.run(
+            uri=env.module_dir,
+            user_entry_point=env.user_entry_point,
+            args=cmd_args,
+            env_vars=env.to_env_vars(),
+            capture_error=True,
+            runner_type=runner_type,
+        )
 
 
 def _log_model_missing_warning(model_dir):
@@ -165,48 +175,56 @@ def _log_model_missing_warning(model_dir):
         if filenames:
             file_exists = True
         for f in filenames:
-            if 'saved_model.pb' in f or 'saved_model.pbtxt' in f:
+            if "saved_model.pb" in f or "saved_model.pbtxt" in f:
                 pb_file_exists = True
                 path, direct_parent_dir = os.path.split(dirpath)
                 if not str.isdigit(direct_parent_dir):
-                    logger.warn('Your model will NOT be servable with SageMaker TensorFlow Serving containers. '
-                                'The SavedModel bundle is under directory \"{}\", not a numeric name.'
-                                .format(direct_parent_dir))
+                    logger.warn(
+                        "Your model will NOT be servable with SageMaker TensorFlow Serving containers. "
+                        'The SavedModel bundle is under directory "{}", not a numeric name.'.format(
+                            direct_parent_dir
+                        )
+                    )
 
     if not file_exists:
-        logger.warn('No model artifact is saved under path {}.'
-                    ' Your training job will not save any model files to S3.\n'
-                    'For details of how to construct your training script see:\n'
-                    'https://sagemaker.readthedocs.io/en/stable/using_tf.html#adapting-your-local-tensorflow-script'
-                    .format(model_dir))
+        logger.warn(
+            "No model artifact is saved under path {}."
+            " Your training job will not save any model files to S3.\n"
+            "For details of how to construct your training script see:\n"
+            "https://sagemaker.readthedocs.io/en/stable/using_tf.html#adapting-your-local-tensorflow-script".format(
+                model_dir
+            )
+        )
     elif not pb_file_exists:
-        logger.warn('Your model will NOT be servable with SageMaker TensorFlow Serving container. '
-                    'The model artifact was not saved in the TensorFlow SavedModel directory structure:\n'
-                    'https://www.tensorflow.org/guide/saved_model#structure_of_a_savedmodel_directory')
+        logger.warn(
+            "Your model will NOT be servable with SageMaker TensorFlow Serving container. "
+            "The model artifact was not saved in the TensorFlow SavedModel directory structure:\n"
+            "https://www.tensorflow.org/guide/saved_model#structure_of_a_savedmodel_directory"
+        )
 
 
 def _model_dir_with_training_job(model_dir, job_name):
-    if model_dir.startswith('/opt/ml'):
+    if model_dir and model_dir.startswith("/opt/ml"):
         return model_dir
     else:
-        return '{}/{}/model'.format(model_dir, job_name)
+        return "{}/{}/model".format(model_dir, job_name)
 
 
 def main():
     """Training entry point
     """
-    hyperparameters = framework.env.read_hyperparameters()
-    env = framework.training_env(hyperparameters=hyperparameters)
+    hyperparameters = environment.read_hyperparameters()
+    env = environment.Environment(hyperparameters=hyperparameters)
 
     user_hyperparameters = env.hyperparameters
 
     # If the training job is part of the multiple training jobs for tuning, we need to append the training job name to
     # model_dir in case they read from/write to the same object
-    if '_tuning_objective_metric' in hyperparameters:
-        model_dir = _model_dir_with_training_job(hyperparameters.get('model_dir'), env.job_name)
-        logger.info('Appending the training job name to model_dir: {}'.format(model_dir))
-        user_hyperparameters['model_dir'] = model_dir
+    if "_tuning_objective_metric" in hyperparameters:
+        model_dir = _model_dir_with_training_job(hyperparameters.get("model_dir"), env.job_name)
+        logger.info("Appending the training job name to model_dir: {}".format(model_dir))
+        user_hyperparameters["model_dir"] = model_dir
 
-    s3_utils.configure(user_hyperparameters.get('model_dir'), os.environ.get('SAGEMAKER_REGION'))
-    train(env, framework.mapping.to_cmd_args(user_hyperparameters))
+    s3_utils.configure(user_hyperparameters.get("model_dir"), os.environ.get("SAGEMAKER_REGION"))
+    train(env, mapping.to_cmd_args(user_hyperparameters))
     _log_model_missing_warning(MODEL_DIR)
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index 57862f92..00000000
--- a/test/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#  Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License").
-#  You may not use this file except in compliance with the License.
-#  A copy of the License is located at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  or in the "license" file accompanying this file. This file is distributed
-#  on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-#  express or implied. See the License for the specific language governing
-#  permissions and limitations under the License.
-from __future__ import absolute_import
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 00000000..56d58673
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,197 @@
+#  Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License").
+#  You may not use this file except in compliance with the License.
+#  A copy of the License is located at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  or in the "license" file accompanying this file. This file is distributed
+#  on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+#  express or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+from __future__ import absolute_import
+
+import logging
+import os
+
+import boto3
+import pytest
+from sagemaker import LocalSession, Session
+
+from utils import image_utils
+
+# these regions have some p2 and p3 instances, but not enough for automated testing
+NO_P2_REGIONS = [
+    "ca-central-1",
+    "eu-central-1",
+    "eu-west-2",
+    "us-west-1",
+    "eu-west-3",
+    "eu-north-1",
+    "sa-east-1",
+    "ap-east-1",
+    "me-south-1",
+]
+NO_P3_REGIONS = [
+    "ap-southeast-1",
+    "ap-southeast-2",
+    "ap-south-1",
+    "ca-central-1",
+    "eu-central-1",
+    "eu-west-2",
+    "us-west-1" "eu-west-3",
+    "eu-north-1",
+    "sa-east-1",
+    "ap-east-1",
+    "me-south-1",
+]
+
+
+logger = logging.getLogger(__name__)
+logging.getLogger("boto").setLevel(logging.INFO)
+logging.getLogger("botocore").setLevel(logging.INFO)
+logging.getLogger("factory.py").setLevel(logging.INFO)
+logging.getLogger("auth.py").setLevel(logging.INFO)
+logging.getLogger("connectionpool.py").setLevel(logging.INFO)
+
+DIR_PATH = os.path.dirname(os.path.realpath(__file__))
+
+
+def pytest_addoption(parser):
+    parser.addoption("--build-image", "-B", action="store_true")
+    parser.addoption("--push-image", "-P", action="store_true")
+    parser.addoption("--dockerfile-type", "-T", choices=["dlc.cpu", "dlc.gpu", "tf"], default="tf")
+    parser.addoption("--dockerfile", "-D", default=None)
+    parser.addoption("--docker-base-name", default="sagemaker-tensorflow-training")
+    parser.addoption("--tag", default=None)
+    parser.addoption("--region", default="us-west-2")
+    parser.addoption("--framework-version", default="1.15.2")
+    parser.addoption("--processor", default="cpu", choices=["cpu", "gpu", "cpu,gpu"])
+    parser.addoption("--py-version", default="3", choices=["2", "3", "2,3"])
+    parser.addoption("--account-id", default="142577830533")
+    parser.addoption("--instance-type", default=None)
+
+
+def pytest_generate_tests(metafunc):
+    if "py_version" in metafunc.fixturenames:
+        py_version_params = ["py" + v for v in metafunc.config.getoption("--py-version").split(",")]
+        metafunc.parametrize("py_version", py_version_params, scope="session")
+
+    if "processor" in metafunc.fixturenames:
+        processor_params = metafunc.config.getoption("--processor").split(",")
+        metafunc.parametrize("processor", processor_params, scope="session")
+
+
+@pytest.fixture(scope="session", name="dockerfile_type")
+def fixture_dockerfile_type(request):
+    return request.config.getoption("--dockerfile-type")
+
+
+@pytest.fixture(scope="session", name="dockerfile")
+def fixture_dockerfile(request, dockerfile_type):
+    dockerfile = request.config.getoption("--dockerfile")
+    return dockerfile if dockerfile else "Dockerfile.{}".format(dockerfile_type)
+
+
+@pytest.fixture(scope="session", name="build_image", autouse=True)
+def fixture_build_image(request, framework_version, dockerfile, image_uri, region):
+    build_image = request.config.getoption("--build-image")
+    if build_image:
+        return image_utils.build_image(
+            framework_version=framework_version,
+            dockerfile=dockerfile,
+            image_uri=image_uri,
+            region=region,
+            cwd=os.path.join(DIR_PATH, ".."),
+        )
+
+    return image_uri
+
+
+@pytest.fixture(scope="session", name="push_image", autouse=True)
+def fixture_push_image(request, image_uri, region, account_id):
+    push_image = request.config.getoption("--push-image")
+    if push_image:
+        return image_utils.push_image(image_uri, region, account_id)
+    return None
+
+
+@pytest.fixture(scope="session")
+def docker_base_name(request):
+    return request.config.getoption("--docker-base-name")
+
+
+@pytest.fixture(scope="session")
+def region(request):
+    return request.config.getoption("--region")
+
+
+@pytest.fixture(scope="session")
+def framework_version(request):
+    return request.config.getoption("--framework-version")
+
+
+@pytest.fixture(scope="session")
+def tag(request, framework_version, processor, py_version):
+    provided_tag = request.config.getoption("--tag")
+    default_tag = "{}-{}-py{}".format(framework_version, processor, py_version)
+    return provided_tag if provided_tag is not None else default_tag
+
+
+@pytest.fixture(scope="session")
+def sagemaker_session(region):
+    return Session(boto_session=boto3.Session(region_name=region))
+
+
+@pytest.fixture(scope="session")
+def sagemaker_local_session(region):
+    return LocalSession(boto_session=boto3.Session(region_name=region))
+
+
+@pytest.fixture(scope="session")
+def account_id(request):
+    return request.config.getoption("--account-id")
+
+
+@pytest.fixture
+def instance_type(request, processor):
+    provided_instance_type = request.config.getoption("--instance-type")
+    default_instance_type = "ml.c4.xlarge" if processor == "cpu" else "ml.p2.xlarge"
+    return provided_instance_type if provided_instance_type is not None else default_instance_type
+
+
+@pytest.fixture(autouse=True)
+def skip_by_device_type(request, processor):
+    is_gpu = processor == "gpu"
+    if (request.node.get_closest_marker("skip_gpu") and is_gpu) or (
+        request.node.get_closest_marker("skip_cpu") and not is_gpu
+    ):
+        pytest.skip("Skipping because running on '{}' instance".format(processor))
+
+
+@pytest.fixture(autouse=True)
+def skip_gpu_instance_restricted_regions(region, instance_type):
+    if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or (
+        region in NO_P3_REGIONS and instance_type.startswith("ml.p3")
+    ):
+        pytest.skip("Skipping GPU test in region {}".format(region))
+
+
+@pytest.fixture(autouse=True)
+def skip_by_dockerfile_type(request, dockerfile_type):
+    is_generic = dockerfile_type == "tf"
+    if request.node.get_closest_marker("skip_generic") and is_generic:
+        pytest.skip("Skipping because running generic image without mpi and horovod")
+
+
+@pytest.fixture(name="docker_registry", scope="session")
+def fixture_docker_registry(account_id, region):
+    return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region) if account_id else None
+
+
+@pytest.fixture(name="image_uri", scope="session")
+def fixture_image_uri(docker_registry, docker_base_name, tag):
+    if docker_registry:
+        return "{}/{}:{}".format(docker_registry, docker_base_name, tag)
+    return "{}:{}".format(docker_base_name, tag)
diff --git a/test/container/1.15.2/Dockerfile.dlc.cpu b/test/container/1.15.2/Dockerfile.dlc.cpu
new file mode 100644
index 00000000..98764974
--- /dev/null
+++ b/test/container/1.15.2/Dockerfile.dlc.cpu
@@ -0,0 +1,6 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:1.15.2-cpu-py2
+
+COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
+RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
+    rm /sagemaker_tensorflow_training.tar.gz
diff --git a/test/container/1.15.2/Dockerfile.dlc.gpu b/test/container/1.15.2/Dockerfile.dlc.gpu
new file mode 100644
index 00000000..15344f6e
--- /dev/null
+++ b/test/container/1.15.2/Dockerfile.dlc.gpu
@@ -0,0 +1,6 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:1.15.2-gpu-py3
+
+COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
+RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
+    rm /sagemaker_tensorflow_training.tar.gz
diff --git a/test/container/1.15.2/Dockerfile.tf b/test/container/1.15.2/Dockerfile.tf
new file mode 100644
index 00000000..b1a62168
--- /dev/null
+++ b/test/container/1.15.2/Dockerfile.tf
@@ -0,0 +1,7 @@
+FROM tensorflow/tensorflow:1.15.2-gpu-py3
+
+ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main
+
+COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
+RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
+    rm /sagemaker_tensorflow_training.tar.gz
diff --git a/test/integration/__init__.py b/test/integration/__init__.py
index 966dd7d4..a2e25d25 100644
--- a/test/integration/__init__.py
+++ b/test/integration/__init__.py
@@ -14,35 +14,18 @@
 
 import logging
 import os
+import random
+import time
 
-logging.getLogger('boto3').setLevel(logging.INFO)
-logging.getLogger('botocore').setLevel(logging.INFO)
+logging.getLogger("boto3").setLevel(logging.INFO)
+logging.getLogger("botocore").setLevel(logging.INFO)
 
-RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', 'resources')
+RESOURCE_PATH = os.path.join(os.path.dirname(__file__), "..", "resources")
 
-# these regions have some p2 and p3 instances, but not enough for automated testing
-NO_P2_REGIONS = [
-    'ca-central-1',
-    'eu-central-1',
-    'eu-west-2',
-    'us-west-1',
-    'eu-west-3',
-    'eu-north-1',
-    'sa-east-1',
-    'ap-east-1',
-    'me-south-1'
-]
-NO_P3_REGIONS = [
-    'ap-southeast-1',
-    'ap-southeast-2',
-    'ap-south-1',
-    'ca-central-1',
-    'eu-central-1',
-    'eu-west-2',
-    'us-west-1'
-    'eu-west-3',
-    'eu-north-1',
-    'sa-east-1',
-    'ap-east-1',
-    'me-south-1'
-]
+
+def unique_name_from_base(base, max_length=63):
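+    """Return "<base>-<epoch seconds>-<4 hex chars>", trimming base to fit max_length."""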
+    unique = "%04x" % random.randrange(16 ** 4)  # 4-digit hex
+    ts = str(int(time.time()))
+    available_length = max_length - 2 - len(ts) - len(unique)
+    trimmed = base[:available_length]
+    return "{}-{}-{}".format(trimmed, ts, unique)
diff --git a/test/integration/conftest.py b/test/integration/conftest.py
deleted file mode 100644
index 4b599675..00000000
--- a/test/integration/conftest.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#  Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License").
-#  You may not use this file except in compliance with the License.
-#  A copy of the License is located at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  or in the "license" file accompanying this file. This file is distributed
-#  on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-#  express or implied. See the License for the specific language governing
-#  permissions and limitations under the License.
-from __future__ import absolute_import
-
-import logging
-import os
-
-import boto3
-import pytest
-from sagemaker import LocalSession, Session
-from sagemaker.tensorflow import TensorFlow
-
-from test.integration import NO_P2_REGIONS, NO_P3_REGIONS
-
-logger = logging.getLogger(__name__)
-logging.getLogger('boto').setLevel(logging.INFO)
-logging.getLogger('botocore').setLevel(logging.INFO)
-logging.getLogger('factory.py').setLevel(logging.INFO)
-logging.getLogger('auth.py').setLevel(logging.INFO)
-logging.getLogger('connectionpool.py').setLevel(logging.INFO)
-
-SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
-
-
-def pytest_addoption(parser):
-    parser.addoption('--docker-base-name', default='sagemaker-tensorflow-scriptmode')
-    parser.addoption('--tag', default=None)
-    parser.addoption('--region', default='us-west-2')
-    parser.addoption('--framework-version', default=TensorFlow.LATEST_VERSION)
-    parser.addoption('--processor', default='cpu', choices=['cpu', 'gpu', 'cpu,gpu'])
-    parser.addoption('--py-version', default='3', choices=['2', '3', '2,3'])
-    parser.addoption('--account-id', default='142577830533')
-    parser.addoption('--instance-type', default=None)
-
-
-def pytest_configure(config):
-    os.environ['TEST_PY_VERSIONS'] = config.getoption('--py-version')
-    os.environ['TEST_PROCESSORS'] = config.getoption('--processor')
-
-
-@pytest.fixture(scope='session')
-def docker_base_name(request):
-    return request.config.getoption('--docker-base-name')
-
-
-@pytest.fixture(scope='session')
-def region(request):
-    return request.config.getoption('--region')
-
-
-@pytest.fixture(scope='session')
-def framework_version(request):
-    return request.config.getoption('--framework-version')
-
-
-@pytest.fixture
-def tag(request, framework_version, processor, py_version):
-    provided_tag = request.config.getoption('--tag')
-    default_tag = '{}-{}-py{}'.format(framework_version, processor, py_version)
-    return provided_tag if provided_tag is not None else default_tag
-
-
-@pytest.fixture(scope='session')
-def sagemaker_session(region):
-    return Session(boto_session=boto3.Session(region_name=region))
-
-
-@pytest.fixture(scope='session')
-def sagemaker_local_session(region):
-    return LocalSession(boto_session=boto3.Session(region_name=region))
-
-
-@pytest.fixture(scope='session')
-def account_id(request):
-    return request.config.getoption('--account-id')
-
-
-@pytest.fixture
-def instance_type(request, processor):
-    provided_instance_type = request.config.getoption('--instance-type')
-    default_instance_type = 'ml.c4.xlarge' if processor == 'cpu' else 'ml.p2.xlarge'
-    return provided_instance_type if provided_instance_type is not None else default_instance_type
-
-
-@pytest.fixture(autouse=True)
-def skip_by_device_type(request, processor):
-    is_gpu = (processor == 'gpu')
-    if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \
-            (request.node.get_closest_marker('skip_cpu') and not is_gpu):
-        pytest.skip('Skipping because running on \'{}\' instance'.format(processor))
-
-
-@pytest.fixture(autouse=True)
-def skip_gpu_instance_restricted_regions(region, instance_type):
-    if (region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) or \
-            (region in NO_P3_REGIONS and instance_type.startswith('ml.p3')):
-        pytest.skip('Skipping GPU test in region {}'.format(region))
-
-
-@pytest.fixture
-def docker_image(docker_base_name, tag):
-    return '{}:{}'.format(docker_base_name, tag)
-
-
-@pytest.fixture
-def ecr_image(account_id, docker_base_name, tag, region):
-    return '{}.dkr.ecr.{}.amazonaws.com/{}:{}'.format(
-        account_id, region, docker_base_name, tag)
diff --git a/test/integration/local/test_horovod.py b/test/integration/local/test_horovod.py
index f35ba03a..2137f4ab 100644
--- a/test/integration/local/test_horovod.py
+++ b/test/integration/local/test_horovod.py
@@ -19,48 +19,64 @@
 import pytest
 from sagemaker.tensorflow import TensorFlow
 
-from test.integration.utils import processor, py_version  # noqa: F401
+RESOURCE_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
 
-RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
+
+@pytest.mark.skip_cpu
+@pytest.mark.skip_generic
+def test_distributed_training_horovod_gpu(
+    sagemaker_local_session, image_uri, tmpdir, framework_version
+):
+    _test_distributed_training_horovod(
+        1, 2, sagemaker_local_session, image_uri, tmpdir, framework_version, "local_gpu"
+    )
 
 
 @pytest.mark.skip_gpu
-@pytest.mark.parametrize('instances, processes', [
-    [1, 2],
-    (2, 1),
-    (2, 2),
-    (5, 2)])
-def test_distributed_training_horovod_basic(instances,
-                                            processes,
-                                            sagemaker_local_session,
-                                            docker_image,
-                                            tmpdir,
-                                            framework_version):
-    output_path = 'file://%s' % tmpdir
+@pytest.mark.skip_generic
+@pytest.mark.parametrize("instances, processes", [(1, 2), (2, 1), (2, 2), (5, 2)])
+def test_distributed_training_horovod_cpu(
+    instances, processes, sagemaker_local_session, image_uri, tmpdir, framework_version
+):
+    _test_distributed_training_horovod(
+        instances, processes, sagemaker_local_session, image_uri, tmpdir, framework_version, "local"
+    )
+
+
+def _test_distributed_training_horovod(
+    instances, processes, session, image_uri, tmpdir, framework_version, instance_type
+):
+    output_path = "file://%s" % tmpdir
     estimator = TensorFlow(
-        entry_point=os.path.join(RESOURCE_PATH, 'hvdbasic', 'train_hvd_basic.py'),
-        role='SageMakerRole',
-        train_instance_type='local',
-        sagemaker_session=sagemaker_local_session,
+        entry_point=os.path.join(RESOURCE_PATH, "hvdbasic", "train_hvd_basic.py"),
+        role="SageMakerRole",
+        train_instance_type=instance_type,
+        sagemaker_session=session,
         train_instance_count=instances,
-        image_name=docker_image,
+        image_name=image_uri,
         output_path=output_path,
         framework_version=framework_version,
-        hyperparameters={'sagemaker_mpi_enabled': True,
-                         'sagemaker_network_interface_name': 'eth0',
-                         'sagemaker_mpi_num_of_processes_per_host': processes})
+        hyperparameters={
+            "sagemaker_mpi_enabled": True,
+            "sagemaker_network_interface_name": "eth0",
+            "sagemaker_mpi_num_of_processes_per_host": processes,
+        },
+    )
 
-    estimator.fit('file://{}'.format(os.path.join(RESOURCE_PATH, 'mnist', 'data-distributed')))
+    estimator.fit("file://{}".format(os.path.join(RESOURCE_PATH, "mnist", "data-distributed")))
 
     tmp = str(tmpdir)
-    extract_files(output_path.replace('file://', ''), tmp)
+    extract_files(output_path.replace("file://", ""), tmp)
 
     size = instances * processes
 
     for rank in range(size):
         local_rank = rank % processes
-        assert read_json('local-rank-%s-rank-%s' % (local_rank, rank), tmp) == {
-            'local-rank': local_rank, 'rank': rank, 'size': size}
+        assert read_json("local-rank-%s-rank-%s" % (local_rank, rank), tmp) == {
+            "local-rank": local_rank,
+            "rank": rank,
+            "size": size,
+        }
 
 
 def read_json(file, tmp):
@@ -69,14 +85,14 @@ def read_json(file, tmp):
 
 
 def assert_files_exist_in_tar(output_path, files):
-    if output_path.startswith('file://'):
+    if output_path.startswith("file://"):
         output_path = output_path[7:]
-    model_file = os.path.join(output_path, 'model.tar.gz')
+    model_file = os.path.join(output_path, "model.tar.gz")
     with tarfile.open(model_file) as tar:
         for f in files:
             tar.getmember(f)
 
 
 def extract_files(output_path, tmpdir):
-    with tarfile.open(os.path.join(output_path, 'model.tar.gz')) as tar:
+    with tarfile.open(os.path.join(output_path, "model.tar.gz")) as tar:
         tar.extractall(tmpdir)
diff --git a/test/integration/local/test_keras.py b/test/integration/local/test_keras.py
deleted file mode 100644
index 1eca0c2a..00000000
--- a/test/integration/local/test_keras.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import logging
-import os
-
-import numpy as np
-import pytest
-from sagemaker.tensorflow import serving, TensorFlow
-
-from test.integration import RESOURCE_PATH
-from test.integration.utils import processor, py_version  # noqa: F401
-
-
-logging.basicConfig(level=logging.DEBUG)
-
-
-@pytest.mark.skip(reason="Serving part fails because of version mismatch.")
-def test_keras_training(sagemaker_local_session, docker_image, tmpdir, framework_version):
-    entry_point = os.path.join(RESOURCE_PATH, 'keras_inception.py')
-    output_path = 'file://{}'.format(tmpdir)
-
-    estimator = TensorFlow(
-        entry_point=entry_point,
-        role='SageMakerRole',
-        train_instance_count=1,
-        train_instance_type='local',
-        image_name=docker_image,
-        sagemaker_session=sagemaker_local_session,
-        model_dir='/opt/ml/model',
-        output_path=output_path,
-        framework_version=framework_version,
-        py_version='py3')
-
-    estimator.fit()
-
-    model = serving.Model(model_data=output_path,
-                          role='SageMakerRole',
-                          framework_version=framework_version,
-                          sagemaker_session=sagemaker_local_session)
-
-    predictor = model.deploy(initial_instance_count=1, instance_type='local')
-
-    assert predictor.predict(np.random.randn(4, 4, 4, 2) * 255)
-
-    predictor.delete_endpoint()
diff --git a/test/integration/local/test_training.py b/test/integration/local/test_training.py
index bd1641b0..35a676a6 100644
--- a/test/integration/local/test_training.py
+++ b/test/integration/local/test_training.py
@@ -18,136 +18,109 @@
 import pytest
 from sagemaker.tensorflow import TensorFlow
 
-from test.integration.utils import processor, py_version  # noqa: F401
-
-RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-TF_CHECKPOINT_FILES = ['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta']
+RESOURCE_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
+TF_CHECKPOINT_FILES = ["graph.pbtxt", "model.ckpt-0.index", "model.ckpt-0.meta"]
 
 
 @pytest.fixture  # noqa: F811
 def py_full_version(py_version):  # noqa: F811
-    if py_version == '2':
-        return '2.7'
+    if py_version == "2":
+        return "2.7"
     else:
-        return '3.6'
-
-
-@pytest.mark.skip_gpu
-def test_py_versions(sagemaker_local_session, docker_image, py_full_version, framework_version, tmpdir):
-    output_path = 'file://{}'.format(tmpdir)
-    run_tf_training(script=os.path.join(RESOURCE_PATH, 'test_py_version', 'entry.py'),
-                    instance_type='local',
-                    instance_count=1,
-                    sagemaker_local_session=sagemaker_local_session,
-                    docker_image=docker_image,
-                    framework_version=framework_version,
-                    output_path=output_path,
-                    training_data_path=None)
-
-    with tarfile.open(os.path.join(str(tmpdir), 'output.tar.gz')) as tar:
-        output_file = tar.getmember('py_version')
-        tar.extractall(path=str(tmpdir), members=[output_file])
-
-    with open(os.path.join(str(tmpdir), 'py_version')) as f:
-        assert f.read().strip() == py_full_version
+        return "3.6"
 
 
 @pytest.mark.skip_gpu
-def test_mnist_cpu(sagemaker_local_session, docker_image, tmpdir, framework_version):
-    output_path = 'file://{}'.format(tmpdir)
-    run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist.py'),
-                    instance_type='local',
-                    instance_count=1,
-                    sagemaker_local_session=sagemaker_local_session,
-                    docker_image=docker_image,
-                    framework_version=framework_version,
-                    output_path=output_path,
-                    training_data_path='file://{}'.format(
-                        os.path.join(RESOURCE_PATH, 'mnist', 'data')))
-    _assert_files_exist_in_tar(output_path, ['my_model.h5'])
-
-
-@pytest.mark.skip_cpu
-def test_gpu(sagemaker_local_session, docker_image, framework_version):
-    run_tf_training(script=os.path.join(RESOURCE_PATH, 'gpu_device_placement.py'),
-                    instance_type='local_gpu',
-                    instance_count=1,
-                    sagemaker_local_session=sagemaker_local_session,
-                    docker_image=docker_image,
-                    framework_version=framework_version,
-                    training_data_path='file://{}'.format(
-                        os.path.join(RESOURCE_PATH, 'mnist', 'data')))
+def test_mnist_cpu(sagemaker_local_session, image_uri, tmpdir, framework_version):
+    output_path = "file://{}".format(tmpdir)
+    run_tf_training(
+        script=os.path.join(RESOURCE_PATH, "mnist", "mnist.py"),
+        instance_type="local",
+        instance_count=1,
+        sagemaker_local_session=sagemaker_local_session,
+        image_uri=image_uri,
+        framework_version=framework_version,
+        output_path=output_path,
+        training_data_path="file://{}".format(os.path.join(RESOURCE_PATH, "mnist", "data")),
+    )
+    _assert_files_exist_in_tar(output_path, ["my_model.h5"])
 
 
 @pytest.mark.skip_gpu
-def test_distributed_training_cpu_no_ps(sagemaker_local_session,
-                                        docker_image,
-                                        tmpdir,
-                                        framework_version):
-    output_path = 'file://{}'.format(tmpdir)
-    run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist_estimator.py'),
-                    instance_type='local',
-                    instance_count=2,
-                    sagemaker_local_session=sagemaker_local_session,
-                    docker_image=docker_image,
-                    framework_version=framework_version,
-                    output_path=output_path,
-                    training_data_path='file://{}'.format(
-                        os.path.join(RESOURCE_PATH, 'mnist', 'data-distributed')))
+def test_distributed_training_cpu_no_ps(
+    sagemaker_local_session, image_uri, tmpdir, framework_version
+):
+    output_path = "file://{}".format(tmpdir)
+    run_tf_training(
+        script=os.path.join(RESOURCE_PATH, "mnist", "mnist_estimator.py"),
+        instance_type="local",
+        instance_count=2,
+        sagemaker_local_session=sagemaker_local_session,
+        image_uri=image_uri,
+        framework_version=framework_version,
+        output_path=output_path,
+        training_data_path="file://{}".format(
+            os.path.join(RESOURCE_PATH, "mnist", "data-distributed")
+        ),
+    )
     _assert_files_exist_in_tar(output_path, TF_CHECKPOINT_FILES)
 
 
 @pytest.mark.skip_gpu
-def test_distributed_training_cpu_ps(sagemaker_local_session,
-                                     docker_image,
-                                     tmpdir,
-                                     framework_version):
-    output_path = 'file://{}'.format(tmpdir)
-    run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist_estimator.py'),
-                    instance_type='local',
-                    instance_count=2,
-                    sagemaker_local_session=sagemaker_local_session,
-                    docker_image=docker_image,
-                    framework_version=framework_version,
-                    output_path=output_path,
-                    hyperparameters={'sagemaker_parameter_server_enabled': True},
-                    training_data_path='file://{}'.format(
-                        os.path.join(RESOURCE_PATH, 'mnist', 'data-distributed')))
+def test_distributed_training_cpu_ps(sagemaker_local_session, image_uri, tmpdir, framework_version):
+    output_path = "file://{}".format(tmpdir)
+    run_tf_training(
+        script=os.path.join(RESOURCE_PATH, "mnist", "mnist_estimator.py"),
+        instance_type="local",
+        instance_count=2,
+        sagemaker_local_session=sagemaker_local_session,
+        image_uri=image_uri,
+        framework_version=framework_version,
+        output_path=output_path,
+        hyperparameters={"sagemaker_parameter_server_enabled": True},
+        training_data_path="file://{}".format(
+            os.path.join(RESOURCE_PATH, "mnist", "data-distributed")
+        ),
+    )
     _assert_files_exist_in_tar(output_path, TF_CHECKPOINT_FILES)
 
 
-def run_tf_training(script,
-                    instance_type,
-                    instance_count,
-                    sagemaker_local_session,
-                    docker_image,
-                    framework_version,
-                    training_data_path,
-                    output_path=None,
-                    hyperparameters=None):
+def run_tf_training(
+    script,
+    instance_type,
+    instance_count,
+    sagemaker_local_session,
+    image_uri,
+    framework_version,
+    training_data_path,
+    output_path=None,
+    hyperparameters=None,
+):
 
     hyperparameters = hyperparameters or {}
 
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           train_instance_count=instance_count,
-                           train_instance_type=instance_type,
-                           sagemaker_session=sagemaker_local_session,
-                           image_name=docker_image,
-                           model_dir='/opt/ml/model',
-                           output_path=output_path,
-                           hyperparameters=hyperparameters,
-                           base_job_name='test-tf',
-                           framework_version=framework_version,
-                           py_version='py3')
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        train_instance_count=instance_count,
+        train_instance_type=instance_type,
+        sagemaker_session=sagemaker_local_session,
+        image_name=image_uri,
+        model_dir="/opt/ml/model",
+        output_path=output_path,
+        hyperparameters=hyperparameters,
+        base_job_name="test-tf",
+        framework_version=framework_version,
+        py_version="py3",
+    )
 
     estimator.fit(training_data_path)
 
 
 def _assert_files_exist_in_tar(output_path, files):
-    if output_path.startswith('file://'):
+    if output_path.startswith("file://"):
         output_path = output_path[7:]
-    model_file = os.path.join(output_path, 'model.tar.gz')
+    model_file = os.path.join(output_path, "model.tar.gz")
     with tarfile.open(model_file) as tar:
         for f in files:
             tar.getmember(f)
diff --git a/test/integration/sagemaker/test_horovod.py b/test/integration/sagemaker/test_horovod.py
index 1d2bd8ac..de7c3ff1 100644
--- a/test/integration/sagemaker/test_horovod.py
+++ b/test/integration/sagemaker/test_horovod.py
@@ -14,39 +14,68 @@
 
 import os
 
+import pytest
 import sagemaker
 from sagemaker.tensorflow import TensorFlow
+from sagemaker.utils import unique_name_from_base
 
-from test.integration.utils import processor, py_version, unique_name_from_base  # noqa: F401
+RESOURCE_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
 
-RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
 
+@pytest.mark.skip_generic
+def test_distributed_training_horovod(
+    sagemaker_session, instance_type, image_uri, tmpdir, framework_version
+):
 
-def test_distributed_training_horovod(sagemaker_session,
-                                      instance_type,
-                                      ecr_image,
-                                      tmpdir,
-                                      framework_version):
-
-    mpi_options = '-verbose -x orte_base_help_aggregate=0'
+    mpi_options = "-verbose -x orte_base_help_aggregate=0"
     estimator = TensorFlow(
-        entry_point=os.path.join(RESOURCE_PATH, 'mnist', 'horovod_mnist.py'),
-        role='SageMakerRole',
+        entry_point=os.path.join(RESOURCE_PATH, "mnist", "horovod_mnist.py"),
+        role="SageMakerRole",
         train_instance_type=instance_type,
         train_instance_count=2,
-        image_name=ecr_image,
+        image_name=image_uri,
         framework_version=framework_version,
-        py_version='py3',
+        py_version="py3",
         script_mode=True,
-        hyperparameters={'sagemaker_mpi_enabled': True,
-                         'sagemaker_mpi_custom_mpi_options': mpi_options,
-                         'sagemaker_mpi_num_of_processes_per_host': 1},
-        sagemaker_session=sagemaker_session)
+        hyperparameters={
+            "sagemaker_mpi_enabled": True,
+            "sagemaker_mpi_custom_mpi_options": mpi_options,
+            "sagemaker_mpi_num_of_processes_per_host": 1,
+        },
+        sagemaker_session=sagemaker_session,
+    )
 
-    estimator.fit(job_name=unique_name_from_base('test-tf-horovod'))
+    estimator.fit(job_name=unique_name_from_base("test-tf-horovod"))
 
     model_data_source = sagemaker.local.data.get_data_source_instance(
-        estimator.model_data, sagemaker_session)
+        estimator.model_data, sagemaker_session
+    )
 
     for filename in model_data_source.get_file_list():
-        assert os.path.basename(filename) == 'model.tar.gz'
+        assert os.path.basename(filename) == "model.tar.gz"
+
+
+@pytest.mark.skip_generic
+def test_distributed_training_horovod_with_env_vars(
+    sagemaker_session, instance_type, image_uri, tmpdir, framework_version
+):
+
+    mpi_options = "-verbose -x orte_base_help_aggregate=0"
+    estimator = TensorFlow(
+        entry_point=os.path.join(RESOURCE_PATH, "hvdbasic", "train_hvd_env_vars.py"),
+        role="SageMakerRole",
+        train_instance_type=instance_type,
+        train_instance_count=2,
+        image_name=image_uri,
+        framework_version=framework_version,
+        py_version="py3",
+        script_mode=True,
+        hyperparameters={
+            "sagemaker_mpi_enabled": True,
+            "sagemaker_mpi_custom_mpi_options": mpi_options,
+            "sagemaker_mpi_num_of_processes_per_host": 2,
+        },
+        sagemaker_session=sagemaker_session,
+    )
+
+    estimator.fit(job_name=unique_name_from_base("test-tf-horovod-env-vars"))
diff --git a/test/integration/sagemaker/test_mnist.py b/test/integration/sagemaker/test_mnist.py
index 25c8db3e..c466f573 100644
--- a/test/integration/sagemaker/test_mnist.py
+++ b/test/integration/sagemaker/test_mnist.py
@@ -18,143 +18,125 @@
 import pytest
 from sagemaker.tensorflow import TensorFlow
 from sagemaker.tuner import HyperparameterTuner, IntegerParameter
+from sagemaker.utils import unique_name_from_base
 from six.moves.urllib.parse import urlparse
 
-from test.integration.utils import processor, py_version, unique_name_from_base  # noqa: F401
 from timeout import timeout
 
 
 @pytest.mark.deploy_test
-def test_mnist(sagemaker_session, ecr_image, instance_type, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-    script = os.path.join(resource_path, 'mnist', 'mnist.py')
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           train_instance_type=instance_type,
-                           train_instance_count=1,
-                           sagemaker_session=sagemaker_session,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           script_mode=True)
+def test_mnist(sagemaker_session, image_uri, instance_type, framework_version):
+    resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
+    script = os.path.join(resource_path, "mnist", "mnist.py")
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        train_instance_type=instance_type,
+        train_instance_count=1,
+        sagemaker_session=sagemaker_session,
+        image_name=image_uri,
+        framework_version=framework_version,
+        script_mode=True,
+    )
     inputs = estimator.sagemaker_session.upload_data(
-        path=os.path.join(resource_path, 'mnist', 'data'),
-        key_prefix='scriptmode/mnist')
-    estimator.fit(inputs, job_name=unique_name_from_base('test-sagemaker-mnist'))
+        path=os.path.join(resource_path, "mnist", "data"), key_prefix="scriptmode/mnist"
+    )
+    estimator.fit(inputs, job_name=unique_name_from_base("test-sagemaker-mnist"))
     _assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
 
 
-def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-    script = os.path.join(resource_path, 'mnist', 'mnist.py')
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           train_instance_count=2,
-                           train_instance_type=instance_type,
-                           sagemaker_session=sagemaker_session,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           script_mode=True)
+def test_distributed_mnist_no_ps(sagemaker_session, image_uri, instance_type, framework_version):
+    resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
+    script = os.path.join(resource_path, "mnist", "mnist.py")
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        train_instance_count=2,
+        train_instance_type=instance_type,
+        sagemaker_session=sagemaker_session,
+        image_name=image_uri,
+        framework_version=framework_version,
+        script_mode=True,
+    )
     inputs = estimator.sagemaker_session.upload_data(
-        path=os.path.join(resource_path, 'mnist', 'data'),
-        key_prefix='scriptmode/mnist')
-    estimator.fit(inputs, job_name=unique_name_from_base('test-tf-sm-distributed-mnist'))
+        path=os.path.join(resource_path, "mnist", "data"), key_prefix="scriptmode/mnist"
+    )
+    estimator.fit(inputs, job_name=unique_name_from_base("test-tf-sm-distributed-mnist"))
     _assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
 
 
-def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-    script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           hyperparameters={'sagemaker_parameter_server_enabled': True},
-                           train_instance_count=2,
-                           train_instance_type=instance_type,
-                           sagemaker_session=sagemaker_session,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           script_mode=True)
+def test_distributed_mnist_ps(sagemaker_session, image_uri, instance_type, framework_version):
+    resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
+    script = os.path.join(resource_path, "mnist", "mnist_estimator.py")
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        hyperparameters={"sagemaker_parameter_server_enabled": True},
+        train_instance_count=2,
+        train_instance_type=instance_type,
+        sagemaker_session=sagemaker_session,
+        image_name=image_uri,
+        framework_version=framework_version,
+        script_mode=True,
+    )
     inputs = estimator.sagemaker_session.upload_data(
-        path=os.path.join(resource_path, 'mnist', 'data-distributed'),
-        key_prefix='scriptmode/mnist-distributed')
-    estimator.fit(inputs, job_name=unique_name_from_base('test-tf-sm-distributed-mnist'))
+        path=os.path.join(resource_path, "mnist", "data-distributed"),
+        key_prefix="scriptmode/mnist-distributed",
+    )
+    estimator.fit(inputs, job_name=unique_name_from_base("test-tf-sm-distributed-mnist"))
     _assert_checkpoint_exists(sagemaker_session.boto_region_name, estimator.model_dir, 0)
     _assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
 
 
-def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-    script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           hyperparameters={
-                               # Saving a checkpoint after every 5 steps to hammer the S3 plugin
-                               'save-checkpoint-steps': 10,
-                               # Disable throttling for checkpoint and model saving
-                               'throttle-secs': 0,
-                               # Without the patch training jobs would fail around 100th to
-                               # 150th step
-                               'max-steps': 200,
-                               # Large batch size would result in a larger checkpoint file
-                               'batch-size': 1024,
-                               # This makes the training job exporting model during training.
-                               # Stale model garbage collection will also be performed.
-                               'export-model-during-training': True
-                           },
-                           train_instance_count=1,
-                           train_instance_type=instance_type,
-                           sagemaker_session=sagemaker_session,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           script_mode=True)
-    estimator.fit('s3://sagemaker-sample-data-{}/tensorflow/mnist'.format(region),
-                  job_name=unique_name_from_base('test-tf-sm-s3-mnist'))
-    _assert_s3_file_exists(region, estimator.model_data)
-    _assert_checkpoint_exists(region, estimator.model_dir, 200)
-
-
-def test_tuning(sagemaker_session, ecr_image, instance_type, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
-    script = os.path.join(resource_path, 'mnist', 'mnist.py')
-
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           train_instance_type=instance_type,
-                           train_instance_count=1,
-                           sagemaker_session=sagemaker_session,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           script_mode=True)
-
-    hyperparameter_ranges = {'epochs': IntegerParameter(1, 2)}
-    objective_metric_name = 'accuracy'
-    metric_definitions = [{'Name': objective_metric_name, 'Regex': 'accuracy = ([0-9\\.]+)'}]
-
-    tuner = HyperparameterTuner(estimator,
-                                objective_metric_name,
-                                hyperparameter_ranges,
-                                metric_definitions,
-                                max_jobs=2,
-                                max_parallel_jobs=2)
+def test_tuning(sagemaker_session, image_uri, instance_type, framework_version):
+    resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
+    script = os.path.join(resource_path, "mnist", "mnist.py")
+
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        train_instance_type=instance_type,
+        train_instance_count=1,
+        sagemaker_session=sagemaker_session,
+        image_name=image_uri,
+        framework_version=framework_version,
+        script_mode=True,
+    )
+
+    hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
+    objective_metric_name = "accuracy"
+    metric_definitions = [{"Name": objective_metric_name, "Regex": "accuracy = ([0-9\\.]+)"}]
+
+    tuner = HyperparameterTuner(
+        estimator,
+        objective_metric_name,
+        hyperparameter_ranges,
+        metric_definitions,
+        max_jobs=2,
+        max_parallel_jobs=2,
+    )
 
     with timeout(minutes=20):
         inputs = estimator.sagemaker_session.upload_data(
-            path=os.path.join(resource_path, 'mnist', 'data'),
-            key_prefix='scriptmode/mnist')
+            path=os.path.join(resource_path, "mnist", "data"), key_prefix="scriptmode/mnist"
+        )
 
-        tuning_job_name = unique_name_from_base('test-tf-sm-tuning', max_length=32)
+        tuning_job_name = unique_name_from_base("test-tf-sm-tuning", max_length=32)
         tuner.fit(inputs, job_name=tuning_job_name)
         tuner.wait()
 
 
 def _assert_checkpoint_exists(region, model_dir, checkpoint_number):
-    _assert_s3_file_exists(region, os.path.join(model_dir, 'graph.pbtxt'))
-    _assert_s3_file_exists(region,
-                           os.path.join(model_dir, 'model.ckpt-{}.index'.format(checkpoint_number)))
-    _assert_s3_file_exists(region,
-                           os.path.join(model_dir, 'model.ckpt-{}.meta'.format(checkpoint_number)))
+    _assert_s3_file_exists(region, os.path.join(model_dir, "graph.pbtxt"))
+    _assert_s3_file_exists(
+        region, os.path.join(model_dir, "model.ckpt-{}.index".format(checkpoint_number))
+    )
+    _assert_s3_file_exists(
+        region, os.path.join(model_dir, "model.ckpt-{}.meta".format(checkpoint_number))
+    )
 
 
 def _assert_s3_file_exists(region, s3_url):
     parsed_url = urlparse(s3_url)
-    s3 = boto3.resource('s3', region_name=region)
-    s3.Object(parsed_url.netloc, parsed_url.path.lstrip('/')).load()
+    s3 = boto3.resource("s3", region_name=region)
+    s3.Object(parsed_url.netloc, parsed_url.path.lstrip("/")).load()
diff --git a/test/integration/sagemaker/test_tuning_model_dir.py b/test/integration/sagemaker/test_tuning_model_dir.py
index e833c3a4..c113c1cb 100644
--- a/test/integration/sagemaker/test_tuning_model_dir.py
+++ b/test/integration/sagemaker/test_tuning_model_dir.py
@@ -16,30 +16,35 @@
 
 from sagemaker.tensorflow import TensorFlow
 from sagemaker.tuner import HyperparameterTuner, IntegerParameter
-
-from test.integration.utils import processor, py_version, unique_name_from_base  # noqa: F401
-
-
-def test_model_dir_with_training_job_name(sagemaker_session, ecr_image, instance_type, framework_version):
-    resource_path = os.path.join(os.path.dirname(__file__), '../..', 'resources')
-    script = os.path.join(resource_path, 'tuning_model_dir', 'entry.py')
-
-    estimator = TensorFlow(entry_point=script,
-                           role='SageMakerRole',
-                           train_instance_type=instance_type,
-                           train_instance_count=1,
-                           image_name=ecr_image,
-                           framework_version=framework_version,
-                           py_version='py3',
-                           sagemaker_session=sagemaker_session)
-
-    tuner = HyperparameterTuner(estimator=estimator,
-                                objective_metric_name='accuracy',
-                                hyperparameter_ranges={'arbitrary_value': IntegerParameter(0, 1)},
-                                metric_definitions=[{'Name': 'accuracy', 'Regex': 'accuracy=([01])'}],
-                                max_jobs=1,
-                                max_parallel_jobs=1)
+from sagemaker.utils import unique_name_from_base
+
+
+def test_model_dir_with_training_job_name(
+    sagemaker_session, image_uri, instance_type, framework_version
+):
+    resource_path = os.path.join(os.path.dirname(__file__), "../..", "resources")
+    script = os.path.join(resource_path, "tuning_model_dir", "entry.py")
+
+    estimator = TensorFlow(
+        entry_point=script,
+        role="SageMakerRole",
+        train_instance_type=instance_type,
+        train_instance_count=1,
+        image_name=image_uri,
+        framework_version=framework_version,
+        py_version="py3",
+        sagemaker_session=sagemaker_session,
+    )
+
+    tuner = HyperparameterTuner(
+        estimator=estimator,
+        objective_metric_name="accuracy",
+        hyperparameter_ranges={"arbitrary_value": IntegerParameter(0, 1)},
+        metric_definitions=[{"Name": "accuracy", "Regex": "accuracy=([01])"}],
+        max_jobs=1,
+        max_parallel_jobs=1,
+    )
 
     # User script has logic to check for the correct model_dir
-    tuner.fit(job_name=unique_name_from_base('test-tf-model-dir', max_length=32))
+    tuner.fit(job_name=unique_name_from_base("test-tf-model-dir", max_length=32))
     tuner.wait()
diff --git a/test/integration/sagemaker/timeout.py b/test/integration/sagemaker/timeout.py
index d4738d32..1ff4278c 100644
--- a/test/integration/sagemaker/timeout.py
+++ b/test/integration/sagemaker/timeout.py
@@ -16,7 +16,7 @@
 import logging
 import signal
 
-LOGGER = logging.getLogger('timeout')
+LOGGER = logging.getLogger("timeout")
 
 
 class TimeoutError(Exception):
@@ -39,7 +39,7 @@ def timeout(seconds=0, minutes=0, hours=0):
     limit = seconds + 60 * minutes + 3600 * hours
 
     def handler(signum, frame):
-        raise TimeoutError('timed out after {} seconds'.format(limit))
+        raise TimeoutError("timed out after {} seconds".format(limit))
 
     try:
         signal.signal(signal.SIGALRM, handler)
diff --git a/test/integration/utils.py b/test/integration/utils.py
deleted file mode 100644
index 4944eb20..00000000
--- a/test/integration/utils.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import os
-import random
-import time
-
-import pytest
-
-
-def unique_name_from_base(base, max_length=63):
-    unique = '%04x' % random.randrange(16**4)  # 4-digit hex
-    ts = str(int(time.time()))
-    available_length = max_length - 2 - len(ts) - len(unique)
-    trimmed = base[:available_length]
-    return '{}-{}-{}'.format(trimmed, ts, unique)
-
-
-@pytest.fixture(params=os.environ['TEST_PY_VERSIONS'].split(','))
-def py_version(request):
-    return request.param
-
-
-@pytest.fixture(params=os.environ['TEST_PROCESSORS'].split(','))
-def processor(request):
-    return request.param
diff --git a/test/resources/gpu_device_placement.py b/test/resources/gpu_device_placement.py
deleted file mode 100644
index 11bbcdff..00000000
--- a/test/resources/gpu_device_placement.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#  Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License").
-#  You may not use this file except in compliance with the License.
-#  A copy of the License is located at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  or in the "license" file accompanying this file. This file is distributed
-#  on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-#  express or implied. See the License for the specific language governing
-#  permissions and limitations under the License.
-import tensorflow as tf
-
-# https://www.tensorflow.org/programmers_guide/using_gpu
-print('-' * 87)
-print('Run GPU test.')
-with tf.device('/gpu:0'):
-    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
-    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
-c = tf.matmul(a, b)
-sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
-# Runs the op.
-print(sess.run(c))
-print('-' * 87)
-print('')
diff --git a/test/resources/hvdbasic/train_hvd_basic.py b/test/resources/hvdbasic/train_hvd_basic.py
index cc068678..24a35a8b 100644
--- a/test/resources/hvdbasic/train_hvd_basic.py
+++ b/test/resources/hvdbasic/train_hvd_basic.py
@@ -4,8 +4,10 @@
 
 hvd.init()
 
-with open(os.path.join('/opt/ml/model/local-rank-%s-rank-%s' % (hvd.local_rank(), hvd.rank())), 'w+') as f:
-    basic_info = {'local-rank': hvd.local_rank(), 'rank': hvd.rank(), 'size': hvd.size()}
+with open(
+    os.path.join("/opt/ml/model/local-rank-%s-rank-%s" % (hvd.local_rank(), hvd.rank())), "w+"
+) as f:
+    basic_info = {"local-rank": hvd.local_rank(), "rank": hvd.rank(), "size": hvd.size()}
 
     print(basic_info)
     json.dump(basic_info, f)
diff --git a/test/resources/hvdbasic/train_hvd_env_vars.py b/test/resources/hvdbasic/train_hvd_env_vars.py
new file mode 100644
index 00000000..da67367c
--- /dev/null
+++ b/test/resources/hvdbasic/train_hvd_env_vars.py
@@ -0,0 +1,19 @@
+import json
+import os
+import horovod.tensorflow as hvd
+
+hvd.init()
+
+with open("/opt/ml/model/local-rank-%s-rank-%s" % (hvd.local_rank(), hvd.rank()), "w+") as f:
+    basic_info = {"local-rank": hvd.local_rank(), "rank": hvd.rank(), "size": hvd.size()}
+
+    print(basic_info)
+    json.dump(basic_info, f)
+
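+# The training environment is expected to forward these variables to every MPI
+# process; the assertions below fail the job if either one is missing.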
+val = os.environ.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
+host = os.environ.get("SM_CURRENT_HOST")
+
+assert val is not None
+assert host is not None
+
+print("host {}: AWS_CONTAINER_CREDENTIALS_RELATIVE_URI={}".format(host, val))
diff --git a/test/resources/keras_inception.py b/test/resources/keras_inception.py
deleted file mode 100644
index ebfd1a0e..00000000
--- a/test/resources/keras_inception.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#  Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License").
-#  You may not use this file except in compliance with the License.
-#  A copy of the License is located at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  or in the "license" file accompanying this file. This file is distributed
-#  on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-#  express or implied. See the License for the specific language governing
-#  permissions and limitations under the License.
-import argparse
-import os
-
-import keras
-import tensorflow as tf
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--model_dir', type=str)
-
-args = parser.parse_args()
-
-
-# Loading pre-trained Keras model
-model = keras.applications.inception_v3.InceptionV3(weights='imagenet')
-
-# Exports the keras model as TensorFlow Serving Saved Model
-with tf.Session() as session:
-
-    init = tf.global_variables_initializer()
-    session.run(init)
-
-    tf.saved_model.simple_save(
-        session,
-        os.path.join(args.model_dir, 'inception-model/1'),
-        inputs={'input_image': model.input},
-        outputs={t.name: t for t in model.outputs})
diff --git a/test/resources/mnist/horovod_mnist.py b/test/resources/mnist/horovod_mnist.py
index 1014f2bb..f2bf4e8f 100644
--- a/test/resources/mnist/horovod_mnist.py
+++ b/test/resources/mnist/horovod_mnist.py
@@ -10,120 +10,84 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-from __future__ import absolute_import, print_function
-
 import os
-import subprocess
-
-import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras import backend as K
 import tensorflow as tf
-import horovod.keras as hvd
-
+import horovod.tensorflow as hvd
 
 # Horovod: initialize Horovod.
 hvd.init()
 
 # Horovod: pin GPU to be used to process local rank (one GPU per process)
-config = tf.ConfigProto()
-config.gpu_options.allow_growth = True
-config.gpu_options.visible_device_list = str(hvd.local_rank())
-K.set_session(tf.Session(config=config))
-
-batch_size = 128
-num_classes = 10
-
-epochs = 1
-
-# Input image dimensions
-img_rows, img_cols = 28, 28
-
-# The data, shuffled and split between train and test sets
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train[:600]
-y_train = y_train[:600]
-x_test = x_test[:100]
-y_test = y_test[:100]
-
-if K.image_data_format() == 'channels_first':
-    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
-    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
-    input_shape = (1, img_rows, img_cols)
-else:
-    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
-    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
-    input_shape = (img_rows, img_cols, 1)
-
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-model = Sequential()
-model.add(Conv2D(32, kernel_size=(3, 3),
-                 activation='relu',
-                 input_shape=input_shape))
-model.add(Conv2D(64, (3, 3), activation='relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-model.add(Dropout(0.25))
-model.add(Flatten())
-model.add(Dense(128, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(num_classes, activation='softmax'))
+gpus = tf.config.experimental.list_physical_devices("GPU")
+for gpu in gpus:
+    tf.config.experimental.set_memory_growth(gpu, True)
+if gpus:
+    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], "GPU")
+
+(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data(
+    path="mnist-%d.npz" % hvd.rank()
+)
+
+dataset = tf.data.Dataset.from_tensor_slices(
+    (tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32), tf.cast(mnist_labels, tf.int64))
+)
+dataset = dataset.repeat().shuffle(10000).batch(128)
+
+mnist_model = tf.keras.Sequential(
+    [
+        tf.keras.layers.Conv2D(32, [3, 3], activation="relu"),
+        tf.keras.layers.Conv2D(64, [3, 3], activation="relu"),
+        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
+        tf.keras.layers.Dropout(0.25),
+        tf.keras.layers.Flatten(),
+        tf.keras.layers.Dense(128, activation="relu"),
+        tf.keras.layers.Dropout(0.5),
+        tf.keras.layers.Dense(10, activation="softmax"),
+    ]
+)
+loss = tf.losses.SparseCategoricalCrossentropy()
 
 # Horovod: adjust learning rate based on number of GPUs.
-opt = keras.optimizers.Adadelta(1.0 * hvd.size())
+opt = tf.optimizers.Adam(0.001 * hvd.size())
+
+checkpoint_dir = "./checkpoints"
+checkpoint = tf.train.Checkpoint(model=mnist_model, optimizer=opt)
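+# Note: no intermediate checkpoints are written in this example; the final model
+# is exported below as a SavedModel on worker 0.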
+
+
+@tf.function
+def training_step(images, labels, first_batch):
+    with tf.GradientTape() as tape:
+        probs = mnist_model(images, training=True)
+        loss_value = loss(labels, probs)
 
-# Horovod: add Horovod Distributed Optimizer.
-opt = hvd.DistributedOptimizer(opt)
+    # Horovod: add Horovod Distributed GradientTape.
+    tape = hvd.DistributedGradientTape(tape)
 
-model.compile(loss=keras.losses.categorical_crossentropy,
-              optimizer=opt,
-              metrics=['accuracy'])
+    grads = tape.gradient(loss_value, mnist_model.trainable_variables)
+    opt.apply_gradients(zip(grads, mnist_model.trainable_variables))
 
-callbacks = [
     # Horovod: broadcast initial variable states from rank 0 to all other processes.
     # This is necessary to ensure consistent initialization of all workers when
     # training is started with random weights or restored from a checkpoint.
-    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
-]
+    #
+    # Note: broadcast should be done after the first gradient step to ensure optimizer
+    # initialization.
+    if first_batch:
+        hvd.broadcast_variables(mnist_model.variables, root_rank=0)
+        hvd.broadcast_variables(opt.variables(), root_rank=0)
 
-# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
-if hvd.rank() == 0:
-    callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
-
-model.fit(x_train, y_train,
-          batch_size=batch_size,
-          callbacks=callbacks,
-          epochs=epochs,
-          verbose=1,
-          validation_data=(x_test, y_test))
-score = model.evaluate(x_test, y_test, verbose=0)
-print('Test loss:', score[0])
-print('Test accuracy:', score[1])
+    return loss_value
 
 
-if hvd.rank() == 0:
-    # Exports the keras model as TensorFlow Serving Saved Model
-    with K.get_session() as session:
+# Horovod: adjust number of steps based on number of GPUs.
+for batch, (images, labels) in enumerate(dataset.take(600 // hvd.size())):
+    loss_value = training_step(images, labels, batch == 0)
 
-        init = tf.global_variables_initializer()
-        session.run(init)
+    if batch % 10 == 0 and hvd.local_rank() == 0:
+        print("Step #%d\tLoss: %.6f" % (batch, loss_value))
 
-        tf.saved_model.simple_save(
-            session,
-            os.path.join('/opt/ml/model/mnist/1'),
-            inputs={'input_image': model.input},
-            outputs={t.name: t for t in model.outputs})
+# Horovod: export the model only on worker 0 to prevent other workers from
+# corrupting it.
+if hvd.rank() == 0:
+    # Export the Keras model as a TensorFlow SavedModel
+    mnist_model.save(os.path.join("/opt/ml/model/mnist/1"), save_format="tf")
diff --git a/test/resources/mnist/mnist.py b/test/resources/mnist/mnist.py
index e4349ce2..e1c2b275 100644
--- a/test/resources/mnist/mnist.py
+++ b/test/resources/mnist/mnist.py
@@ -7,63 +7,49 @@
 import tensorflow as tf
 
 
-
 def _parse_args():
 
     parser = argparse.ArgumentParser()
 
     # hyperparameters sent by the client are passed as command-line arguments to the script.
-    parser.add_argument('--epochs', type=int, default=1)
+    parser.add_argument("--epochs", type=int, default=1)
     # Data, model, and output directories
-    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
-    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
-    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
-    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
+    parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"])
+    parser.add_argument("--train", type=str, default=os.environ["SM_CHANNEL_TRAINING"])
+    parser.add_argument("--hosts", type=list, default=json.loads(os.environ["SM_HOSTS"]))
+    parser.add_argument("--current-host", type=str, default=os.environ["SM_CURRENT_HOST"])
 
     return parser.parse_known_args()
 
 
 def _load_training_data(base_dir):
-    x_train = np.load(os.path.join(base_dir, 'train', 'x_train.npy'))
-    y_train = np.load(os.path.join(base_dir, 'train', 'y_train.npy'))
+    x_train = np.load(os.path.join(base_dir, "train", "x_train.npy"))
+    y_train = np.load(os.path.join(base_dir, "train", "y_train.npy"))
     return x_train, y_train
 
 
 def _load_testing_data(base_dir):
-    x_test = np.load(os.path.join(base_dir, 'test', 'x_test.npy'))
-    y_test = np.load(os.path.join(base_dir, 'test', 'y_test.npy'))
+    x_test = np.load(os.path.join(base_dir, "test", "x_test.npy"))
+    y_test = np.load(os.path.join(base_dir, "test", "y_test.npy"))
     return x_test, y_test
 
 
-def assert_can_track_sagemaker_experiments():
-    in_sagemaker_training = 'TRAINING_JOB_ARN' in os.environ
-    in_python_three = sys.version_info[0] == 3
-
-    if in_sagemaker_training and in_python_three:
-        import smexperiments.tracker
-
-        with smexperiments.tracker.Tracker.load() as tracker:
-            tracker.log_parameter('param', 1)
-            tracker.log_metric('metric', 1.0)
-
-
 args, unknown = _parse_args()
 
-model = tf.keras.models.Sequential([
-  tf.keras.layers.Flatten(input_shape=(28, 28)),
-  tf.keras.layers.Dense(512, activation=tf.nn.relu),
-  tf.keras.layers.Dropout(0.2),
-  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
-])
+model = tf.keras.models.Sequential(
+    [
+        tf.keras.layers.Flatten(input_shape=(28, 28)),
+        tf.keras.layers.Dense(512, activation=tf.nn.relu),
+        tf.keras.layers.Dropout(0.2),
+        tf.keras.layers.Dense(10, activation=tf.nn.softmax),
+    ]
+)
 
-model.compile(optimizer='adam',
-              loss='sparse_categorical_crossentropy',
-              metrics=['accuracy'])
+model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
 x_train, y_train = _load_training_data(args.train)
 x_test, y_test = _load_testing_data(args.train)
 model.fit(x_train, y_train, epochs=args.epochs)
 model.evaluate(x_test, y_test)
 
 if args.current_host == args.hosts[0]:
-    model.save(os.path.join('/opt/ml/model', 'my_model.h5'))
-    assert_can_track_sagemaker_experiments()
+    model.save(os.path.join("/opt/ml/model", "my_model.h5"))
diff --git a/test/resources/mnist/mnist_estimator.py b/test/resources/mnist/mnist_estimator.py
index d0b991f2..82fb75ac 100644
--- a/test/resources/mnist/mnist_estimator.py
+++ b/test/resources/mnist/mnist_estimator.py
@@ -4,171 +4,176 @@
 from __future__ import division
 from __future__ import print_function
 
+import logging
+
 import numpy as np
 import tensorflow as tf
 import os
 import argparse
 import json
 
+
 def cnn_model_fn(features, labels, mode):
-  """Model function for CNN."""
-  # Input Layer
-  # Reshape X to 4-D tensor: [batch_size, width, height, channels]
-  # MNIST images are 28x28 pixels, and have one color channel
-  input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
-
-  # Convolutional Layer #1
-  # Computes 32 features using a 5x5 filter with ReLU activation.
-  # Padding is added to preserve width and height.
-  # Input Tensor Shape: [batch_size, 28, 28, 1]
-  # Output Tensor Shape: [batch_size, 28, 28, 32]
-  conv1 = tf.layers.conv2d(
-      inputs=input_layer,
-      filters=32,
-      kernel_size=[5, 5],
-      padding="same",
-      activation=tf.nn.relu)
-
-  # Pooling Layer #1
-  # First max pooling layer with a 2x2 filter and stride of 2
-  # Input Tensor Shape: [batch_size, 28, 28, 32]
-  # Output Tensor Shape: [batch_size, 14, 14, 32]
-  pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
-
-  # Convolutional Layer #2
-  # Computes 64 features using a 5x5 filter.
-  # Padding is added to preserve width and height.
-  # Input Tensor Shape: [batch_size, 14, 14, 32]
-  # Output Tensor Shape: [batch_size, 14, 14, 64]
-  conv2 = tf.layers.conv2d(
-      inputs=pool1,
-      filters=64,
-      kernel_size=[5, 5],
-      padding="same",
-      activation=tf.nn.relu)
-
-  # Pooling Layer #2
-  # Second max pooling layer with a 2x2 filter and stride of 2
-  # Input Tensor Shape: [batch_size, 14, 14, 64]
-  # Output Tensor Shape: [batch_size, 7, 7, 64]
-  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
-
-  # Flatten tensor into a batch of vectors
-  # Input Tensor Shape: [batch_size, 7, 7, 64]
-  # Output Tensor Shape: [batch_size, 7 * 7 * 64]
-  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
-
-  # Dense Layer
-  # Densely connected layer with 1024 neurons
-  # Input Tensor Shape: [batch_size, 7 * 7 * 64]
-  # Output Tensor Shape: [batch_size, 1024]
-  dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
-
-  # Add dropout operation; 0.6 probability that element will be kept
-  dropout = tf.layers.dropout(
-      inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
-
-  # Logits layer
-  # Input Tensor Shape: [batch_size, 1024]
-  # Output Tensor Shape: [batch_size, 10]
-  logits = tf.layers.dense(inputs=dropout, units=10)
-
-  predictions = {
-      # Generate predictions (for PREDICT and EVAL mode)
-      "classes": tf.argmax(input=logits, axis=1),
-      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-      # `logging_hook`.
-      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
-  }
-  if mode == tf.estimator.ModeKeys.PREDICT:
-    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-  # Calculate Loss (for both TRAIN and EVAL modes)
-  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
-
-  # Configure the Training Op (for TRAIN mode)
-  if mode == tf.estimator.ModeKeys.TRAIN:
-    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
-    train_op = optimizer.minimize(
-        loss=loss,
-        global_step=tf.train.get_global_step())
-    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
-
-  # Add evaluation metrics (for EVAL mode)
-  eval_metric_ops = {
-      "accuracy": tf.metrics.accuracy(
-          labels=labels, predictions=predictions["classes"])}
-  return tf.estimator.EstimatorSpec(
-      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
+    """Model function for CNN."""
+    # Input Layer
+    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
+    # MNIST images are 28x28 pixels, and have one color channel
+    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
+
+    # Convolutional Layer #1
+    # Computes 32 features using a 5x5 filter with ReLU activation.
+    # Padding is added to preserve width and height.
+    # Input Tensor Shape: [batch_size, 28, 28, 1]
+    # Output Tensor Shape: [batch_size, 28, 28, 32]
+    conv1 = tf.compat.v1.layers.conv2d(
+        inputs=input_layer, filters=32, kernel_size=[5, 5], padding="same", activation=tf.nn.relu
+    )
+
+    # Pooling Layer #1
+    # First max pooling layer with a 2x2 filter and stride of 2
+    # Input Tensor Shape: [batch_size, 28, 28, 32]
+    # Output Tensor Shape: [batch_size, 14, 14, 32]
+    pool1 = tf.compat.v1.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
+
+    # Convolutional Layer #2
+    # Computes 64 features using a 5x5 filter.
+    # Padding is added to preserve width and height.
+    # Input Tensor Shape: [batch_size, 14, 14, 32]
+    # Output Tensor Shape: [batch_size, 14, 14, 64]
+    conv2 = tf.compat.v1.layers.conv2d(
+        inputs=pool1, filters=64, kernel_size=[5, 5], padding="same", activation=tf.nn.relu
+    )
+
+    # Pooling Layer #2
+    # Second max pooling layer with a 2x2 filter and stride of 2
+    # Input Tensor Shape: [batch_size, 14, 14, 64]
+    # Output Tensor Shape: [batch_size, 7, 7, 64]
+    pool2 = tf.compat.v1.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
+
+    # Flatten tensor into a batch of vectors
+    # Input Tensor Shape: [batch_size, 7, 7, 64]
+    # Output Tensor Shape: [batch_size, 7 * 7 * 64]
+    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
+
+    # Dense Layer
+    # Densely connected layer with 1024 neurons
+    # Input Tensor Shape: [batch_size, 7 * 7 * 64]
+    # Output Tensor Shape: [batch_size, 1024]
+    dense = tf.compat.v1.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
+
+    # Add dropout operation; 0.6 probability that element will be kept
+    dropout = tf.compat.v1.layers.dropout(
+        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN
+    )
+
+    # Logits layer
+    # Input Tensor Shape: [batch_size, 1024]
+    # Output Tensor Shape: [batch_size, 10]
+    logits = tf.compat.v1.layers.dense(inputs=dropout, units=10)
+
+    predictions = {
+        # Generate predictions (for PREDICT and EVAL mode)
+        "classes": tf.argmax(input=logits, axis=1),
+        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
+        # `logging_hook`.
+        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
+    }
+    if mode == tf.estimator.ModeKeys.PREDICT:
+        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+    # Calculate Loss (for both TRAIN and EVAL modes)
+    loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
+
+    # Configure the Training Op (for TRAIN mode)
+    if mode == tf.estimator.ModeKeys.TRAIN:
+        optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.001)
+        train_op = optimizer.minimize(loss=loss, global_step=tf.compat.v1.train.get_global_step())
+        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
+
+    # Add evaluation metrics (for EVAL mode)
+    eval_metric_ops = {
+        "accuracy": tf.compat.v1.metrics.accuracy(labels=labels, predictions=predictions["classes"])
+    }
+    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
+
 
 def _load_training_data(base_dir):
-    x_train = np.load(os.path.join(base_dir, 'train_data.npy'))
-    y_train = np.load(os.path.join(base_dir, 'train_labels.npy'))
+    x_train = np.load(os.path.join(base_dir, "train_data.npy"))
+    y_train = np.load(os.path.join(base_dir, "train_labels.npy"))
     return x_train, y_train
 
+
 def _load_testing_data(base_dir):
-    x_test = np.load(os.path.join(base_dir, 'eval_data.npy'))
-    y_test = np.load(os.path.join(base_dir, 'eval_labels.npy'))
+    x_test = np.load(os.path.join(base_dir, "eval_data.npy"))
+    y_test = np.load(os.path.join(base_dir, "eval_labels.npy"))
     return x_test, y_test
 
+
 def _parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
-    parser.add_argument('--model_dir', type=str)
-    parser.add_argument('--max-steps', type=int, default=200)
-    parser.add_argument('--save-checkpoint-steps', type=int, default=200)
-    parser.add_argument('--throttle-secs', type=int, default=60)
-    parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS']))
-    parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
-    parser.add_argument('--batch-size', type=int, default=100)
-    parser.add_argument('--export-model-during-training', type=bool, default=False)
+    parser.add_argument("--train", type=str, default=os.environ["SM_CHANNEL_TRAINING"])
+    parser.add_argument("--model_dir", type=str)
+    parser.add_argument("--max-steps", type=int, default=200)
+    parser.add_argument("--save-checkpoint-steps", type=int, default=200)
+    parser.add_argument("--throttle-secs", type=int, default=60)
+    parser.add_argument("--hosts", type=list, default=json.loads(os.environ["SM_HOSTS"]))
+    parser.add_argument("--current-host", type=str, default=os.environ["SM_CURRENT_HOST"])
+    parser.add_argument("--batch-size", type=int, default=100)
+    parser.add_argument("--export-model-during-training", type=bool, default=False)
     return parser.parse_known_args()
 
+
 def serving_input_fn():
-    inputs = {'x': tf.placeholder(tf.float32, [None, 784])}
+    inputs = {"x": tf.compat.v1.placeholder(tf.float32, [None, 784])}
     return tf.estimator.export.ServingInputReceiver(inputs, inputs)
 
+
 if __name__ == "__main__":
     args, unknown = _parse_args()
     for arg in vars(args):
         print(arg, getattr(args, arg))
 
-    tf.logging.set_verbosity(tf.logging.DEBUG)
+    logger = tf.get_logger()
+    logger.setLevel(logging.DEBUG)
     train_data, train_labels = _load_training_data(args.train)
     eval_data, eval_labels = _load_testing_data(args.train)
 
     # Saving a checkpoint after every step
     run_config = tf.estimator.RunConfig(save_checkpoints_steps=args.save_checkpoint_steps)
     mnist_classifier = tf.estimator.Estimator(
-        model_fn=cnn_model_fn, model_dir=args.model_dir, config=run_config)
+        model_fn=cnn_model_fn, model_dir=args.model_dir, config=run_config
+    )
 
     # Set up logging for predictions
     # Log the values in the "Softmax" tensor with label "probabilities"
     tensors_to_log = {"probabilities": "softmax_tensor"}
-    logging_hook = tf.train.LoggingTensorHook(
-        tensors=tensors_to_log, every_n_iter=50
-    )
+    logging_hook = tf.estimator.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
 
     # Train the model
-    train_input_fn = tf.estimator.inputs.numpy_input_fn(
+    train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
         x={"x": train_data},
         y=train_labels,
         batch_size=args.batch_size,
         num_epochs=None,
-        shuffle=True)
+        shuffle=True,
+    )
 
-    exporter = tf.estimator.LatestExporter('Servo', serving_input_receiver_fn=serving_input_fn) \
-        if args.export_model_during_training else None
+    exporter = (
+        tf.compat.v1.estimator.LatestExporter("Servo", serving_input_receiver_fn=serving_input_fn)
+        if args.export_model_during_training
+        else None
+    )
     # Evaluate the model and print results
-    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-        x={"x": eval_data},
-        y=eval_labels,
-        num_epochs=1,
-        shuffle=False)
+    eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
+        x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
+    )
 
     train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=args.max_steps)
-    eval_spec = tf.estimator.EvalSpec(eval_input_fn, throttle_secs=args.throttle_secs, exporters=exporter)
+    eval_spec = tf.estimator.EvalSpec(
+        eval_input_fn, throttle_secs=args.throttle_secs, exporters=exporter
+    )
     tf.estimator.train_and_evaluate(mnist_classifier, train_spec, eval_spec)
 
     if args.current_host == args.hosts[0]:
-        mnist_classifier.export_savedmodel('/opt/ml/model', serving_input_fn)
+        mnist_classifier.export_saved_model("/opt/ml/model", serving_input_fn)
diff --git a/test/resources/tuning_model_dir/entry.py b/test/resources/tuning_model_dir/entry.py
index 0bce7165..09d44abc 100644
--- a/test/resources/tuning_model_dir/entry.py
+++ b/test/resources/tuning_model_dir/entry.py
@@ -16,11 +16,13 @@
 import os
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--model_dir', type=str)
-parser.add_argument('--arbitrary_value', type=int, default=0)
+parser.add_argument("--model_dir", type=str)
+parser.add_argument("--arbitrary_value", type=int, default=0)
 args = parser.parse_args()
 
-assert os.environ['TRAINING_JOB_NAME'] in args.model_dir, 'model_dir not unique to training job: %s' % args.model_dir
+assert os.environ["TRAINING_JOB_NAME"] in args.model_dir, (
+    "model_dir not unique to training job: %s" % args.model_dir
+)
 
 # For the "hyperparameter tuning" to work
-print('accuracy=1')
+print("accuracy=1")
diff --git a/test/unit/test_deep_learning_container.py b/test/unit/test_deep_learning_container.py
deleted file mode 100644
index 7d5d7d86..00000000
--- a/test/unit/test_deep_learning_container.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the 'License'). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the 'license' file accompanying this file. This file is
-# distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import unittest
-
-from docker.build_artifacts import deep_learning_container as deep_learning_container_to_test
-import pytest
-import requests
-
-
-@pytest.fixture(name='fixture_valid_instance_id')
-def fixture_valid_instance_id(requests_mock):
-    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id',
-                             text='i-123t32e11s32t1231')
-
-
-@pytest.fixture(name='fixture_invalid_instance_id')
-def fixture_invalid_instance_id(requests_mock):
-    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text='i-123')
-
-
-@pytest.fixture(name='fixture_none_instance_id')
-def fixture_none_instance_id(requests_mock):
-    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text=None)
-
-
-@pytest.fixture(name='fixture_invalid_region')
-def fixture_invalid_region(requests_mock):
-    return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document',
-                             json={'region': 'test'})
-
-
-@pytest.fixture(name='fixture_valid_region')
-def fixture_valid_region(requests_mock):
-    return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document',
-                             json={'region': 'us-east-1'})
-
-
-def test_retrieve_instance_id(fixture_valid_instance_id):
-    result = deep_learning_container_to_test._retrieve_instance_id()
-    assert 'i-123t32e11s32t1231' == result
-
-
-def test_retrieve_none_instance_id(fixture_none_instance_id):
-    result = deep_learning_container_to_test._retrieve_instance_id()
-    assert result is None
-
-
-def test_retrieve_invalid_instance_id(fixture_invalid_instance_id):
-    result = deep_learning_container_to_test._retrieve_instance_id()
-    assert result is None
-
-
-def test_retrieve_invalid_region(fixture_invalid_region):
-    result = deep_learning_container_to_test._retrieve_instance_region()
-    assert result is None
-
-
-def test_retrieve_valid_region(fixture_valid_region):
-    result = deep_learning_container_to_test._retrieve_instance_region()
-    assert 'us-east-1' == result
-
-
-def test_query_bucket(requests_mock, fixture_valid_region, fixture_valid_instance_id):
-    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
-    fixture_valid_region.return_value = 'us-east-1'
-    requests_mock.get(('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
-                       '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231'),
-                      text='Access Denied')
-    actual_response = deep_learning_container_to_test.query_bucket()
-    assert 'Access Denied' == actual_response.text
-
-
-def test_query_bucket_region_none(fixture_invalid_region, fixture_valid_instance_id):
-    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
-    fixture_invalid_region.return_value = None
-    actual_response = deep_learning_container_to_test.query_bucket()
-    assert actual_response is None
-
-
-def test_query_bucket_instance_id_none(requests_mock, fixture_valid_region, fixture_none_instance_id):
-    fixture_none_instance_id.return_value = None
-    fixture_valid_region.return_value = 'us-east-1'
-    actual_response = deep_learning_container_to_test.query_bucket()
-    assert actual_response is None
-
-
-def test_query_bucket_instance_id_invalid(requests_mock, fixture_valid_region, fixture_invalid_instance_id):
-    fixture_invalid_instance_id.return_value = None
-    fixture_valid_region.return_value = 'us-east-1'
-    actual_response = deep_learning_container_to_test.query_bucket()
-    assert actual_response is None
-
-
-def test_HTTP_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
-    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
-    fixture_valid_region.return_value = 'us-east-1'
-    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
-                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
-
-    requests_mock.get(
-        query_s3_url,
-        exc=requests.exceptions.HTTPError)
-    requests_mock.side_effect = requests.exceptions.HTTPError
-
-    with pytest.raises(requests.exceptions.HTTPError):
-        actual_response = requests.get(query_s3_url)
-        assert actual_response is None
-
-
-def test_connection_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
-    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
-    fixture_valid_region.return_value = 'us-east-1'
-    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
-                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
-
-    requests_mock.get(
-        query_s3_url,
-        exc=requests.exceptions.ConnectionError)
-
-    with pytest.raises(requests.exceptions.ConnectionError):
-        actual_response = requests.get(
-            query_s3_url)
-
-        assert actual_response is None
-
-
-def test_timeout_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
-    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
-    fixture_valid_region.return_value = 'us-east-1'
-    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
-                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
-
-    requests_mock.get(
-        query_s3_url,
-        exc=requests.Timeout)
-
-    with pytest.raises(requests.exceptions.Timeout):
-        actual_response = requests.get(
-            query_s3_url)
-
-        assert actual_response is None
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/unit/test_s3_utils.py b/test/unit/test_s3_utils.py
index 03de70a3..2bd63bf8 100644
--- a/test/unit/test_s3_utils.py
+++ b/test/unit/test_s3_utils.py
@@ -19,30 +19,30 @@
 from sagemaker_tensorflow_container import s3_utils
 
 
-BUCKET_REGION = 'us-west-2'
-JOB_REGION = 'us-west-1'
-JOB_BUKCET = 'sagemaker-us-west-2-000-00-1'
-PREFIX = 'sagemaker/something'
-MODEL_DIR = 's3://{}/{}'.format(JOB_BUKCET, PREFIX)
+BUCKET_REGION = "us-west-2"
+JOB_REGION = "us-west-1"
+JOB_BUCKET = "sagemaker-us-west-2-000-00-1"
+PREFIX = "sagemaker/something"
+MODEL_DIR = "s3://{}/{}".format(JOB_BUCKET, PREFIX)
 
 
-@patch('boto3.client')
+@patch("boto3.client")
 def test_configure(client):
     s3 = MagicMock()
     client.return_value = s3
-    loc = {'LocationConstraint': BUCKET_REGION}
+    loc = {"LocationConstraint": BUCKET_REGION}
     s3.get_bucket_location.return_value = loc
 
     s3_utils.configure(MODEL_DIR, JOB_REGION)
 
-    assert os.environ['S3_REGION'] == BUCKET_REGION
-    assert os.environ['TF_CPP_MIN_LOG_LEVEL'] == '1'
-    assert os.environ['S3_USE_HTTPS'] == '1'
+    assert os.environ["S3_REGION"] == BUCKET_REGION
+    assert os.environ["TF_CPP_MIN_LOG_LEVEL"] == "1"
+    assert os.environ["S3_USE_HTTPS"] == "1"
 
 
 def test_configure_local_dir():
-    s3_utils.configure('/opt/ml/model', JOB_REGION)
+    s3_utils.configure("/opt/ml/model", JOB_REGION)
 
-    assert os.environ['S3_REGION'] == JOB_REGION
-    assert os.environ['TF_CPP_MIN_LOG_LEVEL'] == '1'
-    assert os.environ['S3_USE_HTTPS'] == '1'
+    assert os.environ["S3_REGION"] == JOB_REGION
+    assert os.environ["TF_CPP_MIN_LOG_LEVEL"] == "1"
+    assert os.environ["S3_USE_HTTPS"] == "1"
diff --git a/test/unit/test_training.py b/test/unit/test_training.py
index b69beed2..2795af44 100644
--- a/test/unit/test_training.py
+++ b/test/unit/test_training.py
@@ -17,32 +17,32 @@
 
 from mock import MagicMock, patch
 import pytest
-from sagemaker_containers.beta.framework import runner
+from sagemaker_training import runner
 import tensorflow as tf
 
 from sagemaker_tensorflow_container import training
 
-MODULE_DIR = 's3://my/bucket'
-MODULE_NAME = 'script_name'
-LOG_LEVEL = 'Debug'
-HOST1 = 'host1'
-HOST2 = 'host2'
+MODULE_DIR = "s3://my/bucket"
+MODULE_NAME = "script_name"
+LOG_LEVEL = "Debug"
+HOST1 = "host1"
+HOST2 = "host2"
 HOST_LIST = [HOST1, HOST2]
 CURRENT_HOST = HOST1
-CMD_ARGS = {'some_key': 'some_value'}
+CMD_ARGS = {"some_key": "some_value"}
 CLUSTER_WITH_PS = {
-    'master': ['{}:2222'.format(HOST1)],
-    'worker': ['{}:2222'.format(HOST2)],
-    'ps': ['{}:2223'.format(HOST1), '{}:2223'.format(HOST2)]
+    "master": ["{}:2222".format(HOST1)],
+    "worker": ["{}:2222".format(HOST2)],
+    "ps": ["{}:2223".format(HOST1), "{}:2223".format(HOST2)],
 }
-MASTER_TASK = {'index': 0, 'type': 'master'}
-WORKER_TASK = {'index': 0, 'type': 'worker'}
-PS_TASK_1 = {'index': 0, 'type': 'ps'}
-PS_TASK_2 = {'index': 1, 'type': 'ps'}
-MODEL_DIR = 's3://bucket/prefix'
-MODEL_DIR_CMD_LIST = ['--model_dir', MODEL_DIR]
-REGION = 'us-west-2'
-RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', 'resources')
+MASTER_TASK = {"index": 0, "type": "master"}
+WORKER_TASK = {"index": 0, "type": "worker"}
+PS_TASK_1 = {"index": 0, "type": "ps"}
+PS_TASK_2 = {"index": 1, "type": "ps"}
+MODEL_DIR = "s3://bucket/prefix"
+MODEL_DIR_CMD_LIST = ["--model_dir", MODEL_DIR]
+REGION = "us-west-2"
+RESOURCE_PATH = os.path.join(os.path.dirname(__file__), "..", "resources")
 
 
 @pytest.fixture
@@ -50,9 +50,7 @@ def distributed_training_env():
     env = simple_training_env()
 
     env.hosts = HOST_LIST
-    env.additional_framework_parameters = {
-        training.SAGEMAKER_PARAMETER_SERVER_ENABLED: True
-    }
+    env.additional_framework_parameters = {training.SAGEMAKER_PARAMETER_SERVER_ENABLED: True}
     return env
 
 
@@ -65,187 +63,238 @@ def simple_training_env():
     env = MagicMock()
     env.module_dir = MODULE_DIR
     env.user_entry_point = MODULE_NAME
-    env.hyperparameters = {'model_dir': MODEL_DIR}
+    env.hyperparameters = {"model_dir": MODEL_DIR}
     env.log_level = LOG_LEVEL
     env.additional_framework_parameters = {}
     env.hosts = CURRENT_HOST
     env.current_host = CURRENT_HOST
     env.to_env_vars = lambda: {}
-    env.job_name = 'test-training-job'
+    env.job_name = "test-training-job"
     return env
 
 
 def test_is_host_master():
     assert training._is_host_master(HOST_LIST, CURRENT_HOST) is True
-    assert training._is_host_master(HOST_LIST, 'host2') is False
-    assert training._is_host_master(HOST_LIST, 'somehost') is False
+    assert training._is_host_master(HOST_LIST, "host2") is False
+    assert training._is_host_master(HOST_LIST, "somehost") is False
 
 
-@patch('sagemaker_containers.beta.framework.entry_point.run')
+@patch("sagemaker_training.entry_point.run")
 def test_single_machine(run_module, single_machine_training_env):
     training.train(single_machine_training_env, MODEL_DIR_CMD_LIST)
-    run_module.assert_called_with(MODULE_DIR, MODULE_NAME, MODEL_DIR_CMD_LIST,
-                                  single_machine_training_env.to_env_vars(),
-                                  runner=runner.ProcessRunnerType)
+    run_module.assert_called_with(
+        uri=MODULE_DIR,
+        user_entry_point=MODULE_NAME,
+        args=MODEL_DIR_CMD_LIST,
+        env_vars=single_machine_training_env.to_env_vars(),
+        capture_error=True,
+        runner_type=runner.ProcessRunnerType,
+    )
 
 
-@patch('sagemaker_containers.beta.framework.entry_point.run')
+@patch("sagemaker_training.entry_point.run")
 def test_train_horovod(run_module, single_machine_training_env):
-    single_machine_training_env.additional_framework_parameters['sagemaker_mpi_enabled'] = True
+    single_machine_training_env.additional_framework_parameters["sagemaker_mpi_enabled"] = True
 
     training.train(single_machine_training_env, MODEL_DIR_CMD_LIST)
-    run_module.assert_called_with(MODULE_DIR, MODULE_NAME, MODEL_DIR_CMD_LIST,
-                                  single_machine_training_env.to_env_vars(),
-                                  runner=runner.MPIRunnerType)
-
-
-@pytest.mark.skipif(sys.version_info.major != 3,
-                    reason="Skip this for python 2 because of dict key order mismatch")
-@patch('tensorflow.train.ClusterSpec')
-@patch('tensorflow.train.Server')
-@patch('sagemaker_containers.beta.framework.entry_point.run')
-@patch('multiprocessing.Process', lambda target: target())
-@patch('time.sleep', MagicMock())
+    run_module.assert_called_with(
+        uri=MODULE_DIR,
+        user_entry_point=MODULE_NAME,
+        args=MODEL_DIR_CMD_LIST,
+        env_vars=single_machine_training_env.to_env_vars(),
+        capture_error=True,
+        runner_type=runner.MPIRunnerType,
+    )
+
+
+@pytest.mark.skip_on_pipeline
+@pytest.mark.skipif(
+    sys.version_info.major != 3, reason="Skip this for python 2 because of dict key order mismatch"
+)
+@patch("tensorflow.train.ClusterSpec")
+@patch("tensorflow.train.Server")
+@patch("sagemaker_training.entry_point.run")
+@patch("multiprocessing.Process", lambda target: target())
+@patch("time.sleep", MagicMock())
 def test_train_distributed_master(run, tf_server, cluster_spec, distributed_training_env):
     training.train(distributed_training_env, MODEL_DIR_CMD_LIST)
 
-    cluster_spec.assert_called_with({'worker': ['host2:2222'],
-                                     'master': ['host1:2222'],
-                                     'ps': ['host1:2223', 'host2:2223']})
+    cluster_spec.assert_called_with(
+        {"worker": ["host2:2222"], "master": ["host1:2222"], "ps": ["host1:2223", "host2:2223"]}
+    )
 
     tf_server.assert_called_with(
-        cluster_spec(), job_name='ps', task_index=0, config=tf.ConfigProto(device_count={'GPU': 0})
+        cluster_spec(), job_name="ps", task_index=0, config=tf.ConfigProto(device_count={"GPU": 0})
     )
     tf_server().join.assert_called_with()
 
-    tf_config = '{"cluster": {' \
-                '"master": ["host1:2222"], ' \
-                '"ps": ["host1:2223", "host2:2223"], ' \
-                '"worker": ["host2:2222"]}, ' \
-                '"environment": "cloud", ' \
-                '"task": {"index": 0, "type": "master"}}'
+    tf_config = (
+        '{"cluster": {'
+        '"master": ["host1:2222"], '
+        '"ps": ["host1:2223", "host2:2223"], '
+        '"worker": ["host2:2222"]}, '
+        '"environment": "cloud", '
+        '"task": {"index": 0, "type": "master"}}'
+    )
 
-    run.assert_called_with('s3://my/bucket', 'script_name', MODEL_DIR_CMD_LIST,
-                           {'TF_CONFIG': tf_config})
+    run.assert_called_with(
+        uri="s3://my/bucket",
+        user_entry_point="script_name",
+        args=MODEL_DIR_CMD_LIST,
+        env_vars={"TF_CONFIG": tf_config},
+        capture_error=True,
+    )
 
 
-@pytest.mark.skipif(sys.version_info.major != 3,
-                    reason="Skip this for python 2 because of dict key order mismatch")
-@patch('tensorflow.train.ClusterSpec')
-@patch('tensorflow.train.Server')
-@patch('sagemaker_containers.beta.framework.entry_point.run')
-@patch('multiprocessing.Process', lambda target: target())
-@patch('time.sleep', MagicMock())
+@pytest.mark.skip_on_pipeline
+@pytest.mark.skipif(
+    sys.version_info.major != 3, reason="Skip this for python 2 because of dict key order mismatch"
+)
+@patch("tensorflow.train.ClusterSpec")
+@patch("tensorflow.train.Server")
+@patch("sagemaker_training.entry_point.run")
+@patch("multiprocessing.Process", lambda target: target())
+@patch("time.sleep", MagicMock())
 def test_train_distributed_worker(run, tf_server, cluster_spec, distributed_training_env):
     distributed_training_env.current_host = HOST2
 
     training.train(distributed_training_env, MODEL_DIR_CMD_LIST)
 
-    cluster_spec.assert_called_with({'worker': ['host2:2222'],
-                                     'master': ['host1:2222'],
-                                     'ps': ['host1:2223', 'host2:2223']})
+    cluster_spec.assert_called_with(
+        {"worker": ["host2:2222"], "master": ["host1:2222"], "ps": ["host1:2223", "host2:2223"]}
+    )
 
     tf_server.assert_called_with(
-        cluster_spec(), job_name='ps', task_index=1, config=tf.ConfigProto(device_count={'GPU': 0})
+        cluster_spec(), job_name="ps", task_index=1, config=tf.ConfigProto(device_count={"GPU": 0})
     )
     tf_server().join.assert_called_with()
 
-    tf_config = '{"cluster": {' \
-                '"master": ["host1:2222"], ' \
-                '"ps": ["host1:2223", "host2:2223"], ' \
-                '"worker": ["host2:2222"]}, ' \
-                '"environment": "cloud", ' \
-                '"task": {"index": 0, "type": "worker"}}'
+    tf_config = (
+        '{"cluster": {'
+        '"master": ["host1:2222"], '
+        '"ps": ["host1:2223", "host2:2223"], '
+        '"worker": ["host2:2222"]}, '
+        '"environment": "cloud", '
+        '"task": {"index": 0, "type": "worker"}}'
+    )
 
-    run.assert_called_with('s3://my/bucket', 'script_name', MODEL_DIR_CMD_LIST,
-                           {'TF_CONFIG': tf_config})
+    run.assert_called_with(
+        uri="s3://my/bucket",
+        user_entry_point="script_name",
+        args=MODEL_DIR_CMD_LIST,
+        env_vars={"TF_CONFIG": tf_config},
+        capture_error=True,
+    )
 
 
-@patch('sagemaker_containers.beta.framework.entry_point.run')
+@patch("sagemaker_training.entry_point.run")
 def test_train_distributed_no_ps(run, distributed_training_env):
     distributed_training_env.additional_framework_parameters[
-        training.SAGEMAKER_PARAMETER_SERVER_ENABLED] = False
+        training.SAGEMAKER_PARAMETER_SERVER_ENABLED
+    ] = False
     distributed_training_env.current_host = HOST2
     training.train(distributed_training_env, MODEL_DIR_CMD_LIST)
 
-    run.assert_called_with(MODULE_DIR, MODULE_NAME, MODEL_DIR_CMD_LIST,
-                           distributed_training_env.to_env_vars(), runner=runner.ProcessRunnerType)
+    run.assert_called_with(
+        uri=MODULE_DIR,
+        user_entry_point=MODULE_NAME,
+        args=MODEL_DIR_CMD_LIST,
+        env_vars=distributed_training_env.to_env_vars(),
+        capture_error=True,
+        runner_type=runner.ProcessRunnerType,
+    )
 
 
 def test_build_tf_config():
     assert training._build_tf_config(HOST_LIST, HOST1) == {
-        'cluster': CLUSTER_WITH_PS,
-        'environment': 'cloud',
-        'task': MASTER_TASK
+        "cluster": CLUSTER_WITH_PS,
+        "environment": "cloud",
+        "task": MASTER_TASK,
     }
     assert training._build_tf_config(HOST_LIST, HOST1, ps_task=True) == {
-        'cluster': CLUSTER_WITH_PS,
-        'environment': 'cloud',
-        'task': PS_TASK_1
+        "cluster": CLUSTER_WITH_PS,
+        "environment": "cloud",
+        "task": PS_TASK_1,
     }
     assert training._build_tf_config(HOST_LIST, HOST2) == {
-        'cluster': CLUSTER_WITH_PS,
-        'environment': 'cloud',
-        'task': WORKER_TASK
+        "cluster": CLUSTER_WITH_PS,
+        "environment": "cloud",
+        "task": WORKER_TASK,
     }
     assert training._build_tf_config(HOST_LIST, HOST2, ps_task=True) == {
-        'cluster': CLUSTER_WITH_PS,
-        'environment': 'cloud',
-        'task': PS_TASK_2}
+        "cluster": CLUSTER_WITH_PS,
+        "environment": "cloud",
+        "task": PS_TASK_2,
+    }
 
 
 def test_build_tf_config_error():
     with pytest.raises(ValueError) as error:
         training._build_tf_config([HOST1], HOST1, ps_task=True)
-    assert 'Cannot have a ps task if there are no parameter servers in the cluster' in str(error.value)
+    assert "Cannot have a ps task if there are no parameter servers in the cluster" in str(
+        error.value
+    )
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
+@patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_no_model(logger):
-    path = os.path.join(RESOURCE_PATH, 'test_dir_empty')
+    path = os.path.join(RESOURCE_PATH, "test_dir_empty")
     if not os.path.exists(path):
         os.mkdir(path)
     training._log_model_missing_warning(path)
-    logger.warn.assert_called_with('No model artifact is saved under path {}.'
-                                   ' Your training job will not save any model files to S3.\n'
-                                   'For details of how to construct your training script see:\n'
-                                   'https://sagemaker.readthedocs.io/en/stable/using_tf.html#adapting-your-local-tensorflow-script'  # noqa
-                                   .format(path))
+    logger.warn.assert_called_with(
+        "No model artifact is saved under path {}."
+        " Your training job will not save any model files to S3.\n"
+        "For details of how to construct your training script see:\n"
+        "https://sagemaker.readthedocs.io/en/stable/using_tf.html#adapting-your-local-tensorflow-script".format(  # noqa
+            path
+        )
+    )
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
+@patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_wrong_format(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, 'test_dir_wrong_model'))
-    logger.warn.assert_called_with('Your model will NOT be servable with SageMaker TensorFlow Serving container. '
-                                   'The model artifact was not saved in the TensorFlow '
-                                   'SavedModel directory structure:\n'
-                                   'https://www.tensorflow.org/guide/saved_model#structure_of_a_savedmodel_directory')
+    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_wrong_model"))
+    logger.warn.assert_called_with(
+        "Your model will NOT be servable with SageMaker TensorFlow Serving container. "
+        "The model artifact was not saved in the TensorFlow "
+        "SavedModel directory structure:\n"
+        "https://www.tensorflow.org/guide/saved_model#structure_of_a_savedmodel_directory"
+    )
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
+@patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_wrong_parent_dir(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, 'test_dir_wrong_parent_dir'))
-    logger.warn.assert_called_with('Your model will NOT be servable with SageMaker TensorFlow Serving containers. '
-                                   'The SavedModel bundle is under directory \"{}\", not a numeric name.'
-                                   .format('not-digit'))
+    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_wrong_parent_dir"))
+    logger.warn.assert_called_with(
+        "Your model will NOT be servable with SageMaker TensorFlow Serving containers. "
+        'The SavedModel bundle is under directory "{}", not a numeric name.'.format("not-digit")
+    )
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
+@patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_correct(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, 'test_dir_correct_model'))
+    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_correct_model"))
     logger.warn.assert_not_called()
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
-@patch('sagemaker_tensorflow_container.training.train')
-@patch('logging.Logger.setLevel')
-@patch('sagemaker_containers.beta.framework.training_env')
-@patch('sagemaker_containers.beta.framework.env.read_hyperparameters', return_value={})
-@patch('sagemaker_tensorflow_container.s3_utils.configure')
-def test_main(configure_s3_env, read_hyperparameters, training_env,
-              set_level, train, logger, single_machine_training_env):
+@patch("sagemaker_tensorflow_container.training.logger")
+@patch("sagemaker_tensorflow_container.training.train")
+@patch("logging.Logger.setLevel")
+@patch("sagemaker_training.environment.Environment")
+@patch("sagemaker_training.environment.read_hyperparameters", return_value={})
+@patch("sagemaker_tensorflow_container.s3_utils.configure")
+def test_main(
+    configure_s3_env,
+    read_hyperparameters,
+    training_env,
+    set_level,
+    train,
+    logger,
+    single_machine_training_env,
+):
     training_env.return_value = single_machine_training_env
-    os.environ['SAGEMAKER_REGION'] = REGION
+    os.environ["SAGEMAKER_REGION"] = REGION
     training.main()
     read_hyperparameters.assert_called_once_with()
     training_env.assert_called_once_with(hyperparameters={})
@@ -253,46 +302,71 @@ def test_main(configure_s3_env, read_hyperparameters, training_env,
     configure_s3_env.assert_called_once()
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
-@patch('sagemaker_tensorflow_container.training.train')
-@patch('logging.Logger.setLevel')
-@patch('sagemaker_containers.beta.framework.training_env')
-@patch('sagemaker_containers.beta.framework.env.read_hyperparameters', return_value={'model_dir': MODEL_DIR})
-@patch('sagemaker_tensorflow_container.s3_utils.configure')
-def test_main_simple_training_model_dir(configure_s3_env, read_hyperparameters, training_env,
-                                        set_level, train, logger, single_machine_training_env):
+@patch("sagemaker_tensorflow_container.training.logger")
+@patch("sagemaker_tensorflow_container.training.train")
+@patch("logging.Logger.setLevel")
+@patch("sagemaker_training.environment.Environment")
+@patch("sagemaker_training.environment.read_hyperparameters", return_value={"model_dir": MODEL_DIR})
+@patch("sagemaker_tensorflow_container.s3_utils.configure")
+def test_main_simple_training_model_dir(
+    configure_s3_env,
+    read_hyperparameters,
+    training_env,
+    set_level,
+    train,
+    logger,
+    single_machine_training_env,
+):
     training_env.return_value = single_machine_training_env
-    os.environ['SAGEMAKER_REGION'] = REGION
+    os.environ["SAGEMAKER_REGION"] = REGION
     training.main()
     configure_s3_env.assert_called_once_with(MODEL_DIR, REGION)
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
-@patch('sagemaker_tensorflow_container.training.train')
-@patch('logging.Logger.setLevel')
-@patch('sagemaker_containers.beta.framework.training_env')
-@patch('sagemaker_containers.beta.framework.env.read_hyperparameters', return_value={'model_dir': MODEL_DIR,
-                                                                                     '_tuning_objective_metric': 'auc'})
-@patch('sagemaker_tensorflow_container.s3_utils.configure')
-def test_main_tuning_model_dir(configure_s3_env, read_hyperparameters, training_env,
-                               set_level, train, logger, single_machine_training_env):
+@patch("sagemaker_tensorflow_container.training.logger")
+@patch("sagemaker_tensorflow_container.training.train")
+@patch("logging.Logger.setLevel")
+@patch("sagemaker_training.environment.Environment")
+@patch(
+    "sagemaker_training.environment.read_hyperparameters",
+    return_value={"model_dir": MODEL_DIR, "_tuning_objective_metric": "auc"},
+)
+@patch("sagemaker_tensorflow_container.s3_utils.configure")
+def test_main_tuning_model_dir(
+    configure_s3_env,
+    read_hyperparameters,
+    training_env,
+    set_level,
+    train,
+    logger,
+    single_machine_training_env,
+):
     training_env.return_value = single_machine_training_env
-    os.environ['SAGEMAKER_REGION'] = REGION
+    os.environ["SAGEMAKER_REGION"] = REGION
     training.main()
-    expected_model_dir = '{}/{}/model'.format(MODEL_DIR, single_machine_training_env.job_name)
+    expected_model_dir = "{}/{}/model".format(MODEL_DIR, single_machine_training_env.job_name)
     configure_s3_env.assert_called_once_with(expected_model_dir, REGION)
 
 
-@patch('sagemaker_tensorflow_container.training.logger')
-@patch('sagemaker_tensorflow_container.training.train')
-@patch('logging.Logger.setLevel')
-@patch('sagemaker_containers.beta.framework.training_env')
-@patch('sagemaker_containers.beta.framework.env.read_hyperparameters', return_value={'model_dir': '/opt/ml/model',
-                                                                                     '_tuning_objective_metric': 'auc'})
-@patch('sagemaker_tensorflow_container.s3_utils.configure')
-def test_main_tuning_mpi_model_dir(configure_s3_env, read_hyperparameters, training_env,
-                                   set_level, train, logger, single_machine_training_env):
+@patch("sagemaker_tensorflow_container.training.logger")
+@patch("sagemaker_tensorflow_container.training.train")
+@patch("logging.Logger.setLevel")
+@patch("sagemaker_training.environment.Environment")
+@patch(
+    "sagemaker_training.environment.read_hyperparameters",
+    return_value={"model_dir": "/opt/ml/model", "_tuning_objective_metric": "auc"},
+)
+@patch("sagemaker_tensorflow_container.s3_utils.configure")
+def test_main_tuning_mpi_model_dir(
+    configure_s3_env,
+    read_hyperparameters,
+    training_env,
+    set_level,
+    train,
+    logger,
+    single_machine_training_env,
+):
     training_env.return_value = single_machine_training_env
-    os.environ['SAGEMAKER_REGION'] = REGION
+    os.environ["SAGEMAKER_REGION"] = REGION
     training.main()
-    configure_s3_env.assert_called_once_with('/opt/ml/model', REGION)
+    configure_s3_env.assert_called_once_with("/opt/ml/model", REGION)
diff --git a/test/resources/test_py_version/entry.py b/test/utils/__init__.py
similarity index 67%
rename from test/resources/test_py_version/entry.py
rename to test/utils/__init__.py
index 8f71a01b..79cb9cdf 100644
--- a/test/resources/test_py_version/entry.py
+++ b/test/utils/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -11,12 +11,3 @@
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
-
-import os
-import sys
-
-
-py_version = '%s.%s' % (sys.version_info.major, sys.version_info.minor)
-
-with open(os.path.join(os.environ['SM_OUTPUT_DIR'], 'py_version'), 'a') as f:
-    f.write(py_version)
diff --git a/test/utils/image_utils.py b/test/utils/image_utils.py
new file mode 100644
index 00000000..9fe5b590
--- /dev/null
+++ b/test/utils/image_utils.py
@@ -0,0 +1,72 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+import subprocess
+import sys
+
+CYAN_COLOR = "\033[36m"
+END_COLOR = "\033[0m"
+DLC_AWS_ID = "763104351884"
+
+
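+# Builds the image used by the integration tests: packages the toolkit as an sdist,
+# authenticates with the AWS Deep Learning Containers (DLC) registry when a DLC-based
+# Dockerfile is requested, and runs "docker build" against
+# test/container/<framework_version>/<dockerfile>.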
+def build_image(framework_version, dockerfile, image_uri, region, cwd="."):
+    _check_call("python setup.py sdist")
+
+    if "dlc" in dockerfile:
+        ecr_login(region, DLC_AWS_ID)
+
+    dockerfile_location = os.path.join("test", "container", framework_version, dockerfile)
+
+    subprocess.check_call(
+        [
+            "docker",
+            "build",
+            "-t",
+            image_uri,
+            "-f",
+            dockerfile_location,
+            "--build-arg",
+            "region={}".format(region),
+            cwd,
+        ],
+        cwd=cwd,
+    )
+    print("created image {}".format(image_uri))
+    return image_uri
+
+
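+# Pushes a locally built image to ECR after logging in to the target account's registry.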
+def push_image(ecr_image, region, aws_id):
+    ecr_login(region, aws_id)
+    _check_call("docker push {}".format(ecr_image))
+
+
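+# Obtains a "docker login" command from "aws ecr get-login" and executes it to
+# authenticate the local Docker client with the given ECR registry.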
+def ecr_login(region, aws_id):
+    login = _check_call(
+        "aws ecr get-login --registry-ids {} ".format(aws_id)
+        + "--no-include-email --region {}".format(region)
+    )
+    _check_call(login.decode("utf-8").rstrip("\n"))
+
+
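+# Echoes the command, then runs it and returns its captured stdout; string commands
+# are split on spaces before execution.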
+def _check_call(cmd, *popenargs, **kwargs):
+    if isinstance(cmd, str):
+        cmd = cmd.split(" ")
+    _print_cmd(cmd)
+    return subprocess.check_output(cmd, *popenargs, **kwargs)
+
+
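+# Prints the command about to run, highlighted in cyan so it stands out in test logs.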
+def _print_cmd(cmd):
+    print("executing docker command: {}{}{}".format(CYAN_COLOR, " ".join(cmd), END_COLOR))
+    sys.stdout.flush()
diff --git a/tox.ini b/tox.ini
index b4f6fbb0..17ed3095 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py27,py36,flake8
+envlist = py27,py36,py37,flake8
 skip_missing_interpreters = False
 
 [travis]
@@ -27,6 +27,8 @@ exclude =
     benchmarks/
 max-complexity = 10
 ignore =
+    C901,
+    E203,  # whitespace before ':': Black disagrees with and explicitly violates this.
     FI10,
     FI12,
     FI13,
@@ -43,7 +45,7 @@ ignore =
     FI55,
     FI56,
     FI57,
-    E722
+    W503
 
 require-code = True
 
@@ -61,7 +63,6 @@ passenv =
 commands =
     coverage run --rcfile .coveragerc_{envname} --source sagemaker_tensorflow_container -m py.test {posargs}
     {env:IGNORE_COVERAGE:} coverage report --include *sagemaker_tensorflow_container* --show-missing
-deps = sagemaker-containers
 extras = test
 
 [testenv:flake8]