This repository has been archived by the owner on Jan 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 16
/
setup.py
41 lines (34 loc) · 1.33 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Setup module for the healthcare_deid DLP pipeline.
All of the code necessary to run the pipeline is packaged into a source
distribution that is uploaded to the --staging_location specified on the command
line. The source distribution is then installed on the workers before they
start running.
When remotely executing the pipeline, `--setup_file path/to/setup.py` must be
added to the pipeline's command line.
"""
import os
import setuptools
# Third-party dependencies that must be installed on top of the standard
# Dataflow worker environment. Entries may carry pip-style version
# specifiers when a particular release is required.
REQUIRED_PACKAGES = [
    'apache_beam[gcp]',
    'google-api-python-client',
    'google-cloud-storage',
    'six==1.10.0',
]
# Packages that are always shipped to the workers. Each one lives in a
# top-level directory of the same name, so it maps to itself in package_dir.
packages = ['common', 'dlp', 'physionet']
package_dir = {p: p for p in packages}

# Use eval from bazel-bin so we get the generated results_pb2.py file.
# If it doesn't exist, then the job is another pipeline that doesn't need eval.
eval_bazel_path = 'bazel-bin/eval/run_pipeline.runfiles/__main__/eval'
if os.path.exists(eval_bazel_path):
    # Both statements belong to the conditional: 'eval' is only registered,
    # and only pointed at the bazel output directory, when that directory
    # is actually present.
    packages.append('eval')
    package_dir['eval'] = eval_bazel_path
# Declare the source distribution: identity metadata first, then the package
# layout computed above, then the extra runtime dependencies.
setuptools.setup(
    name='healthcare_deid',
    version='0.0.1',
    description='Healthcare Deid pipeline package.',
    packages=packages,
    package_dir=package_dir,
    install_requires=REQUIRED_PACKAGES,
)