Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get R from RStudio provided apt packages (.deb files) #1161

Merged
merged 21 commits into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 45 additions & 69 deletions repo2docker/buildpacks/r.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ class RBuildPack(PythonBuildPack):

- are needed by a specific tool

The `r-base-core` package from Ubuntu or "Ubuntu packages for R"
apt repositories is used to install R itself,
rather than any of the methods from https://cran.r-project.org/.

The `r-base-dev` package is installed as advised in RStudio instructions.
R is installed from https://docs.rstudio.com/resources/install-r/
"""

@property
Expand All @@ -68,33 +64,29 @@ def r_version(self):
version.
"""
version_map = {
"3.4": "3.4",
"3.5": "3.5.3-1bionic",
"3.5.0": "3.5.0-1bionic",
"3.5.1": "3.5.1-2bionic",
"3.5.2": "3.5.2-1bionic",
"3.5.3": "3.5.3-1bionic",
"3.6": "3.6.3-1bionic",
"3.6.0": "3.6.0-2bionic",
"3.6.1": "3.6.1-3bionic",
"4.0": "4.0.5-1.1804.0",
"4.0.2": "4.0.2-1.1804.0",
"4.1": "4.1.2-1.1804.0",
"4.2": "4.2.0",
"4.1": "4.1.3",
"4.0": "4.0.5",
"3.6": "3.6.3",
"3.5": "3.5.3",
"3.4": "3.4.0",
"3.3": "3.3.3",
}

# the default if nothing is specified
r_version = "4.1"
r_version = "4.1.0"
consideRatio marked this conversation as resolved.
Show resolved Hide resolved

if not hasattr(self, "_r_version"):
parts = self.runtime.split("-")
if len(parts) == 5:
r_version = parts[1]
if r_version not in version_map:
raise ValueError(
"Version '{}' of R is not supported.".format(r_version)
)
# For versions of form x.y, we want to explicitly provide x.y.z - latest patchlevel
# available. Users can however explicitly specify the full version to get something specific
if r_version in version_map:
r_version = version_map[r_version]

# translate to the full version string
self._r_version = version_map.get(r_version)
self._r_version = r_version

return self._r_version

Expand Down Expand Up @@ -141,6 +133,20 @@ def detect(self):
self._runtime = "r-{}".format(str(self._checkpoint_date))
return True

def get_env(self):
"""
Set custom env vars needed for RStudio to load
"""
return super().get_env() + [
# rstudio (rsession) can't seem to find R unless we explicitly tell it where
# it is - just $PATH isn't enough. I discovered these are the env vars it
# looks for by digging through RStudio source and finding
# https://github.com/rstudio/rstudio/blob/v2022.02.3+492/src/cpp/r/session/RDiscovery.cpp
("R_HOME", f"/opt/R/{self.r_version}/lib/R"),
("R_DOC_DIR", "${R_HOME}/doc"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ${R_HOME} expanded to f"/opt/R/{self.r_version}/lib/R" at runtime? Or is it actually completely separate?
If it's the former I think it'd be clearer to use the expanded variable here, if it's the latter would you mind adding a comment explaining the difference?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@manics it gets expanded at runtime. I think it's actually clearer to let it expand at runtime, as you can see that these are dependent if you run env while you are using the image.

("LD_LIBRARY_PATH", "${R_HOME}/lib:${LD_LIBRARY_PATH}"),
]

def get_path(self):
"""
Return paths to be added to the PATH environment variable.
Expand Down Expand Up @@ -177,12 +183,6 @@ def get_packages(self):
"sudo",
"lsb-release",
]
# For R 3.4 we use the default Ubuntu package, for other versions we
# install from a different apt repository
if V(self.r_version) < V("3.5"):
packages.append("r-base")
packages.append("r-base-dev")
packages.append("libclang-dev")

return super().get_packages().union(packages)

Expand Down Expand Up @@ -268,46 +268,25 @@ def get_build_scripts(self):

cran_mirror_url = self.get_cran_mirror_url(self.checkpoint_date)

# Determine which R apt repository should be enabled
if V(self.r_version) >= V("3.5"):
if V(self.r_version) >= V("4"):
vs = "40"
else:
vs = "35"

scripts = [
(
"root",
rf"""
echo "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran{vs}/" > /etc/apt/sources.list.d/r-ubuntu.list
""",
),
# Dont use apt-key directly, as gpg does not always respect *_proxy vars. This increase the chances
# of being able to reach it from behind a firewall
(
"root",
r"""
wget --quiet -O - 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xe298a3a825c0d65dfd57cbb651716619e084dab9' | apt-key add -
""",
),
(
"root",
# we should have --no-install-recommends on all our apt-get install commands,
# but here it's important because it will pull in CRAN packages
# via r-recommends, which is only guaranteed to be compatible with the latest r-base-core
r"""
apt-get update > /dev/null && \
apt-get install --yes --no-install-recommends \
r-base-core={R_version} \
r-base-dev={R_version} \
libclang-dev \
libzmq3-dev > /dev/null && \
wget --quiet -O /tmp/r-{self.r_version}.deb \
https://cdn.rstudio.com/r/ubuntu-$(. /etc/os-release && echo $VERSION_ID | sed 's/\.//')/pkgs/r-{self.r_version}_1_amd64.deb && \
apt install --yes --no-install-recommends /tmp/r-{self.r_version}.deb > /dev/null && \
rm /tmp/r-{self.r_version}.deb && \
apt-get -qq purge && \
apt-get -qq clean && \
rm -rf /var/lib/apt/lists/*
""".format(
R_version=self.r_version
),
rm -rf /var/lib/apt/lists/* && \
ln -s /opt/R/{self.r_version}/bin/R /usr/local/bin/R && \
ln -s /opt/R/{self.r_version}/bin/Rscript /usr/local/bin/Rscript && \
R --version
""",
),
]

Expand All @@ -326,9 +305,9 @@ def get_build_scripts(self):
# Set paths so that RStudio shares libraries with base R
# install. This first comments out any R_LIBS_USER that
# might be set in /etc/R/Renviron and then sets it.
r"""
sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \
echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron
rf"""
sed -i -e '/^R_LIBS_USER=/s/^/#/' /opt/R/{self.r_version}/lib/R/etc/Renviron && \
echo "R_LIBS_USER=${{R_LIBS_USER}}" >> /opt/R/{self.r_version}/lib/R/etc/Renviron
""",
),
(
Expand All @@ -338,15 +317,12 @@ def get_build_scripts(self):
# Quite hilarious, IMO.
# See https://docs.rstudio.com/rspm/1.0.12/admin/binaries.html
# Set mirror for RStudio too, by modifying rsession.conf
r"""
rf"""
R RHOME && \
mkdir -p /usr/lib/R/etc /etc/rstudio && \
echo 'options(repos = c(CRAN = "{cran_mirror_url}"))' > /usr/lib/R/etc/Rprofile.site && \
echo 'options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os)))' >> /usr/lib/R/etc/Rprofile.site && \
mkdir -p /etc/rstudio && \
echo 'options(repos = c(CRAN = "{cran_mirror_url}"))' > /opt/R/{self.r_version}/lib/R/etc/Rprofile.site && \
echo 'r-cran-repos={cran_mirror_url}' > /etc/rstudio/rsession.conf
""".format(
cran_mirror_url=cran_mirror_url
),
""",
),
(
"${NB_USER}",
Expand Down
63 changes: 1 addition & 62 deletions tests/unit/test_r.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,6 @@
from repo2docker import buildpacks


def test_unsupported_version(tmpdir):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the removal of this test mean all versions are supported?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

YESSS! basically anything that has a deb produced by RStudio is now supported

tmpdir.chdir()

with open("runtime.txt", "w") as f:
f.write("r-3.8-2019-01-01")

r = buildpacks.RBuildPack()
with pytest.raises(ValueError) as excinfo:
# access the property to trigger the exception
_ = r.r_version
# check the version is mentioned in the exception
assert "'3.8'" in str(excinfo.value)


@pytest.mark.parametrize(
"runtime_version, expected", [("", "4.1"), ("3.6", "3.6"), ("3.5.1", "3.5")]
)
Expand All @@ -43,7 +29,7 @@ def test_version_completion(tmpdir):
f.write(f"r-3.6-2019-01-01")

r = buildpacks.RBuildPack()
assert r.r_version == "3.6.3-1bionic"
assert r.r_version == "3.6.3"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -104,50 +90,3 @@ def mock_request_head(url):
assert r.get_mran_snapshot_url(
requested
) == "https://mran.microsoft.com/snapshot/{}".format(expected.isoformat())


def test_install_from_base(tmpdir):
# check that for R==3.4 we install from ubuntu
tmpdir.chdir()

with open("runtime.txt", "w") as f:
f.write("r-3.4-2019-01-02")

r = buildpacks.RBuildPack()
assert "r-base" in r.get_packages()


def test_install_from_cran_apt_repo(tmpdir):
# check that for R>3.4 we don't install r-base from Ubuntu
tmpdir.chdir()

with open("runtime.txt", "w") as f:
f.write("r-3.5-2019-01-02")

r = buildpacks.RBuildPack()
assert "r-base" not in r.get_packages()


def test_custom_cran_apt_repo(tmpdir):
tmpdir.chdir()

with open("runtime.txt", "w") as f:
f.write("r-3.5-2019-01-02")

r = buildpacks.RBuildPack()

scripts = r.get_build_scripts()

# check that at least one of the build scripts adds this new apt repository
for user, script in scripts:
if "https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/" in script:
break
else:
assert False, "Should have added a new apt repository"

# check that we install the right package
for user, script in scripts:
if "r-base-core=3.5" in script:
break
else:
assert False, "Should have installed base R"