Skip to content

Commit

Permalink
[PROD-17859] Properly handle opaque whiteout entries in Docker layer …
Browse files Browse the repository at this point in the history
…flattener

Flattened version of upstream PR google#110
  • Loading branch information
JoshRosen committed Sep 24, 2018
1 parent 5dc1ab0 commit a5117e1
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 6 deletions.
11 changes: 11 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,14 @@ sh_test(
":pusher.par",
],
)

# Test target that runs client_v2_2_unit_tests.py as its main module.
# NOTE(review): ":containerregistry" is listed under srcs rather than deps.
# Presumably it is the library target exercised by the tests; confirm whether
# deps is the intended attribute so that its own dependencies are propagated.
py_test(
name = "client_v2_2_unit_tests",
size = "large",
srcs = [
"client_v2_2_unit_tests.py",
":containerregistry",
],
main = "client_v2_2_unit_tests.py",
)

21 changes: 15 additions & 6 deletions client/v2_2/docker_image_.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,20 +697,18 @@ def __exit__(self, unused_type, unused_value, unused_traceback):
pass


def _in_whiteout_dir(
fs,
name
):
def _in_whiteout_dir(fs, opaque_whiteouts, name):
while name:
dirname = os.path.dirname(name)
if name == dirname:
break
if fs.get(dirname):
if fs.get(dirname) or dirname in opaque_whiteouts:
return True
name = dirname
return False

# Basename prefix that marks a layer entry as a whiteout (tombstone) for the
# path named by the remainder of the basename.
_WHITEOUT_PREFIX = '.wh.'
# Basename of the opaque whiteout marker; the directory containing it is
# recorded so that lower layers do not contribute entries beneath it.
_OPAQUE_WHITEOUT_FILENAME = '.wh..wh..opq'


def extract(image, tar):
Expand All @@ -724,17 +722,27 @@ def extract(image, tar):
# to whether they are a tombstone or not.
fs = {}

opaque_whiteouts_in_higher_layers = set()

# Walk the layers, topmost first and add files. If we've seen them in a
# higher layer then we skip them
for layer in image.diff_ids():
buf = cStringIO.StringIO(image.uncompressed_layer(layer))
with tarfile.open(mode='r:', fileobj=buf) as layer_tar:
opaque_whiteouts_in_this_layer = []
for tarinfo in layer_tar:
# If we see a whiteout file, then don't add anything to the tarball
# but ensure that any lower layers don't add a file with the whited
# out name.
basename = os.path.basename(tarinfo.name)
dirname = os.path.dirname(tarinfo.name)

# If we see an opaque whiteout file, then don't add anything to the
# tarball but ensure that any lower layers don't add files or
# directories which are siblings of the whiteout file.
if basename == _OPAQUE_WHITEOUT_FILENAME:
opaque_whiteouts_in_this_layer.append(dirname)

tombstone = basename.startswith(_WHITEOUT_PREFIX)
if tombstone:
basename = basename[len(_WHITEOUT_PREFIX):]
Expand All @@ -746,7 +754,7 @@ def extract(image, tar):
continue

# Check for a whited out parent directory
if _in_whiteout_dir(fs, name):
if _in_whiteout_dir(fs, opaque_whiteouts_in_higher_layers, name):
continue

# Mark this file as handled by adding its name.
Expand All @@ -758,3 +766,4 @@ def extract(image, tar):
tar.addfile(tarinfo, fileobj=layer_tar.extractfile(tarinfo))
else:
tar.addfile(tarinfo, fileobj=None)
opaque_whiteouts_in_higher_layers.update(opaque_whiteouts_in_this_layer)
167 changes: 167 additions & 0 deletions client_v2_2_unit_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import OrderedDict
import io
from StringIO import StringIO
import tarfile
import unittest

from containerregistry.client.v2_2 import docker_image as v2_2_image


class MockImage(object):
  """Mock of DockerImage, implementing only the methods called by extract()."""

  def __init__(self):
    # Maps layer id -> raw (uncompressed) tar bytes, in insertion order.
    self._fs_layers = OrderedDict()

  def add_layer(self, filenames):
    """Add a layer on top of the layers added so far.

    Args:
      filenames: a list of filenames or (filename, content) pairs. Filenames
        with trailing slashes become directory entries in the generated tar.
        Text content is stored UTF-8 encoded.
    """
    # `str` plus the type of a unicode literal covers str/unicode on Python 2
    # and str on Python 3 without relying on the Python 2-only `basestring`.
    string_types = (str, type(u""))
    buf = io.BytesIO()
    with tarfile.open(mode='w:', fileobj=buf) as tf:
      for entry in filenames:
        if isinstance(entry, string_types):
          name, content = entry, ""
        else:
          name, content = entry
        # The tar stream is binary, so normalise the payload to bytes first;
        # the recorded size must be the byte length, not the character count.
        if not isinstance(content, bytes):
          content = content.encode("utf-8")
        tarinfo = tarfile.TarInfo(name)
        tarinfo.size = len(content)
        if name.endswith("/"):
          tarinfo.type = tarfile.DIRTYPE
        tf.addfile(tarinfo, fileobj=(io.BytesIO(content) if content else None))
    # Layer ids are the insertion index rendered as a string.
    new_layer_id = str(len(self._fs_layers))
    self._fs_layers[new_layer_id] = buf.getvalue()

  def diff_ids(self):
    """Return the layer ids, most recently added layer first."""
    return list(reversed(self._fs_layers))

  def uncompressed_layer(self, layer_id):
    """Return the raw tar bytes stored for `layer_id`."""
    return self._fs_layers[layer_id]


class TestExtract(unittest.TestCase):
  """Tests for extract(), driven by in-memory MockImage layers."""

  @staticmethod
  def _describe_member(tar, member):
    """Render one tar member in the same notation the test inputs use.

    Directories become 'name/', non-empty files become (name, contents) and
    empty files become just their name.
    """
    if member.isdir():
      return member.name + "/"
    data = tar.extractfile(member).read()
    return (member.name, data) if data else member.name

  def _test_flatten(self, layer_filenames, expected_flattened_output):
    """Flatten the given layers with extract() and check the resulting tar.

    Args:
      layer_filenames: list of layers in the order they are added to the mock
        image; each layer is a list accepted by MockImage.add_layer().
      expected_flattened_output: the expected members of the flattened tar,
        in order, in the notation produced by _describe_member().
    """
    # Build the mock image, one add_layer() call per input layer.
    image = MockImage()
    for layer in layer_filenames:
      image.add_layer(layer)

    # Flatten into an in-memory tarball.
    flattened = io.BytesIO()
    with tarfile.open(mode='w:', fileobj=flattened) as out_tar:
      v2_2_image.extract(image, out_tar)

    # Read the tarball back and summarise each member for comparison.
    flattened.seek(0)
    actual = []
    with tarfile.open(mode='r', fileobj=flattened) as in_tar:
      for member in in_tar:
        actual.append(self._describe_member(in_tar, member))
    self.assertEqual(actual, expected_flattened_output)

  def test_single_layer(self):
    """A single layer is passed through unchanged."""
    layers = [["/directory/", "/file"]]
    self._test_flatten(layers, ["/directory/", "/file"])

  def test_purely_additive_layers(self):
    """Layers without overlap are emitted with the later layer first."""
    layers = [
        ["dir/", "dir/file1", "file"],
        ["dir/file2", "file2"],
    ]
    expected = ["dir/file2", "file2", "dir/", "dir/file1", "file"]
    self._test_flatten(layers, expected)

  def test_highest_layer_of_file_takes_precedence(self):
    """When two layers contain the same file, the later layer's copy wins."""
    layers = [
        [("file", "a")],
        [("file", "b")],
    ]
    self._test_flatten(layers, [("file", "b")])

  def test_single_file_whiteout(self):
    """A '.wh.' entry removes the matching file from the earlier layer."""
    layers = [
        ["/foo"],
        ["/.wh.foo"],
    ]
    self._test_flatten(layers, [])

  def test_parent_directory_whiteout(self):
    """Whiting out a directory also removes the entries beneath it."""
    layers = [
        ["/x/a/", "/x/b/", "/x/b/1"],
        ["/x/.wh.b"],
    ]
    self._test_flatten(layers, ["/x/a/"])

  def test_opaque_whiteout(self):
    """An opaque whiteout hides earlier-layer children wherever it appears."""
    # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
    expected = ["a/", "a/b/", "a/b/c/", "a/b/c/foo"]

    # Opaque marker listed before its siblings in the later layer.
    marker_first = [
        ["a/", "a/b/", "a/b/c/", "a/b/c/bar"],
        ["a/", "a/.wh..wh..opq", "a/b/", "a/b/c/", "a/b/c/foo"],
    ]
    self._test_flatten(marker_first, expected)

    # Opaque marker listed after its siblings in the later layer.
    marker_last = [
        ["a/", "a/b/", "a/b/c/", "a/b/c/bar"],
        ["a/", "a/b/", "a/b/c/", "a/b/c/foo", "a/.wh..wh..opq"],
    ]
    self._test_flatten(marker_last, expected)

  def test_opaque_whiteout_preserves_parent_directory(self):
    """An opaque whiteout empties a directory but keeps the directory."""
    # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
    base_layer = [
        "bin/",
        "bin/my-app-binary",
        "bin/my-app-tools",
        "bin/tools/",
        "bin/tools/my-app-tool-one",
    ]
    layers = [base_layer, ["bin/.wh..wh..opq"]]
    self._test_flatten(layers, ["bin/"])


# Run the tests with verbose output when this file is executed as a script.
if __name__ == "__main__":
unittest.main(verbosity=2)

0 comments on commit a5117e1

Please sign in to comment.