From f675452a7808d545b756f557942c9426e6bb3ce1 Mon Sep 17 00:00:00 2001 From: Preston Moore Date: Thu, 18 May 2017 16:23:03 -0400 Subject: [PATCH 1/3] bpo-30400: Fix race condtion in shutil.copyfile --- Lib/shutil.py | 145 ++++++++++++++---- Lib/test/test_shutil.py | 20 ++- .../Library/2018-07-07.bpo-30400.Pjhd8.rst | 2 + 3 files changed, 130 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index 8d8fe145567822..57c19e1c0aa86c 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -5,6 +5,7 @@ """ import os +import fcntl import sys import stat import fnmatch @@ -280,6 +281,11 @@ def _stat(fn): def _islink(fn): return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) +def _samefilef(fsrc, fdst): + if os.path.abspath(fsrc.name) == os.path.abspath(fdst.name): + return True + return False + def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst in the most efficient way possible. @@ -307,48 +313,121 @@ def copyfile(src, dst, *, follow_symlinks=True): if _WINDOWS and i == 0: file_size = st.st_size + # XXX Is there a race issue here with os.path.islink() operating on a + # path rather than a file descriptor if not follow_symlinks and _islink(src): os.symlink(os.readlink(src), dst) + return dst + + # In Unix, we must open the files non-blocking initially as opening a + # FIFO without data present in it will block. + if os.name == 'posix': + src_flags = os.O_RDONLY | os.O_NONBLOCK + ### Do we need os.O_EXCL? + dst_flags = os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK else: - with open(src, 'rb') as fsrc: + src_flags = os.O_RDONLY + dst_flags = os.O_WRONLY + + def _src_opener(path, flags): + return os.open(path, flags) + + # dst's opener is more complex. We need to atomically check and open the + # destination if it doesn't exist. If it does exist, we need to detect so + # we don't destroy it later should we have to fail out for some reason. + dst_was_created = False + def _dst_opener(path, flags): + nonlocal dst_was_created + try: + dst_was_created = True + return os.open(path, flags | dst_flags | os.O_EXCL) + except FileExistsError: + dst_was_created = False try: - with open(dst, 'wb') as fdst: - # macOS - if _HAS_FCOPYFILE: + # Open the already existing file in as non-blocking + ### We have O_CREATE and O_NONBLOCK in dst_flags. Is that intended? + fd = os.open(path, flags | dst_flags) + # If the destination is already a FIFO for some reason, we'll get an + # OSError here as we've opened a FIFO for a non-blocking write with + # no reader on the other end. + except OSError as e: + # Use errno to decide whether the OSError we caught is due to + # the above reason or some other reason. For the above reason, + # raise the appropriate special file error. Otherwise, we need + # to re-raise the OSError + if e.errno == errno.ENXIO: + raise SpecialFileError("`%s` is a named pipe" % path) + else: + raise e + # This check is required to catch the case where the destination + # file is a FIFO that's we successfully opened non-blocking because + # for some reason there was a reader listening preventing an ENXIO + # situation. + st = os.fstat(fd) + if stat.S_ISFIFO(st.st_mode): + raise SpecialFileError("`%s` is a named pipe" % path) + return fd + + with open(src, 'rb', opener=_src_opener) as fsrc: + try: + with open(dst, 'wb', opener=_dst_opener) as fdst: + file_size = 0 + # file.name is not populated when using os.fdopen() + st = os.fstat(fsrc.fileno()) + if stat.S_ISFIFO(st.st_mode): + # Don't unlink dst unless we created it above + if dst_was_created: + os.unlink(dst) + raise SpecialFileError("`%s` is a named pipe" % src) + if _WINDOWS and i == 0: + file_size = st.st_size + + # In Unix, we must set the file descriptors back to blocking as that's + # what the rest of this function expects. + # Additonally, fsrc, and fdst must be turned into file objects + if os.name == 'posix': + srcfl = fcntl.fcntl(fsrc.fileno(), fcntl.F_GETFL) + dstfl = fcntl.fcntl(fdst.fileno(), fcntl.F_GETFL) + + # Unset the non-blocking flag + fcntl.fcntl(fsrc.fileno(), fcntl.F_SETFL, srcfl & ~os.O_NONBLOCK) + fcntl.fcntl(fdst.fileno(), fcntl.F_SETFL, dstfl & ~os.O_NONBLOCK) + + # macOS + if _HAS_FCOPYFILE: + try: + _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) + return dst + except _GiveupOnFastCopy: + pass + # Linux / Android / Solaris + elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE: + # reflink may be implicit in copy_file_range. + if _USE_CP_COPY_FILE_RANGE: try: - _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) + _fastcopy_copy_file_range(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + if _USE_CP_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) return dst except _GiveupOnFastCopy: pass - # Linux / Android / Solaris - elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE: - # reflink may be implicit in copy_file_range. - if _USE_CP_COPY_FILE_RANGE: - try: - _fastcopy_copy_file_range(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass - if _USE_CP_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass - # Windows, see: - # https://github.com/python/cpython/pull/7160#discussion_r195405230 - elif _WINDOWS and file_size > 0: - _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) - return dst - - copyfileobj(fsrc, fdst) - # Issue 43219, raise a less confusing exception - except IsADirectoryError as e: - if not os.path.exists(dst): - raise FileNotFoundError(f'Directory does not exist: {dst}') from e - else: - raise + # Windows, see: + # https://github.com/python/cpython/pull/7160#discussion_r195405230 + elif _WINDOWS and file_size > 0: + _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) + return dst + copyfileobj(fsrc, fdst) + + # Issue 43219, raise a less confusing exception + except IsADirectoryError as e: + if not os.path.exists(dst): + raise FileNotFoundError(f'Directory does not exist: {dst}') from e + raise return dst diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index ebb6cf88336249..b8270ebeb8ed77 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2306,6 +2306,18 @@ def check_chown(path, uid=None, gid=None): with self.assertRaises(ValueError): shutil.chown(filename, dir_fd=dirfd) + def test_copyfile_same_file(self): + # copyfile() should raise SameFileError if the source and destination + # are the same. + src_dir = self.mkdtemp() + src_file = os.path.join(src_dir, 'foo') + create_file(src_file, b'foo') + self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file) + # But Error should work too, to stay backward compatible. + self.assertRaises(Error, shutil.copyfile, src_file, src_file) + # Make sure file is not corrupted. + self.assertEqual(read_file(src_file), 'foo') + @support.requires_subprocess() class TestWhich(BaseTest, unittest.TestCase): @@ -2997,7 +3009,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): return self._suppress_at_exit def test_w_source_open_fails(self): - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': raise OSError('Cannot open "srcfile"') assert 0 # shouldn't reach here. @@ -3010,7 +3022,7 @@ def _open(filename, mode='r'): def test_w_dest_open_fails(self): srcfile = self.Faux() - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': @@ -3029,7 +3041,7 @@ def test_w_dest_close_fails(self): srcfile = self.Faux() destfile = self.Faux(True) - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': @@ -3051,7 +3063,7 @@ def test_w_source_close_fails(self): srcfile = self.Faux(True) destfile = self.Faux() - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': diff --git a/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst b/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst new file mode 100644 index 00000000000000..ae50b56fcb4211 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst @@ -0,0 +1,2 @@ +Fix race condition in shutil.copyfile() by ensuring the inode of the source +file does not change while it is being copied. Patch by Preston Moore. From fa2d608e9b4bcbc5ed59c06c3c1706cb2299ca3a Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Sun, 20 Jul 2025 05:33:00 -0700 Subject: [PATCH 2/3] Fix code for missing fcntl on non-POSIX systems. --- Lib/shutil.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 57c19e1c0aa86c..0ada104d810cc4 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -5,12 +5,15 @@ """ import os -import fcntl import sys import stat import fnmatch import collections import errno +try: + import fcntl +except ImportError: + fcntl = None try: import zlib @@ -84,6 +87,11 @@ class SpecialFileError(OSError): not supported on a special file (e.g. a named pipe)""" +class MissingFeatureError(Error): + """Raised when a platform does not have all of the features it needs to + perform an operation.""" + + class ReadError(OSError): """Raised when an archive cannot be read""" @@ -299,6 +307,8 @@ def copyfile(src, dst, *, follow_symlinks=True): raise SameFileError("{!r} and {!r} are the same file".format(src, dst)) file_size = 0 + + ### Move this into the opener? for i, fn in enumerate([src, dst]): try: st = _stat(fn) @@ -322,6 +332,17 @@ def copyfile(src, dst, *, follow_symlinks=True): # In Unix, we must open the files non-blocking initially as opening a # FIFO without data present in it will block. if os.name == 'posix': + # We will need these features in order to set the filehandle back to + # blocking mode later. + if not fcntl or not hasattr(fcntl, 'F_SETFL') or not hasattr(fcntl, 'F_GETFL'): + # Which exception to raise? + # The needed module or feature does not exist + # We would be unable to set the file to non-blocking later + # Cannot reset the file handle to defaults + raise MissingFeatureError("Cannot use copyfiles on systems that support" + " socket files unless `fcntl`, `fcntl.F_SETFL`," + " and `fcntl.F_GETFL` are available.") + src_flags = os.O_RDONLY | os.O_NONBLOCK ### Do we need os.O_EXCL? dst_flags = os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK @@ -330,7 +351,7 @@ def copyfile(src, dst, *, follow_symlinks=True): dst_flags = os.O_WRONLY def _src_opener(path, flags): - return os.open(path, flags) + return os.open(path, flags | src_flags) # dst's opener is more complex. We need to atomically check and open the # destination if it doesn't exist. If it does exist, we need to detect so @@ -344,8 +365,10 @@ def _dst_opener(path, flags): except FileExistsError: dst_was_created = False try: - # Open the already existing file in as non-blocking - ### We have O_CREATE and O_NONBLOCK in dst_flags. Is that intended? + # Open the already existing file as non-blocking + ### We have O_CREAT and O_NONBLOCK in dst_flags. Even though + ### we know that we are not creating the file at this point. Is + ### that intended? fd = os.open(path, flags | dst_flags) # If the destination is already a FIFO for some reason, we'll get an # OSError here as we've opened a FIFO for a non-blocking write with @@ -379,7 +402,7 @@ def _dst_opener(path, flags): if dst_was_created: os.unlink(dst) raise SpecialFileError("`%s` is a named pipe" % src) - if _WINDOWS and i == 0: + if _WINDOWS: file_size = st.st_size # In Unix, we must set the file descriptors back to blocking as that's From 8ef96f81dc404d1e807816042b85e5d968123589 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Sun, 20 Jul 2025 06:35:47 -0700 Subject: [PATCH 3/3] Get more output about how the windows test is failing. --- Lib/shutil.py | 6 +++++- Lib/test/test_shutil.py | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 0ada104d810cc4..c5774913f31b4b 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -444,7 +444,11 @@ def _dst_opener(path, flags): elif _WINDOWS and file_size > 0: _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) return dst - copyfileobj(fsrc, fdst) + from unittest import mock + if isinstance(copyfileobj, mock.Mock): + copyfileobj(fsrc, f"_WINDOWS: {_WINDOWS}\nfile_size: {file_size}") + else: + copyfileobj(fsrc, fdst) # Issue 43219, raise a less confusing exception except IsADirectoryError as e: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index b8270ebeb8ed77..8b2220c2fa70a0 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3127,8 +3127,13 @@ def test_file_offset(self): def test_win_impl(self): # Make sure alternate Windows implementation is called. with unittest.mock.patch("shutil._copyfileobj_readinto") as m: - shutil.copyfile(TESTFN, TESTFN2) - assert m.called + with unittest.mock.patch("shutil._fastcopy_fcopyfile") as m1: + with unittest.mock.patch("shutil._fastcopy_copy_file_range") as m2: + with unittest.mock.patch("shutil._fastcopy_sendfile") as m3: + with unittest.mock.patch("shutil.copyfileobj") as m4: + shutil.copyfile(TESTFN, TESTFN2) + + assert m.called, f"_copyfileobj_readinto: {m.called}\n_fastcopy_fcopyfile: {m1.called}\n_fastcopy_copy_file_range: {m2.called}\n_fastcopy_sendfile: {m3.called}\ncopyfileobj: {m4.called}\ncalled_with: {m4.call_args}" # File size is 2 MiB but max buf size should be 1 MiB. self.assertEqual(m.call_args[0][2], 1 * 1024 * 1024)