Skip to content

Commit 343c935

Browse files
committed
Add remove method to ZipFile
Refer to: python/cpython#103033
1 parent a157c27 commit 343c935

2 files changed

Lines changed: 94 additions & 1 deletion

File tree

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""ZipFile with Remove method."""
2+
3+
# From https://github.com/python/cpython/pull/103033
4+
# Intentionally excluded from linting so this file stays easy to diff against the PR above.
5+
import contextlib
6+
from zipfile import ZipFile, ZipInfo
7+
8+
9+
class ZipFileWithRemove(ZipFile):
    """ZipFile subclass that can delete members from a writable archive."""

    def remove(self, zinfo_or_arcname):
        """Remove a member from the archive.

        Args:
            zinfo_or_arcname: Either a ``ZipInfo`` object that is present in
                ``self.filelist`` or an archive name that is resolved with
                ``self.getinfo()``.

        Returns:
            Whatever ``_remove_members()`` returns (currently ``None``).

        Raises:
            ValueError: If the archive mode is not ``'w'``, ``'x'`` or ``'a'``,
                if the archive is already closed, or if a writing handle is
                still open.
            KeyError: If the given ``ZipInfo`` is not in ``self.filelist`` (a
                missing archive name is reported by ``getinfo()``).
        """
        if self.mode not in ("w", "x", "a"):
            raise ValueError("remove() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError("Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError("Can't write to ZIP archive while an open writing handle exists")

        # Resolve the argument to an info object that is known to this archive.
        if isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = zinfo_or_arcname
            if zinfo not in self.filelist:
                raise KeyError(f"There is no item {zinfo_or_arcname!r} in the archive")
        else:
            zinfo = self.getinfo(zinfo_or_arcname)

        return self._remove_members({zinfo})

    def _remove_members(self, members, *, remove_physical=True, chunk_size=2**20):
        """Remove members in a zip file.

        All members (as zinfo) should exist in the zip; otherwise the zip file
        will erroneously end in an inconsistent state.

        Args:
            members: Container of ``ZipInfo`` objects to drop.
            remove_physical: When true, the entries that follow a removed
                member are moved towards the start of the file, ``start_dir``
                is lowered accordingly and the file is truncated there. When
                false, only ``filelist`` / ``NameToInfo`` are updated.
            chunk_size: Maximum number of bytes copied per read/write step
                while relocating an entry.

        Raises:
            EOFError: If the underlying file yields no data while an entry is
                being relocated (the recorded offsets exceed the file size).
        """
        fp = self.fp
        entry_offset = 0  # total number of bytes freed by removed entries so far
        member_seen = False

        # Walk the entries in physical order, in case the directory order
        # doesn't match the actual entry order.
        ordered = sorted(self.filelist, key=lambda x: x.header_offset)
        last_index = len(ordered) - 1
        for index, info in enumerate(ordered):
            is_member = info in members

            # Entries located before the first removed member stay untouched.
            if not (member_seen or is_member):
                continue

            # An entry spans up to the next header, or up to the central
            # directory for the physically last entry.
            if index < last_index:
                end_offset = ordered[index + 1].header_offset
            else:
                end_offset = self.start_dir
            entry_size = end_offset - info.header_offset

            if is_member:
                member_seen = True
                entry_offset += entry_size

                # update caches
                self.filelist.remove(info)
                with contextlib.suppress(KeyError):
                    del self.NameToInfo[info.filename]
                continue

            # update the header and move entry data to the new position
            if remove_physical:
                old_header_offset = info.header_offset
                info.header_offset -= entry_offset
                copied = 0
                while copied < entry_size:
                    fp.seek(old_header_offset + copied)
                    data = fp.read(min(entry_size - copied, chunk_size))
                    if not data:
                        # Without this guard a short file would loop forever.
                        raise EOFError("Unexpected end of file while relocating ZIP entry")
                    fp.seek(info.header_offset + copied)
                    fp.write(data)
                    fp.flush()
                    copied += len(data)

        # Avoid missing entry if entries have a duplicated name.
        # Reverse the order as NameToInfo normally stores the last added one.
        for info in reversed(self.filelist):
            self.NameToInfo.setdefault(info.filename, info)

        # update state
        if remove_physical:
            self.start_dir -= entry_offset
        self._didModify = True

        # seek to the start of the central dir
        fp.seek(self.start_dir)

        # Drop the stale bytes left behind by the shifted entries. Otherwise
        # they would trail the end record written on close and could make the
        # archive unreadable.
        if remove_physical and entry_offset:
            fp.truncate()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ testpaths = "tests"
135135
exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,dist/*,node_modules/*,test-results/*,typings/*"
136136

137137
[tool.ruff]
138-
extend-exclude = ["typings"]
138+
extend-exclude = ["node_modules", "darkseid/zipfile_remove"]
139139
target-version = "py310"
140140
line-length = 100
141141

0 commit comments

Comments
 (0)