Skip to content

Commit f6b7065

Browse files
committed
Fix shlex to not consume newlines after comments in POSIX mode
1 parent 4ceb077 commit f6b7065

3 files changed

Lines changed: 36 additions & 7 deletions

File tree

Lib/shlex.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ def __init__(self, instream=None, infile=None, posix=False,
4949
self.token = ''
5050
self.filestack = deque()
5151
self.source = None
52+
# _pushback_chars is a push back queue used by lookahead logic
53+
self._pushback_chars = deque()
5254
if not punctuation_chars:
5355
punctuation_chars = ''
5456
elif punctuation_chars is True:
5557
punctuation_chars = '();<>|&'
5658
self._punctuation_chars = punctuation_chars
5759
if punctuation_chars:
58-
# _pushback_chars is a push back queue used by lookahead logic
59-
self._pushback_chars = deque()
6060
# these chars added because allowed in file names, args, wildcards
6161
self.wordchars += '~-./*?='
6262
#remove any punctuation chars from wordchars
@@ -132,7 +132,7 @@ def read_token(self):
132132
quoted = False
133133
escapedstate = ' '
134134
while True:
135-
if self.punctuation_chars and self._pushback_chars:
135+
if self._pushback_chars:
136136
nextchar = self._pushback_chars.pop()
137137
else:
138138
nextchar = self.instream.read(1)
@@ -156,8 +156,18 @@ def read_token(self):
156156
else:
157157
continue
158158
elif nextchar in self.commenters:
159-
self.instream.readline()
160-
self.lineno += 1
159+
if self.posix:
160+
# Consume comment until new line or end of file
161+
while True:
162+
nextchar = self.instream.read(1)
163+
if not nextchar:
164+
break
165+
if nextchar == '\n':
166+
self._pushback_chars.append(nextchar)
167+
break
168+
else:
169+
self.instream.readline()
170+
self.lineno += 1
161171
elif self.posix and nextchar in self.escape:
162172
escapedstate = 'a'
163173
self.state = nextchar
@@ -226,14 +236,23 @@ def read_token(self):
226236
else:
227237
continue
228238
elif nextchar in self.commenters:
229-
self.instream.readline()
230-
self.lineno += 1
231239
if self.posix:
240+
# Consume comment until new line or end of file
241+
while True:
242+
nextchar = self.instream.read(1)
243+
if not nextchar:
244+
break
245+
if nextchar == '\n':
246+
self._pushback_chars.append(nextchar)
247+
break
232248
self.state = ' '
233249
if self.token or (self.posix and quoted):
234250
break # emit current token
235251
else:
236252
continue
253+
else:
254+
self.instream.readline()
255+
self.lineno += 1
237256
elif self.state == 'c':
238257
if nextchar in self.punctuation_chars:
239258
self.token += nextchar

Lib/test/test_shlex.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,15 @@ def testPunctuationCharsReadOnly(self):
368368
with self.assertRaises(AttributeError):
369369
shlex_instance.punctuation_chars = False
370370

371+
def testNewlineAfterComment(self):
372+
"""Test that new line after comment is not consumed (POSIX compliance)"""
373+
# When whitespace is customized to exclude newlines, newlines should
374+
# be treated as tokens, even when following a comment
375+
s = shlex.shlex('a # comment \n b', posix=True)
376+
s.whitespace = ' '
377+
result = list(s)
378+
self.assertEqual(result, ['a', '\n', 'b'])
379+
371380
@cpython_only
372381
def test_lazy_imports(self):
373382
import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'})
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix :mod:`shlex` to preserve the newline character after a comment in POSIX mode.

0 commit comments

Comments
 (0)