Skip to content

Commit 5900c9d

Browse files
committed
cleanup
1 parent 0b3d72e commit 5900c9d

1 file changed

Lines changed: 45 additions & 68 deletions

File tree

Lib/difflib.py

Lines changed: 45 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -2307,26 +2307,17 @@ def __repr__(self):
23072307
__reduce__ = None
23082308

23092309

2310-
# Private sentinels
2311-
_RANGE = _Sentinel('RANGE') # Range to process
2312-
_BLOCK = _Sentinel('BLOCK') # Block to return
2313-
_RANGEWITHBLOCKS = _Sentinel('RANGEWITHBLOCKS') # Range to process & pre-evaluated blocks
2314-
2315-
# Modifier sentinels. These are returned as first tuple item from `_modifier`
2310+
_RANGE = _Sentinel('RANGE') # Range to process (private)
23162311
ANCHORBLOCKS = _Sentinel('ANCHORBLOCKS') # List of blocks (not subject to balancing)
23172312
RESULTBLOCKS = _Sentinel('RESULTBLOCKS') # List of blocks that terminate recursion
23182313

2319-
23202314
_ERR_MSG_DTYPE = 'Unknown data type: {!r}'
23212315

23222316

23232317
class DivideAndConquerMatcherMixin:
2324-
def _process_range(self, depth, alo, ahi, blo, bhi):
2318+
def _process_range(self, depth, alo, ahi, blo, bhi, try_quick=False):
23252319
raise NotImplementedError
23262320

2327-
def _preprocess_range(self, depth, alo, ahi, blo, bhi):
2328-
return None
2329-
23302321
def _validate_blocks(self, blocks, alo, ahi, blo, bhi):
23312322
# 2.1.1. Prepare for validation
23322323
blocks = list(blocks)
@@ -2367,42 +2358,32 @@ def _get_matching_blocks(self):
23672358
return
23682359

23692360
# 4-element tuples: (data_type, depth, bounds, data)
2370-
q = [(_RANGE, 1, (alo, ahi, blo, bhi))]
2361+
q = [(_RANGE, 1, (alo, ahi, blo, bhi), None)]
23712362
while q:
2372-
dtype, depth, data = q.pop()
2363+
dtype, depth, bounds, data = q.pop()
23732364

23742365
# 1. Decision logic for q items
2375-
if dtype is _BLOCK:
2366+
if dtype is RESULTBLOCKS:
23762367
# Just a block to yield
2377-
yield data
2368+
yield from data
23782369
continue
23792370

2380-
elif dtype is _RANGE:
2371+
if dtype is _RANGE:
23812372
# Just the range to process
2382-
bounds = data
2383-
rtype, blocks, validated = self._process_range(depth, *bounds)
2384-
2385-
elif dtype is _RANGEWITHBLOCKS:
2373+
dtype, blocks, validated = self._process_range(depth, *bounds, try_quick=False)
2374+
if not blocks:
2375+
continue
2376+
if not validated:
2377+
blocks = list(self._validate_blocks(blocks, *bounds))
2378+
elif dtype is ANCHORBLOCKS:
23862379
# Range & pre-evaluated block
2387-
bounds, data = data
2388-
rtype, blocks, validated = data
2389-
2380+
blocks = data
23902381
else:
23912382
raise RuntimeError(_ERR_MSG_DTYPE.format(dtype))
23922383

2393-
if rtype not in (ANCHORBLOCKS, RESULTBLOCKS):
2394-
msg = 'Unknown result type from processed range: {!r}'
2395-
raise RuntimeError(msg.format(rtype))
2396-
2397-
if not validated:
2398-
blocks = list(self._validate_blocks(blocks, *bounds))
23992384
if not blocks:
24002385
continue
24012386

2402-
if rtype is RESULTBLOCKS:
2403-
yield from blocks
2404-
continue
2405-
24062387
# 2.1. Interpolate `blocks` with ranges
24072388
alo, ahi, blo, bhi = bounds
24082389
q_tail = []
@@ -2413,22 +2394,20 @@ def _get_matching_blocks(self):
24132394
continue
24142395
if i0 < i and j0 < j:
24152396
q_tail.append((_RANGE, (i0, i, j0, j)))
2416-
q_tail.append((_BLOCK, block))
2397+
q_tail.append((RESULTBLOCKS, [block]))
24172398
i0, j0 = i + k, j + k
2418-
2419-
if q_tail:
2420-
if i0 < ahi and j0 < bhi:
2421-
q_tail.append((_RANGE, (i0, ahi, j0, bhi)))
2422-
else:
2399+
if not q_tail:
24232400
# No blocks identified. Do not recurse further.
24242401
continue
2402+
elif i0 < ahi and j0 < bhi:
2403+
q_tail.append((_RANGE, (i0, ahi, j0, bhi)))
24252404

24262405
# 2.2. Yield what is possible straight away
24272406
q_tail.reverse()
24282407
while q_tail:
24292408
dtype, data = q_tail.pop()
2430-
if dtype is _BLOCK:
2431-
yield data
2409+
if dtype is RESULTBLOCKS:
2410+
yield from data
24322411
elif dtype is _RANGE:
24332412
q_tail.append((dtype, data))
24342413
q_tail.reverse()
@@ -2437,20 +2416,24 @@ def _get_matching_blocks(self):
24372416
raise RuntimeError(_ERR_MSG_DTYPE.format(dtype))
24382417

24392418
# 2.3. append to Q what is not
2440-
d = depth + 1
2419+
depth_p1 = depth + 1
24412420
while q_tail:
24422421
dtype, data = q_tail.pop()
2443-
if dtype is _BLOCK:
2444-
q.append((dtype, d, data))
2422+
if dtype is RESULTBLOCKS:
2423+
q.append((RESULTBLOCKS, depth_p1, None, data))
24452424
elif dtype is _RANGE:
24462425
# Try quick evaluation without re-building
24472426
# Before cache was overridden
2448-
bounds = data
2449-
result = self._preprocess_range(d, *bounds)
2427+
result = self._process_range(depth_p1, *data, try_quick=True)
24502428
if result is not None:
2451-
q.append((_RANGEWITHBLOCKS, d, (bounds, result)))
2429+
dtype, blocks, validated = result
2430+
if blocks:
2431+
if not validated:
2432+
blocks = list(self._validate_blocks(blocks, *bounds))
2433+
if blocks:
2434+
q.append((dtype, depth_p1, data, blocks))
24522435
else:
2453-
q.append((dtype, d, data))
2436+
q.append((_RANGE, depth_p1, data, None))
24542437
else:
24552438
raise RuntimeError(_ERR_MSG_DTYPE.format(dtype))
24562439

@@ -2584,7 +2567,7 @@ def _search_many_of_same_length(patterns, text, start=0, stop=None):
25842567
h = (h - hash(text[i0]) * base_m) % MOD
25852568

25862569

2587-
_FIND_MODES = ('quick-only', 'quick-or-build', 'build')
2570+
_FIND_MODES = ('try-quick', 'quick-or-build', 'build')
25882571

25892572

25902573
class _LCSUBAutomaton:
@@ -2923,7 +2906,7 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29232906
Secondly, it will be leftmost in seq2 if more than one occurrence
29242907
Args:
29252908
mode: str
2926-
quick-only (on failure return None)
2909+
try-quick (on failure return None)
29272910
quick-or-build (on failure build and calculate)
29282911
build
29292912
Returns:
@@ -2938,11 +2921,11 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29382921
return (start1, start2, 0)
29392922

29402923
if mode == 'quick-or-build':
2941-
block = self.find(seq1, start1, stop1, start2, stop2, mode='quick-only')
2924+
block = self.find(seq1, start1, stop1, start2, stop2, mode='try-quick')
29422925
if block is not None:
29432926
return block
29442927

2945-
if mode == 'quick-only':
2928+
if mode == 'try-quick':
29462929
c_start, c_stop = self.cache
29472930
if c_start > start2 or stop2 > c_stop:
29482931
return None
@@ -2955,7 +2938,7 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29552938
return (start1, start2, 0)
29562939

29572940
e1, e2, k = block
2958-
if mode == 'quick-only':
2941+
if mode == 'try-quick':
29592942
stop_in_seq2 = e2 + 1
29602943
start_in_seq2 = stop_in_seq2 - k
29612944
if start_in_seq2 >= start2 and stop_in_seq2 <= stop2:
@@ -3001,7 +2984,7 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30012984
will only return all if split is lower than this ratio,
30022985
otherwise will only return first block. Same as simple find.
30032986
mode: str
3004-
quick-only (on failure return None)
2987+
try-quick (on failure return None)
30052988
quick-or-build (on failure build and calculate)
30062989
build
30072990
"""
@@ -3019,11 +3002,11 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30193002
# Try quick first anyways!
30203003
blocks = self.find_contiguous_best(
30213004
seq1, start1, stop1, start2, stop2,
3022-
if_split_lower_than=if_split_lower_than, mode='quick-only')
3005+
if_split_lower_than=if_split_lower_than, mode='try-quick')
30233006
if blocks is not None:
30243007
return blocks
30253008

3026-
if mode == 'quick-only':
3009+
if mode == 'try-quick':
30273010
c_start, c_stop = self.cache
30283011
if c_start > start2 or stop2 > c_stop:
30293012
return None
@@ -3038,7 +3021,7 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30383021
e1, e2, k = first
30393022
one_mk = 1 - k
30403023
j = e2 + one_mk
3041-
if mode == 'quick-only' and j < start2 or e2 + 1 > stop2:
3024+
if mode == 'try-quick' and j < start2 or e2 + 1 > stop2:
30423025
# NOTE: There is still a chance that we can get result
30433026
# But it is complicated and uncertain, so just give up
30443027
return None
@@ -3210,23 +3193,17 @@ def _extend_junk_for_many(self, blocks, alo, ahi, blo, bhi):
32103193
result.append(block)
32113194
return result
32123195

3213-
def _preprocess_range(self, depth, alo, ahi, blo, bhi):
3196+
def _process_range(self, depth, alo, ahi, blo, bhi, try_quick=False):
32143197
"""
32153198
If we split better than 1 / 8 don't bother
32163199
Ensures O(nlogn) with relative constant ~ ln(2) / ln(8/7) = 5.19
32173200
"""
32183201
bounds = (alo, ahi, blo, bhi)
3202+
mode = 'try-quick' if try_quick else 'quick-or-build'
32193203
blocks = self.automaton.find_contiguous_best(
3220-
self.a, *bounds, if_split_lower_than=1/8, mode='quick-only')
3221-
if blocks is not None:
3222-
if self.bjunk:
3223-
blocks = self._extend_junk_for_many(blocks, *bounds)
3224-
return ANCHORBLOCKS, blocks, True
3225-
3226-
def _process_range(self, depth, alo, ahi, blo, bhi):
3227-
bounds = (alo, ahi, blo, bhi)
3228-
blocks = self.automaton.find_contiguous_best(
3229-
self.a, *bounds, if_split_lower_than=1/8, mode='quick-or-build')
3204+
self.a, *bounds, if_split_lower_than=1/8, mode=mode)
3205+
if try_quick and blocks is None:
3206+
return None
32303207
if self.bjunk:
32313208
blocks = self._extend_junk_for_many(blocks, *bounds)
32323209
return ANCHORBLOCKS, blocks, True

0 commit comments

Comments
 (0)