@@ -2307,26 +2307,17 @@ def __repr__(self):
23072307 __reduce__ = None
23082308
23092309
2310- # Private sentinels
2311- _RANGE = _Sentinel ('RANGE' ) # Range to process
2312- _BLOCK = _Sentinel ('BLOCK' ) # Block to return
2313- _RANGEWITHBLOCKS = _Sentinel ('RANGEWITHBLOCKS' ) # Range to process & pre-evaluated blocks
2314-
2315- # Modifier sentinels. These are returned as first tuple item from `_modifier`
2310+ _RANGE = _Sentinel ('RANGE' ) # Range to process (private)
23162311ANCHORBLOCKS = _Sentinel ('ANCHORBLOCKS' ) # List of blocks (not subject to balancing)
23172312RESULTBLOCKS = _Sentinel ('RESULTBLOCKS' ) # List of blocks that terminate recursion
23182313
2319-
23202314_ERR_MSG_DTYPE = 'Unknown data type: {!r}'
23212315
23222316
23232317class DivideAndConquerMatcherMixin :
2324- def _process_range (self , depth , alo , ahi , blo , bhi ):
2318+ def _process_range (self , depth , alo , ahi , blo , bhi , try_quick = False ):
23252319 raise NotImplementedError
23262320
2327- def _preprocess_range (self , depth , alo , ahi , blo , bhi ):
2328- return None
2329-
23302321 def _validate_blocks (self , blocks , alo , ahi , blo , bhi ):
23312322 # 2.1.1. Prepare for validation
23322323 blocks = list (blocks )
@@ -2367,42 +2358,32 @@ def _get_matching_blocks(self):
23672358 return
23682359
23692360 # 3-element tuples: (data_type, depth, data)
2370- q = [(_RANGE , 1 , (alo , ahi , blo , bhi ))]
2361+ q = [(_RANGE , 1 , (alo , ahi , blo , bhi ), None )]
23712362 while q :
2372- dtype , depth , data = q .pop ()
2363+ dtype , depth , bounds , data = q .pop ()
23732364
23742365 # 1. Decision logic for q items
2375- if dtype is _BLOCK :
2366+ if dtype is RESULTBLOCKS :
23762367 # Just a block to yield
2377- yield data
2368+ yield from data
23782369 continue
23792370
2380- elif dtype is _RANGE :
2371+ if dtype is _RANGE :
23812372 # Just the range to process
2382- bounds = data
2383- rtype , blocks , validated = self ._process_range (depth , * bounds )
2384-
2385- elif dtype is _RANGEWITHBLOCKS :
2373+ dtype , blocks , validated = self ._process_range (depth , * bounds , try_quick = False )
2374+ if not blocks :
2375+ continue
2376+ if not validated :
2377+ blocks = list (self ._validate_blocks (blocks , * bounds ))
2378+ elif dtype is ANCHORBLOCKS :
23862379 # Range & pre-evaluated block
2387- bounds , data = data
2388- rtype , blocks , validated = data
2389-
2380+ blocks = data
23902381 else :
23912382 raise RuntimeError (_ERR_MSG_DTYPE .format (dtype ))
23922383
2393- if rtype not in (ANCHORBLOCKS , RESULTBLOCKS ):
2394- msg = 'Unknown result type from processed range: {!r}'
2395- raise RuntimeError (msg .format (rtype ))
2396-
2397- if not validated :
2398- blocks = list (self ._validate_blocks (blocks , * bounds ))
23992384 if not blocks :
24002385 continue
24012386
2402- if rtype is RESULTBLOCKS :
2403- yield from blocks
2404- continue
2405-
24062387 # 2.1. Interpolate `blocks` with ranges
24072388 alo , ahi , blo , bhi = bounds
24082389 q_tail = []
@@ -2413,22 +2394,20 @@ def _get_matching_blocks(self):
24132394 continue
24142395 if i0 < i and j0 < j :
24152396 q_tail .append ((_RANGE , (i0 , i , j0 , j )))
2416- q_tail .append ((_BLOCK , block ))
2397+ q_tail .append ((RESULTBLOCKS , [ block ] ))
24172398 i0 , j0 = i + k , j + k
2418-
2419- if q_tail :
2420- if i0 < ahi and j0 < bhi :
2421- q_tail .append ((_RANGE , (i0 , ahi , j0 , bhi )))
2422- else :
2399+ if not q_tail :
24232400 # No blocks identified. Do not recurse further.
24242401 continue
2402+ elif i0 < ahi and j0 < bhi :
2403+ q_tail .append ((_RANGE , (i0 , ahi , j0 , bhi )))
24252404
24262405 # 2.2. Yield what is possible straight away
24272406 q_tail .reverse ()
24282407 while q_tail :
24292408 dtype , data = q_tail .pop ()
2430- if dtype is _BLOCK :
2431- yield data
2409+ if dtype is RESULTBLOCKS :
2410+ yield from data
24322411 elif dtype is _RANGE :
24332412 q_tail .append ((dtype , data ))
24342413 q_tail .reverse ()
@@ -2437,20 +2416,24 @@ def _get_matching_blocks(self):
24372416 raise RuntimeError (_ERR_MSG_DTYPE .format (dtype ))
24382417
24392418 # 2.3. append to Q what is not
2440- d = depth + 1
2419+ depth_p1 = depth + 1
24412420 while q_tail :
24422421 dtype , data = q_tail .pop ()
2443- if dtype is _BLOCK :
2444- q .append ((dtype , d , data ))
2422+ if dtype is RESULTBLOCKS :
2423+ q .append ((RESULTBLOCKS , depth_p1 , None , data ))
24452424 elif dtype is _RANGE :
24462425 # Try quick evaluation without re-building
24472426 # Before cache was overridden
2448- bounds = data
2449- result = self ._preprocess_range (d , * bounds )
2427+ result = self ._process_range (depth_p1 , * data , try_quick = True )
24502428 if result is not None :
2451- q .append ((_RANGEWITHBLOCKS , d , (bounds , result )))
2429+ dtype , blocks , validated = result
2430+ if blocks :
2431+ if not validated :
2432+ blocks = list (self ._validate_blocks (blocks , * bounds ))
2433+ if blocks :
2434+ q .append ((dtype , depth_p1 , data , blocks ))
24522435 else :
2453- q .append ((dtype , d , data ))
2436+ q .append ((_RANGE , depth_p1 , data , None ))
24542437 else :
24552438 raise RuntimeError (_ERR_MSG_DTYPE .format (dtype ))
24562439
@@ -2584,7 +2567,7 @@ def _search_many_of_same_length(patterns, text, start=0, stop=None):
25842567 h = (h - hash (text [i0 ]) * base_m ) % MOD
25852568
25862569
2587- _FIND_MODES = ('quick-only ' , 'quick-or-build' , 'build' )
2570+ _FIND_MODES = ('try-quick ' , 'quick-or-build' , 'build' )
25882571
25892572
25902573class _LCSUBAutomaton :
@@ -2923,7 +2906,7 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29232906 Secondly, it will be leftmost in seq2 if more than one occurrence
29242907 Args:
29252908 mode: str
2926- quick-only (on failure return None)
2909+ try-quick (on failure return None)
29272910 quick-or-build (on failure build and calculate)
29282911 build
29292912 Returns:
@@ -2938,11 +2921,11 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29382921 return (start1 , start2 , 0 )
29392922
29402923 if mode == 'quick-or-build' :
2941- block = self .find (seq1 , start1 , stop1 , start2 , stop2 , mode = 'quick-only ' )
2924+ block = self .find (seq1 , start1 , stop1 , start2 , stop2 , mode = 'try-quick ' )
29422925 if block is not None :
29432926 return block
29442927
2945- if mode == 'quick-only ' :
2928+ if mode == 'try-quick ' :
29462929 c_start , c_stop = self .cache
29472930 if c_start > start2 or stop2 > c_stop :
29482931 return None
@@ -2955,7 +2938,7 @@ def find(self, seq1, start1=0, stop1=None, start2=0, stop2=None, mode='quick-or-
29552938 return (start1 , start2 , 0 )
29562939
29572940 e1 , e2 , k = block
2958- if mode == 'quick-only ' :
2941+ if mode == 'try-quick ' :
29592942 stop_in_seq2 = e2 + 1
29602943 start_in_seq2 = stop_in_seq2 - k
29612944 if start_in_seq2 >= start2 and stop_in_seq2 <= stop2 :
@@ -3001,7 +2984,7 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30012984 will only return all if split is lower than this ratio,
30022985 otherwise will only return first block. Same as simple find.
30032986 mode: str
3004- quick-only (on failure return None)
2987+ try-quick (on failure return None)
30052988 quick-or-build (on failure build and calculate)
30062989 build
30072990 """
@@ -3019,11 +3002,11 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30193002 # Try quick first anyways!
30203003 blocks = self .find_contiguous_best (
30213004 seq1 , start1 , stop1 , start2 , stop2 ,
3022- if_split_lower_than = if_split_lower_than , mode = 'quick-only ' )
3005+ if_split_lower_than = if_split_lower_than , mode = 'try-quick ' )
30233006 if blocks is not None :
30243007 return blocks
30253008
3026- if mode == 'quick-only ' :
3009+ if mode == 'try-quick ' :
30273010 c_start , c_stop = self .cache
30283011 if c_start > start2 or stop2 > c_stop :
30293012 return None
@@ -3038,7 +3021,7 @@ def find_contiguous_best(self, seq1, start1=0, stop1=None, start2=0, stop2=None,
30383021 e1 , e2 , k = first
30393022 one_mk = 1 - k
30403023 j = e2 + one_mk
3041- if mode == 'quick-only ' and j < start2 or e2 + 1 > stop2 :
3024+ if mode == 'try-quick ' and j < start2 or e2 + 1 > stop2 :
30423025 # NOTE: There is still a chance that we can get result
30433026 # But it is complicated and uncertain, so just give up
30443027 return None
@@ -3210,23 +3193,17 @@ def _extend_junk_for_many(self, blocks, alo, ahi, blo, bhi):
32103193 result .append (block )
32113194 return result
32123195
3213- def _preprocess_range (self , depth , alo , ahi , blo , bhi ):
3196+ def _process_range (self , depth , alo , ahi , blo , bhi , try_quick = False ):
32143197 """
32153198 If we split better than 1 / 8 don't bother
32163199 Ensures O(nlogn) with relative constant ~ ln(2) / ln(8/7) = 5.19
32173200 """
32183201 bounds = (alo , ahi , blo , bhi )
3202+ mode = 'try-quick' if try_quick else 'quick-or-build'
32193203 blocks = self .automaton .find_contiguous_best (
3220- self .a , * bounds , if_split_lower_than = 1 / 8 , mode = 'quick-only' )
3221- if blocks is not None :
3222- if self .bjunk :
3223- blocks = self ._extend_junk_for_many (blocks , * bounds )
3224- return ANCHORBLOCKS , blocks , True
3225-
3226- def _process_range (self , depth , alo , ahi , blo , bhi ):
3227- bounds = (alo , ahi , blo , bhi )
3228- blocks = self .automaton .find_contiguous_best (
3229- self .a , * bounds , if_split_lower_than = 1 / 8 , mode = 'quick-or-build' )
3204+ self .a , * bounds , if_split_lower_than = 1 / 8 , mode = mode )
3205+ if try_quick and blocks is None :
3206+ return None
32303207 if self .bjunk :
32313208 blocks = self ._extend_junk_for_many (blocks , * bounds )
32323209 return ANCHORBLOCKS , blocks , True
0 commit comments