2121 "s" : ChunkType .SKIP ,
2222}
2323
24- # Warning threshold for large chunks (tokens)
25- LARGE_CHUNK_THRESHOLD = 2000
26-
2724
2825@dataclass
2926class InteractiveSession :
3027 """Manages interactive chunk classification."""
3128
3229 tree : DocumentTree
33- level : int
3430 nodes : list [HeadingNode ] = field (default_factory = list )
3531 classified : list [ClassifiedNode ] = field (default_factory = list )
3632 current_index : int = 0
3733 accumulated_tokens : int = 0
3834
3935 def __post_init__ (self ) -> None :
4036 """Initialize nodes and classified list from tree."""
41- self .nodes = self .tree .get_nodes_at_level (self .level )
37+ # Traverse all nodes in document order (depth-first)
38+ self .nodes = list (self .tree .iter_all_nodes ())
4239 # Initialize all as CARD_ONLY (default)
4340 self .classified = [
4441 ClassifiedNode (node = n , chunk_type = ChunkType .CARD_ONLY )
@@ -65,12 +62,14 @@ def classify_current(self, chunk_type: ChunkType) -> int:
6562 Classify the current node and advance.
6663
6764 Returns the token count of the classified chunk.
65+ Uses own_text (not full_content) for independent classification.
6866 """
6967 if self .is_complete :
7068 return 0
7169
7270 node = self .nodes [self .current_index ]
73- tokens = count_tokens (node .full_content )
71+ # Use own_text for independent classification semantics
72+ tokens = count_tokens (node .own_text )
7473
7574 self .classified [self .current_index ].chunk_type = chunk_type
7675
@@ -104,13 +103,24 @@ def display_section_summary(
104103 console : Console ,
105104 nodes : list [HeadingNode ],
106105 filename : str ,
107- level : int ,
108- ) -> None :
109- """Display a summary table of all sections."""
106+ max_tokens : int ,
107+ ) -> list [tuple [str , int ]]:
108+ """
109+ Display a summary table of all sections.
110+
111+ Args:
112+ console: Rich console for output
113+ nodes: List of HeadingNode to display
114+ filename: Source filename for display
115+ max_tokens: Maximum tokens per chunk (for oversized warning)
116+
117+ Returns:
118+ List of (breadcrumb, tokens) tuples for oversized nodes
119+ """
110120 console .print ()
111121 console .print (
112122 Panel (
113- f"Found [cyan]{ len (nodes )} [/cyan] sections at level [cyan] { level } [/cyan] " ,
123+ f"Found [cyan]{ len (nodes )} [/cyan] sections" ,
114124 title = f"[bold]Processing: { filename } [/bold]" ,
115125 border_style = "blue" ,
116126 )
@@ -121,17 +131,28 @@ def display_section_summary(
121131 table .add_column ("Section" , style = "cyan" )
122132 table .add_column ("Tokens" , justify = "right" )
123133
134+ oversized : list [tuple [str , int ]] = []
135+
124136 for i , node in enumerate (nodes , 1 ):
125- tokens = count_tokens (node .full_content )
126- # Add warning indicator for large chunks
127- token_str = f"{ tokens :,} "
128- if tokens > LARGE_CHUNK_THRESHOLD :
137+ # Use own_text for independent classification semantics
138+ tokens = count_tokens (node .own_text )
139+ breadcrumb = " > " .join (node .path )
140+
141+ if tokens > max_tokens :
142+ oversized .append ((breadcrumb , tokens ))
129143 token_str = f"[yellow]{ tokens :,} [/yellow] [yellow]![/yellow]"
130- table .add_row (str (i ), node .title , token_str )
144+ style = "yellow"
145+ else :
146+ token_str = f"{ tokens :,} "
147+ style = None
148+
149+ table .add_row (str (i ), breadcrumb , token_str , style = style )
131150
132151 console .print (table )
133152 console .print ()
134153
154+ return oversized
155+
135156
136157def display_classification_help (console : Console ) -> None :
137158 """Display classification options."""
@@ -147,15 +168,17 @@ def display_classification_help(console: Console) -> None:
147168
def preview_chunk(console: Console, node: HeadingNode) -> None:
    """
    Display a syntax-highlighted preview of a node's own text.

    The preview shows ``node.own_text`` (the node's own text rather than its
    full content), limited to the first 2000 characters, inside a panel
    titled with the node's breadcrumb path.

    Args:
        console: Rich console for output
        node: Heading node to preview; its ``own_text`` and ``path`` are read
    """
    # Imported locally so this function does not depend on the module's
    # import block already providing ``escape``.
    from rich.markup import escape

    max_preview = 2000
    content = node.own_text
    truncated = len(content) > max_preview
    if truncated:
        content = content[:max_preview]

    # Heading text is document data, not console markup: escape it so that
    # square brackets in a title are shown literally instead of being parsed
    # as style tags.
    breadcrumb = escape(" > ".join(node.path))

    syntax = Syntax(content, "markdown", theme="monokai", line_numbers=True)
    console.print(
        Panel(
            syntax,
            title=f"[bold]{breadcrumb}[/bold]",
            # Syntax renders its text as source code and does not interpret
            # console markup, so the truncation notice goes on the panel
            # (where markup is rendered) rather than into the content.
            subtitle="[dim]... (truncated)[/dim]" if truncated else None,
            border_style="cyan",
        )
    )
159182
160183
161184def prompt_classification (
@@ -172,18 +195,20 @@ def prompt_classification(
172195 if node is None :
173196 return "done"
174197
175- tokens = count_tokens (node .full_content )
198+ # Use own_text for independent classification semantics
199+ tokens = count_tokens (node .own_text )
176200 idx = session .current_index + 1
177201 total = session .total
178202
179- # Build prompt
203+ # Build breadcrumb display
204+ breadcrumb = " > " .join (node .path )
205+
206+ # Build prompt with token info
180207 token_info = f"[dim]({ tokens :,} tokens)[/dim]"
181- if tokens > LARGE_CHUNK_THRESHOLD :
182- token_info = f"[yellow]({ tokens :,} tokens)[/yellow]"
183208
184209 console .print (
185210 f"Section [bold]{ idx } /{ total } [/bold] "
186- f"[cyan]\" { node . title } \" [/cyan] { token_info } "
211+ f"[cyan]{ breadcrumb } [/cyan] { token_info } "
187212 )
188213
189214 try :
@@ -243,30 +268,38 @@ def show_token_info(
243268
244269def run_interactive_session (
245270 tree : DocumentTree ,
246- level : int ,
247271 console : Console ,
248272 filename : str = "" ,
273+ max_tokens : int = 3000 ,
249274) -> list [ClassifiedNode ]:
250275 """
251276 Run an interactive classification session.
252277
253278 Args:
254279 tree: DocumentTree to classify
255- level: Heading level to classify at
256280 console: Rich console for output
257281 filename: Source filename for display
282+ max_tokens: Maximum tokens per chunk (for oversized warnings)
258283
259284 Returns:
260285 List of ClassifiedNode with user classifications
261286 """
262- session = InteractiveSession (tree = tree , level = level )
287+ session = InteractiveSession (tree = tree )
263288
264289 if session .total == 0 :
265- console .print ("[yellow]No sections found at this level .[/yellow]" )
290+ console .print ("[yellow]No sections found.[/yellow]" )
266291 return []
267292
268- # Display summary
269- display_section_summary (console , session .nodes , filename , level )
293+ # Display summary and get oversized nodes
294+ oversized = display_section_summary (console , session .nodes , filename , max_tokens )
295+
296+ # Display oversized warnings
297+ if oversized :
298+ console .print (f"[yellow]Warning: { len (oversized )} section(s) exceed max_tokens ({ max_tokens } ):[/yellow]" )
299+ for breadcrumb , tokens in oversized :
300+ console .print (f" [yellow]- { breadcrumb } : { tokens } tokens[/yellow]" )
301+ console .print ()
302+
270303 display_classification_help (console )
271304
272305 # Classification loop
@@ -288,7 +321,7 @@ def run_interactive_session(
288321 elif result == "reset" :
289322 session .reset ()
290323 console .print ("[yellow]Reset. Starting over...[/yellow]\n " )
291- display_section_summary (console , session .nodes , filename , level )
324+ display_section_summary (console , session .nodes , filename , max_tokens )
292325 display_classification_help (console )
293326
294327 elif result == "done" :
0 commit comments