diff --git a/codex-rs/apply-patch/src/lib.rs b/codex-rs/apply-patch/src/lib.rs index 81ad1f078112..1957ddae0415 100644 --- a/codex-rs/apply-patch/src/lib.rs +++ b/codex-rs/apply-patch/src/lib.rs @@ -3,6 +3,7 @@ mod parser; mod seek_sequence; mod standalone_executable; mod streaming_parser; +mod text_file; use std::collections::HashMap; use std::io; @@ -23,6 +24,8 @@ pub use parser::UpdateFileChunk; pub use parser::parse_patch; use similar::TextDiff; pub use streaming_parser::StreamingPatchParser; +use text_file::Replacement; +use text_file::SourceFile; use thiserror::Error; pub use invocation::maybe_parse_apply_patch_verified; @@ -688,22 +691,13 @@ async fn derive_new_contents_from_chunks( }) })?; - let mut original_lines: Vec = original_contents.split('\n').map(String::from).collect(); - - // Drop the trailing empty element that results from the final newline so - // that line counts match the behaviour of standard `diff`. - if original_lines.last().is_some_and(String::is_empty) { - original_lines.pop(); - } + let mut source_file = SourceFile::parse(&original_contents); + let original_lines = source_file.line_texts(); let path_text = path.inferred_native_path_string(); let replacements = compute_replacements(&original_lines, &path_text, chunks)?; - let new_lines = apply_replacements(original_lines, &replacements); - let mut new_lines = new_lines; - if !new_lines.last().is_some_and(String::is_empty) { - new_lines.push(String::new()); - } - let new_contents = new_lines.join("\n"); + source_file.apply_replacements(&replacements); + let new_contents = source_file.into_contents(); Ok(AppliedPatch { original_contents, new_contents, @@ -717,8 +711,8 @@ fn compute_replacements( original_lines: &[String], path: &str, chunks: &[UpdateFileChunk], -) -> std::result::Result)>, ApplyPatchError> { - let mut replacements: Vec<(usize, usize, Vec)> = Vec::new(); +) -> std::result::Result, ApplyPatchError> { + let mut replacements: Vec = Vec::new(); let mut line_index: usize = 0; for chunk in chunks { @@ -756,11 +750,11 @@ fn compute_replacements( // Attempt to locate the `old_lines` verbatim within the file. In many // real‑world diffs the last element of `old_lines` is an *empty* string // representing the terminating newline of the region being replaced. - // This sentinel is not present in `original_lines` because we strip the - // trailing empty slice emitted by `split('\n')`. If a direct search - // fails and the pattern ends with an empty string, retry without that - // final element so that modifications touching the end‑of‑file can be - // located reliably. + // This sentinel is not present in `original_lines` because `SourceFile` + // stores the terminator on the preceding line rather than as an extra + // trailing element. If a direct search fails and the pattern ends with + // an empty string, retry without that final element so modifications + // touching the end‑of‑file can be located reliably. let mut pattern: &[String] = &chunk.old_lines; let mut found = @@ -785,7 +779,34 @@ fn compute_replacements( } if let Some(start_idx) = found { - replacements.push((start_idx, pattern.len(), new_slice.to_vec())); + // Context lines occur in both sides of a patch chunk. Keep those + // original lines in place so their exact contents and terminators + // survive, especially when the file has mixed line endings. + let mut old_start = 0; + let mut new_start = 0; + for &(old_context, new_context) in &chunk.context_line_indices { + // A trailing empty context line can be removed from `pattern` + // and `new_slice` above when it represents the final newline. + if old_context >= pattern.len() || new_context >= new_slice.len() { + break; + } + if old_start != old_context || new_start != new_context { + replacements.push(( + start_idx + old_start, + old_context - old_start, + new_slice[new_start..new_context].to_vec(), + )); + } + old_start = old_context + 1; + new_start = new_context + 1; + } + if old_start != pattern.len() || new_start != new_slice.len() { + replacements.push(( + start_idx + old_start, + pattern.len() - old_start, + new_slice[new_start..].to_vec(), + )); + } line_index = start_idx + pattern.len(); } else { return Err(ApplyPatchError::ComputeReplacements(format!( @@ -801,34 +822,6 @@ fn compute_replacements( Ok(replacements) } -/// Apply the `(start_index, old_len, new_lines)` replacements to `original_lines`, -/// returning the modified file contents as a vector of lines. -fn apply_replacements( - mut lines: Vec, - replacements: &[(usize, usize, Vec)], -) -> Vec { - // We must apply replacements in descending order so that earlier replacements - // don't shift the positions of later ones. - for (start_idx, old_len, new_segment) in replacements.iter().rev() { - let start_idx = *start_idx; - let old_len = *old_len; - - // Remove old lines. - for _ in 0..old_len { - if start_idx < lines.len() { - lines.remove(start_idx); - } - } - - // Insert new lines. - for (offset, new_line) in new_segment.iter().enumerate() { - lines.insert(start_idx + offset, new_line.clone()); - } - } - - lines -} - /// Intended result of a file update for apply_patch. #[derive(Debug, Eq, PartialEq)] pub struct ApplyPatchFileUpdate { diff --git a/codex-rs/apply-patch/src/parser.rs b/codex-rs/apply-patch/src/parser.rs index ec2d97c477fb..7e5adacb3d0f 100644 --- a/codex-rs/apply-patch/src/parser.rs +++ b/codex-rs/apply-patch/src/parser.rs @@ -111,7 +111,7 @@ impl Hunk { #[cfg(test)] use Hunk::*; -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, Default, PartialEq, Clone)] pub struct UpdateFileChunk { /// A single line of context used to narrow down the position of the chunk /// (this is usually a class, method, or function definition.) @@ -122,11 +122,26 @@ pub struct UpdateFileChunk { pub old_lines: Vec, pub new_lines: Vec, + /// Pairs of indices into `old_lines` and `new_lines` that identify lines + /// parsed as context rather than inferred to be equal by their contents. + pub(crate) context_line_indices: Vec<(usize, usize)>, + /// If set to true, `old_lines` must occur at the end of the source file. /// (Tolerance around trailing newlines should be encouraged.) pub is_end_of_file: bool, } +impl UpdateFileChunk { + /// Adds a context line to both sides while recording its corresponding + /// indices so it remains distinguishable from identical changed lines. + pub(crate) fn push_context_line(&mut self, line: String) { + self.context_line_indices + .push((self.old_lines.len(), self.new_lines.len())); + self.old_lines.push(line.clone()); + self.new_lines.push(line); + } +} + pub fn parse_patch(patch: &str) -> Result { let mode = if PARSE_IN_STRICT_MODE { ParseMode::Strict @@ -345,6 +360,7 @@ fn test_parse_patch() { change_context: Some("def f():".to_string()), old_lines: vec![" pass".to_string()], new_lines: vec![" return 123".to_string()], + context_line_indices: vec![], is_end_of_file: false }] } @@ -372,6 +388,7 @@ fn test_parse_patch() { change_context: None, old_lines: vec![], new_lines: vec!["line".to_string()], + context_line_indices: vec![], is_end_of_file: false }], }, @@ -402,6 +419,7 @@ fn test_parse_patch() { change_context: None, old_lines: vec!["import foo".to_string()], new_lines: vec!["import foo".to_string(), "bar".to_string()], + context_line_indices: vec![(0, 0)], is_end_of_file: false, }], }] @@ -422,6 +440,7 @@ fn test_parse_patch_preserves_end_of_file_marker() { change_context: None, old_lines: Vec::new(), new_lines: vec!["quux".to_string()], + context_line_indices: vec![], is_end_of_file: true, }], }], @@ -470,6 +489,7 @@ fn test_parse_patch_accepts_relative_and_absolute_hunk_paths() { change_context: None, old_lines: vec!["old".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false }] }, @@ -548,6 +568,7 @@ fn test_parse_patch_lenient() { change_context: None, old_lines: vec!["import foo".to_string()], new_lines: vec!["import foo".to_string(), "bar".to_string()], + context_line_indices: vec![(0, 0)], is_end_of_file: false, }], }]; diff --git a/codex-rs/apply-patch/src/streaming_parser.rs b/codex-rs/apply-patch/src/streaming_parser.rs index 7b447fce3c3c..ff1b2f82feec 100644 --- a/codex-rs/apply-patch/src/streaming_parser.rs +++ b/codex-rs/apply-patch/src/streaming_parser.rs @@ -274,12 +274,7 @@ impl StreamingPatchParser { } if update_line == EMPTY_CHANGE_CONTEXT_MARKER { - chunks.push(UpdateFileChunk { - change_context: None, - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, - }); + chunks.push(UpdateFileChunk::default()); self.state.mode = StreamingParserMode::UpdateFile { hunk_line_number }; return Ok(()); } @@ -287,9 +282,7 @@ impl StreamingPatchParser { if let Some(change_context) = update_line.strip_prefix(CHANGE_CONTEXT_MARKER) { chunks.push(UpdateFileChunk { change_context: Some(change_context.to_string()), - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, + ..UpdateFileChunk::default() }); self.state.mode = StreamingParserMode::UpdateFile { hunk_line_number }; return Ok(()); @@ -313,16 +306,10 @@ impl StreamingPatchParser { if line.is_empty() { if chunks.is_empty() { - chunks.push(UpdateFileChunk { - change_context: None, - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, - }); + chunks.push(UpdateFileChunk::default()); } if let Some(chunk) = chunks.last_mut() { - chunk.old_lines.push(String::new()); - chunk.new_lines.push(String::new()); + chunk.push_context_line(String::new()); } self.state.mode = StreamingParserMode::UpdateFile { hunk_line_number }; return Ok(()); @@ -330,16 +317,10 @@ impl StreamingPatchParser { if let Some(line_to_add) = line.strip_prefix(' ') { if chunks.is_empty() { - chunks.push(UpdateFileChunk { - change_context: None, - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, - }); + chunks.push(UpdateFileChunk::default()); } if let Some(chunk) = chunks.last_mut() { - chunk.old_lines.push(line_to_add.to_string()); - chunk.new_lines.push(line_to_add.to_string()); + chunk.push_context_line(line_to_add.to_string()); } self.state.mode = StreamingParserMode::UpdateFile { hunk_line_number }; return Ok(()); @@ -347,12 +328,7 @@ impl StreamingPatchParser { if let Some(line_to_add) = line.strip_prefix('+') { if chunks.is_empty() { - chunks.push(UpdateFileChunk { - change_context: None, - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, - }); + chunks.push(UpdateFileChunk::default()); } if let Some(chunk) = chunks.last_mut() { chunk.new_lines.push(line_to_add.to_string()); @@ -363,12 +339,7 @@ impl StreamingPatchParser { if let Some(line_to_remove) = line.strip_prefix('-') { if chunks.is_empty() { - chunks.push(UpdateFileChunk { - change_context: None, - old_lines: Vec::new(), - new_lines: Vec::new(), - is_end_of_file: false, - }); + chunks.push(UpdateFileChunk::default()); } if let Some(chunk) = chunks.last_mut() { chunk.old_lines.push(line_to_remove.to_string()); @@ -445,6 +416,7 @@ mod tests { change_context: None, old_lines: vec!["old".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false, }], }]) @@ -635,12 +607,14 @@ mod tests { change_context: None, old_lines: vec!["old a".to_string(), "*** Update File: b.txt".to_string()], new_lines: vec!["new a".to_string(), "*** Update File: b.txt".to_string()], + context_line_indices: vec![(1, 1)], is_end_of_file: false, }, UpdateFileChunk { change_context: None, old_lines: vec!["old b".to_string()], new_lines: vec!["new b".to_string()], + context_line_indices: vec![], is_end_of_file: false, }, ], @@ -680,6 +654,7 @@ mod tests { String::new(), "context after".to_string(), ], + context_line_indices: vec![(0, 0), (1, 1), (2, 2)], is_end_of_file: false, }], }]) @@ -700,6 +675,7 @@ mod tests { change_context: None, old_lines: Vec::new(), new_lines: vec!["quux".to_string()], + context_line_indices: vec![], is_end_of_file: true, }], }]) @@ -718,6 +694,7 @@ mod tests { change_context: None, old_lines: vec!["old".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false, }], }]) @@ -733,6 +710,7 @@ mod tests { change_context: None, old_lines: vec!["old\r".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false, }], }]) @@ -769,6 +747,7 @@ mod tests { change_context: None, old_lines: vec!["old".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false, }], }]) @@ -782,6 +761,7 @@ mod tests { change_context: None, old_lines: vec!["old".to_string()], new_lines: vec!["new".to_string()], + context_line_indices: vec![], is_end_of_file: false, }], }]) diff --git a/codex-rs/apply-patch/src/text_file.rs b/codex-rs/apply-patch/src/text_file.rs new file mode 100644 index 000000000000..b7d598ca973e --- /dev/null +++ b/codex-rs/apply-patch/src/text_file.rs @@ -0,0 +1,121 @@ +pub(super) type Replacement = (usize, usize, Vec); + +#[derive(Clone, Copy)] +enum LineEnding { + Lf, + CrLf, + Cr, +} + +impl LineEnding { + fn as_str(self) -> &'static str { + match self { + Self::Lf => "\n", + Self::CrLf => "\r\n", + Self::Cr => "\r", + } + } +} + +struct SourceLine { + text: String, + ending: Option, +} + +pub(super) struct SourceFile { + lines: Vec, + preferred_ending: LineEnding, +} + +impl SourceFile { + /// Splits contents into logical lines while retaining each line ending. + /// + /// The first existing ending becomes the preferred style for inserted + /// lines; files without an ending default to LF. + pub(super) fn parse(contents: &str) -> Self { + let mut lines = Vec::new(); + let mut preferred_ending = None; + let mut line_start = 0; + let mut cursor = 0; + + while cursor < contents.len() { + let (ending, ending_len) = match contents.as_bytes()[cursor] { + b'\r' if contents.as_bytes().get(cursor + 1) == Some(&b'\n') => { + (LineEnding::CrLf, 2) + } + b'\r' => (LineEnding::Cr, 1), + b'\n' => (LineEnding::Lf, 1), + _ => { + cursor += 1; + continue; + } + }; + preferred_ending.get_or_insert(ending); + lines.push(SourceLine { + text: contents[line_start..cursor].to_string(), + ending: Some(ending), + }); + cursor += ending_len; + line_start = cursor; + } + + if line_start < contents.len() { + lines.push(SourceLine { + text: contents[line_start..].to_string(), + ending: None, + }); + } + + Self { + lines, + preferred_ending: preferred_ending.unwrap_or(LineEnding::Lf), + } + } + + pub(super) fn line_texts(&self) -> Vec { + self.lines.iter().map(|line| line.text.clone()).collect() + } + + /// Rebuilds the file from source-ordered, non-overlapping replacements. + /// + /// Unchanged lines retain their original endings, inserted lines use the + /// preferred ending, and every resulting line receives an ending to match + /// apply-patch's historical trailing-newline behavior. + pub(super) fn apply_replacements(&mut self, replacements: &[Replacement]) { + let mut source_lines = std::mem::take(&mut self.lines).into_iter(); + let mut new_lines = Vec::new(); + let mut source_index = 0; + + for (start_idx, old_len, new_segment) in replacements { + debug_assert!(*start_idx >= source_index); + for line in source_lines.by_ref().take(*start_idx - source_index) { + new_lines.push(line); + } + for _ in source_lines.by_ref().take(*old_len) {} + new_lines.extend(new_segment.iter().map(|text| SourceLine { + text: text.clone(), + ending: Some(self.preferred_ending), + })); + source_index = start_idx + old_len; + } + new_lines.extend(source_lines); + self.lines = new_lines; + + // Updates have historically added a trailing newline. This also gives + // an unterminated last line an ending if an insertion moved it inward. + for line in &mut self.lines { + line.ending.get_or_insert(self.preferred_ending); + } + } + + pub(super) fn into_contents(self) -> String { + let mut contents = String::new(); + for line in self.lines { + contents.push_str(&line.text); + if let Some(ending) = line.ending { + contents.push_str(ending.as_str()); + } + } + contents + } +} diff --git a/codex-rs/apply-patch/tests/suite/tool.rs b/codex-rs/apply-patch/tests/suite/tool.rs index 8499d0fb9062..9ec880931d6a 100644 --- a/codex-rs/apply-patch/tests/suite/tool.rs +++ b/codex-rs/apply-patch/tests/suite/tool.rs @@ -11,6 +11,26 @@ fn run_apply_patch_in_dir(dir: &Path, patch: &str) -> anyhow::Result anyhow::Result<()> { + let tmp = tempdir()?; + let target_path = tmp.path().join(file_name); + fs::write(&target_path, original)?; + + run_apply_patch_in_dir(tmp.path(), patch)? + .success() + .stdout(format!( + "Success. Updated the following files:\nM {file_name}\n" + )); + + assert_eq!(fs::read(target_path)?, expected); + Ok(()) +} + fn apply_patch_command(dir: &Path) -> anyhow::Result { let mut cmd = Command::new(codex_utils_cargo_bin::cargo_bin("apply_patch")?); cmd.current_dir(dir); @@ -64,6 +84,71 @@ fn test_apply_patch_cli_applies_multiple_chunks() -> anyhow::Result<()> { Ok(()) } +#[test] +fn test_apply_patch_cli_preserves_crlf_from_target_file() -> anyhow::Result<()> { + let patch = "*** Begin Patch\n*** Update File: crlf.txt\n@@\n-one\n+uno\n@@\n two\n+\n+between\n three\n*** End Patch"; + + assert_apply_patch_updates_file( + "crlf.txt", + b"one\r\ntwo\r\nthree\r\n", + patch, + b"uno\r\ntwo\r\n\r\nbetween\r\nthree\r\n", + ) +} + +#[test] +fn test_apply_patch_cli_preserves_cr_from_target_file() -> anyhow::Result<()> { + let patch = "*** Begin Patch\n*** Update File: cr.txt\n@@\n-one\n+uno\n@@\n two\n+\n+between\n three\n*** End Patch"; + + assert_apply_patch_updates_file( + "cr.txt", + b"one\rtwo\rthree\r", + patch, + b"uno\rtwo\r\rbetween\rthree\r", + ) +} + +#[test] +fn test_apply_patch_cli_preserves_change_order_with_repeated_lines() -> anyhow::Result<()> { + let patch = + "*** Begin Patch\n*** Update File: repeated.txt\n@@\n-a\n-b\n+b\n+b\n+a\n*** End Patch"; + + assert_apply_patch_updates_file("repeated.txt", b"a\nb\n", patch, b"b\nb\na\n") +} + +#[test] +fn test_apply_patch_cli_preserves_repeated_context_line_ending() -> anyhow::Result<()> { + let patch = + "*** Begin Patch\n*** Update File: repeated_context.txt\n@@\n-same\n same\n*** End Patch"; + + assert_apply_patch_updates_file("repeated_context.txt", b"same\r\nsame\n", patch, b"same\n") +} + +#[test] +fn test_apply_patch_cli_preserves_untouched_mixed_line_endings() -> anyhow::Result<()> { + let patch = "*** Begin Patch\n*** Update File: mixed.txt\n@@\n one\n two\n-three\n+THREE\n four\n*** End Patch"; + + assert_apply_patch_updates_file( + "mixed.txt", + b"one\r\ntwo\rthree\nfour\r\n", + patch, + b"one\r\ntwo\rTHREE\r\nfour\r\n", + ) +} + +#[test] +fn test_apply_patch_cli_uses_crlf_for_new_trailing_newline() -> anyhow::Result<()> { + let patch = + "*** Begin Patch\n*** Update File: no_trailing_newline.txt\n@@\n-one\n+ONE\n*** End Patch"; + + assert_apply_patch_updates_file( + "no_trailing_newline.txt", + b"one\r\ntwo", + patch, + b"ONE\r\ntwo\r\n", + ) +} + #[test] fn test_apply_patch_cli_moves_file_to_new_directory() -> anyhow::Result<()> { let tmp = tempdir()?;