Use atomic write when persisting cache (#9981) · astral-sh/ruff@bb8d203 · GitHub
[go: up one dir, main page]

Skip to content

Commit bb8d203

Browse files
authored
Use atomic write when persisting cache (#9981)
1 parent f40e012 commit bb8d203

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

crates/ruff/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ serde = { workspace = true }
4848
serde_json = { workspace = true }
4949
shellexpand = { workspace = true }
5050
strum = { workspace = true, features = [] }
51+
tempfile = { workspace = true }
5152
thiserror = { workspace = true }
5253
toml = { workspace = true }
5354
tracing = { workspace = true, features = ["log"] }

crates/ruff/src/cache.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::fmt::Debug;
22
use std::fs::{self, File};
33
use std::hash::Hasher;
4-
use std::io::{self, BufReader, BufWriter, Write};
4+
use std::io::{self, BufReader, Write};
55
use std::path::{Path, PathBuf};
66
use std::sync::atomic::{AtomicU64, Ordering};
77
use std::sync::Mutex;
@@ -15,6 +15,7 @@ use rayon::iter::ParallelIterator;
1515
use rayon::iter::{IntoParallelIterator, ParallelBridge};
1616
use rustc_hash::FxHashMap;
1717
use serde::{Deserialize, Serialize};
18+
use tempfile::NamedTempFile;
1819

1920
use ruff_cache::{CacheKey, CacheKeyHasher};
2021
use ruff_diagnostics::{DiagnosticKind, Fix};
@@ -165,15 +166,29 @@ impl Cache {
165166
return Ok(());
166167
}
167168

168-
let file = File::create(&self.path)
169-
.with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?;
170-
let writer = BufWriter::new(file);
171-
bincode::serialize_into(writer, &self.package).with_context(|| {
169+
// Write the cache to a temporary file first and then rename it for an "atomic" write.
170+
// Protects against data loss if the process is killed during the write and races between different ruff
171+
// processes, resulting in a corrupted cache file. https://github.com/astral-sh/ruff/issues/8147#issuecomment-1943345964
172+
let mut temp_file =
173+
NamedTempFile::new_in(self.path.parent().expect("Write path must have a parent"))
174+
.context("Failed to create temporary file")?;
175+
176+
// Serialize to in-memory buffer because hyperfine benchmark showed that it's faster than
177+
// using a `BufWriter` and our cache files are small enough that streaming isn't necessary.
178+
let serialized =
179+
bincode::serialize(&self.package).context("Failed to serialize cache data")?;
180+
temp_file
181+
.write_all(&serialized)
182+
.context("Failed to write serialized cache to temporary file.")?;
183+
184+
temp_file.persist(&self.path).with_context(|| {
172185
format!(
173-
"Failed to serialise cache to file '{}'",
186+
"Failed to rename temporary cache file to {}",
174187
self.path.display()
175188
)
176-
})
189+
})?;
190+
191+
Ok(())
177192
}
178193

179194
/// Applies the pending changes without storing the cache to disk.

0 commit comments

Comments
 (0)
0