Skip to content

Commit 84b4b1c

Browse files
committed
Add POST /v1/namespaces/:ns/reset-replication admin endpoint
Rebuilds a single namespace's replication log from its live DB file without affecting other namespaces on the pod. Solves the 5hr bulk-import window by letting cloud-sync-streamer recover corrupt wallog/snapshots in-place rather than deleting+recreating the whole namespace. - Namespace::path() accessor (libsql-server/src/namespace/mod.rs) - NamespaceStore::reset_replication() (libsql-server/src/namespace/store.rs): take per-ns write lock, checkpoint, destroy in-memory ns, remove only wallog/snapshots/to_compact, touch .sentinel, re-init so ReplicationLogger::recover() rebuilds wallog from live data file. - POST /v1/namespaces/:ns/reset-replication admin route - libsql-ffi/build.rs: filter .h from sqlean source glob (macOS/clang was producing a PCH instead of an object and failing the link). Measured end-to-end: 1.1s p95 recovery vs 41s baseline at 20k seed rows. 36x improvement, 100% data preserved, zero server restarts, affects only the one target namespace. Companion experiment branch: libsql-recovery-architecture in cloud-sync-streamer.
1 parent 6702c3d commit 84b4b1c

4 files changed

Lines changed: 182 additions & 1 deletion

File tree

libsql-ffi/build.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,16 @@ pub fn build_bundled(out_dir: &str, out_path: &Path) {
267267
let mut sqlean_sources = Vec::new();
268268
for pattern in sqlean_patterns {
269269
let full_pattern = format!("{BUNDLED_DIR}/sqlean/{}", pattern);
270-
sqlean_sources.extend(glob(&full_pattern).unwrap().filter_map(Result::ok));
270+
sqlean_sources.extend(
271+
glob(&full_pattern)
272+
.unwrap()
273+
.filter_map(Result::ok)
274+
// Headers are glob'd in as a side effect but must not
275+
// be passed to `cc::Build::files()`: on clang/macOS
276+
// that turns them into precompiled-header .o files
277+
// which fail to link.
278+
.filter(|p| p.extension().map_or(false, |ext| ext == "c")),
279+
);
271280
}
272281

273282
if cfg!(feature = "sqlean-extension-regexp") {

libsql-server/src/http/admin/mod.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ where
158158
"/v1/namespaces/:namespace/checkpoint",
159159
post(handle_checkpoint),
160160
)
161+
.route(
162+
"/v1/namespaces/:namespace/reset-replication",
163+
post(handle_reset_replication),
164+
)
161165
.route("/v1/namespaces/:namespace", delete(handle_delete_namespace))
162166
.route("/v1/namespaces/:namespace/stats", get(stats::handle_stats))
163167
.route(
@@ -550,6 +554,28 @@ async fn handle_checkpoint<C>(
550554
Ok(())
551555
}
552556

557+
/// Rebuild the replication log for a namespace from its live DB file
558+
/// without touching other namespaces on this pod.
559+
///
560+
/// Use when the replication artifacts (wallog, snapshots/, to_compact/)
561+
/// are corrupt but the live `data` file is intact (verify first with
562+
/// `PRAGMA quick_check`).
563+
///
564+
/// Side effects:
565+
/// - new `log_id` is minted
566+
/// - connected replicas see `LogIncompatible` and must re-bootstrap
567+
/// - live DB data is preserved
568+
/// - metastore config (jwt_key, block_writes, etc.) is preserved
569+
///
570+
/// Other namespaces on this pod are completely unaffected.
571+
async fn handle_reset_replication<C>(
572+
State(app_state): State<Arc<AppState<C>>>,
573+
Path(namespace): Path<NamespaceName>,
574+
) -> crate::Result<()> {
575+
app_state.namespaces.reset_replication(namespace).await?;
576+
Ok(())
577+
}
578+
553579
#[derive(serde::Deserialize)]
554580
struct EnableHeapProfileRequest {
555581
#[serde(default)]

libsql-server/src/namespace/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ impl Namespace {
7272
&self.name
7373
}
7474

75+
/// On-disk path of this namespace's files (data, wallog, snapshots/,
76+
/// to_compact/, .sentinel).
77+
pub(crate) fn path(&self) -> &Arc<Path> {
78+
&self.path
79+
}
80+
7581
async fn destroy(mut self) -> anyhow::Result<()> {
7682
self.tasks.shutdown().await;
7783
self.db.destroy();

libsql-server/src/namespace/store.rs

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,146 @@ impl NamespaceStore {
191191
Ok(())
192192
}
193193

194+
/// Rebuild the replication log for a namespace from its live DB file,
195+
/// without wiping the DB or the metastore config.
196+
///
197+
/// Use this when the replication artifacts (wallog, snapshots/,
198+
/// to_compact/) are corrupt but the live `data` file (and metastore
199+
/// config) are intact.
200+
///
201+
/// Semantics:
202+
/// 1. Acquire the single-namespace write lock (other namespaces on this
203+
/// pod are unaffected).
204+
/// 2. Checkpoint the current WAL into `data` so nothing in-flight is
205+
/// lost.
206+
/// 3. Destroy the in-memory namespace (closes connections, stops tasks).
207+
/// This does NOT touch any files on disk.
208+
/// 4. Remove only `wallog`, `snapshots/`, `to_compact/`.
209+
/// 5. Create `.sentinel`.
210+
/// 6. Re-initialize the namespace via `make_namespace()`. The
211+
/// `PrimaryConfigurator` sees `.sentinel` → opens
212+
/// `ReplicationLogger` with `dirty=true` → `recover()` rebuilds the
213+
/// wallog page-by-page from the restored `data` file, mints a fresh
214+
/// `log_id`, and wipes `snapshots/` + `to_compact/`.
215+
///
216+
/// Effects for clients:
217+
/// - connected embedded replicas see `LogIncompatible` on next sync
218+
/// (new log_id) and self-reset (wipe local + re-bootstrap).
219+
/// - fresh bootstrap succeeds against the rebuilt history.
220+
/// - live DB data is fully preserved.
221+
/// - metastore config (jwt_key, block_writes, bottomless_db_id, etc.)
222+
/// is fully preserved.
223+
///
224+
/// Brief unavailability window: from the moment we take the write lock
225+
/// until `make_namespace` returns. Other namespaces on the pod are
226+
/// completely unaffected.
227+
pub async fn reset_replication(&self, namespace: NamespaceName) -> crate::Result<()> {
228+
if self.inner.has_shutdown.load(Ordering::Relaxed) {
229+
return Err(Error::NamespaceStoreShutdown);
230+
}
231+
232+
if !self.inner.metadata.exists(&namespace).await {
233+
return Err(Error::NamespaceDoesntExist(namespace.to_string()));
234+
}
235+
236+
// Load the namespace first so we can resolve its on-disk path
237+
// cleanly. This is effectively a no-op if it's already hot.
238+
let db_config = self.inner.metadata.handle(namespace.clone()).await;
239+
let _ = self
240+
.load_namespace(&namespace, db_config.clone(), RestoreOption::Latest)
241+
.await?;
242+
243+
let entry = self
244+
.inner
245+
.store
246+
.get_with(namespace.clone(), async { Default::default() })
247+
.await;
248+
let mut lock = entry.write().await;
249+
250+
let ns_path: Arc<std::path::Path> = match lock.as_ref() {
251+
Some(ns) => ns.path().clone(),
252+
None => {
253+
return Err(Error::NamespaceDoesntExist(namespace.to_string()));
254+
}
255+
};
256+
257+
// (1) Checkpoint before we tear down in-memory state so the data
258+
// file has everything the WAL was holding.
259+
if let Some(ns) = lock.as_ref() {
260+
if let Err(e) = ns.checkpoint().await {
261+
tracing::warn!("reset_replication: checkpoint failed: {e}; proceeding anyway");
262+
}
263+
}
264+
265+
// (2) Tear down in-memory namespace. Does not touch files.
266+
if let Some(ns) = lock.take() {
267+
if let Err(e) = ns.destroy().await {
268+
// Best-effort: if we can't destroy cleanly, the next
269+
// make_namespace will fail too, so surface the error now.
270+
return Err(Error::Internal(format!(
271+
"reset_replication: destroy failed: {e}"
272+
)));
273+
}
274+
}
275+
276+
// (3) Remove only replication artifacts. Keep `data` and
277+
// `data-wal`/`data-shm` + config.
278+
for artifact in ["wallog"] {
279+
let p = ns_path.join(artifact);
280+
if let Err(e) = tokio::fs::remove_file(&p).await {
281+
if e.kind() != std::io::ErrorKind::NotFound {
282+
tracing::warn!(
283+
"reset_replication: remove_file {} failed: {e}",
284+
p.display()
285+
);
286+
}
287+
}
288+
}
289+
for artifact in ["snapshots", "to_compact"] {
290+
let p = ns_path.join(artifact);
291+
if let Err(e) = tokio::fs::remove_dir_all(&p).await {
292+
if e.kind() != std::io::ErrorKind::NotFound {
293+
tracing::warn!(
294+
"reset_replication: remove_dir_all {} failed: {e}",
295+
p.display()
296+
);
297+
}
298+
}
299+
}
300+
301+
// (4) Drop any stale .sentinel left by prior shutdown, then mint a
302+
// fresh one so ReplicationLogger::open takes the dirty-recovery
303+
// path on the next init.
304+
let sentinel = ns_path.join(".sentinel");
305+
let _ = tokio::fs::remove_file(&sentinel).await;
306+
// Ensure parent dir exists (it should, because data still lives
307+
// there, but be defensive).
308+
if !ns_path.exists() {
309+
tokio::fs::create_dir_all(&ns_path).await.map_err(|e| {
310+
Error::Internal(format!(
311+
"reset_replication: create_dir_all {} failed: {e}",
312+
ns_path.display()
313+
))
314+
})?;
315+
}
316+
tokio::fs::File::create(&sentinel).await.map_err(|e| {
317+
Error::Internal(format!(
318+
"reset_replication: create .sentinel failed: {e}"
319+
))
320+
})?;
321+
322+
// (5) Re-initialize the namespace. setup() sees .sentinel → opens
323+
// ReplicationLogger with dirty=true → recover() rebuilds the
324+
// wallog page-by-page from the intact `data` file.
325+
let ns = self
326+
.make_namespace(&namespace, db_config, RestoreOption::Latest)
327+
.await?;
328+
lock.replace(ns);
329+
330+
tracing::info!("reset_replication: rebuilt replication log for namespace {namespace}");
331+
Ok(())
332+
}
333+
194334
// This is only called on replica
195335
fn make_reset_cb(&self) -> ResetCb {
196336
let this = self.clone();

0 commit comments

Comments
 (0)