move tests into subfolder, add missing install hooks

This commit is contained in:
Nick Sweeting
2026-01-02 00:22:07 -08:00
parent c2afb40350
commit 65ee09ceab
80 changed files with 2659 additions and 859 deletions

View File

@@ -116,7 +116,7 @@ def upgrade_core_tables(apps, schema_editor):
retry_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
fs_version VARCHAR(10) NOT NULL DEFAULT '0.8.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
@@ -326,6 +326,16 @@ class Migration(migrations.Migration):
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
# Declare fs_version (already created in database with DEFAULT '0.8.0')
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(
max_length=10,
default='0.8.0',
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
),
),
# SnapshotTag table already exists from v0.7.2, just declare it in state
migrations.CreateModel(

View File

@@ -150,11 +150,7 @@ class Migration(migrations.Migration):
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
),
# NOTE: fs_version already added by migration 0023 with default='0.8.0'
# NOTE: modified_at already added by migration 0023
migrations.AddField(
model_name='snapshot',

View File

@@ -8,7 +8,7 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0025_alter_archiveresult_options_alter_snapshot_options_and_more'),
('machine', '0003_add_process_type_and_parent'),
('machine', '0007_add_process_type_and_parent'),
]
operations = [

View File

@@ -0,0 +1,388 @@
# Generated by hand on 2026-01-01
# Copies ArchiveResult cmd/pwd/cmd_version data to Process records before removing old fields
from django.db import migrations, connection
import json
from pathlib import Path
def parse_cmd_field(cmd_raw):
    """
    Parse cmd field which could be:
    1. JSON array string: '["wget", "-p", "url"]'
    2. Space-separated string: 'wget -p url'
    3. NULL/empty

    Returns list of strings.
    """
    import shlex

    if not cmd_raw:
        return []
    cmd_raw = cmd_raw.strip()
    if not cmd_raw:
        return []
    # Try to parse as JSON first
    if cmd_raw.startswith('['):
        try:
            parsed = json.loads(cmd_raw)
            if isinstance(parsed, list):
                return [str(x) for x in parsed]
        except json.JSONDecodeError:
            pass
    # Fallback: shell-style split so quoted arguments ('wget -e "robots=off" url')
    # stay intact instead of being broken apart by a naive str.split().
    try:
        return shlex.split(cmd_raw)
    except ValueError:
        # Unbalanced quotes etc. - degrade to plain whitespace split
        return cmd_raw.split()
def get_or_create_current_machine(cursor):
    """Get or create Machine.current() using raw SQL.

    Raw SQL (with sqlite ``?`` placeholders and ``PRAGMA table_info``) is used
    instead of the ORM because this runs inside a data migration where the
    historical model state may not match the live table schema.

    Args:
        cursor: open sqlite DB cursor

    Returns:
        machine_id (str): primary key of the existing or newly inserted row
    """
    import uuid
    import socket
    from datetime import datetime

    # Simple machine detection - get hostname as guid
    hostname = socket.gethostname()
    guid = f'host_{hostname}'  # Simple but stable identifier

    # Check if machine exists
    cursor.execute("SELECT id FROM machine_machine WHERE guid = ?", [guid])
    row = cursor.fetchone()
    if row:
        return row[0]

    # Create new machine
    machine_id = str(uuid.uuid4())
    now = datetime.now().isoformat()

    # Check which columns exist (schema differs between 0.8.x and 0.9.x)
    cursor.execute("PRAGMA table_info(machine_machine)")
    machine_cols = {row[1] for row in cursor.fetchall()}

    # Build INSERT statement based on available columns
    if 'config' in machine_cols:
        # 0.9.x schema with config column
        cursor.execute("""
            INSERT INTO machine_machine (
                id, created_at, modified_at, guid, hostname,
                hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
                os_arch, os_family, os_platform, os_release, os_kernel,
                stats, config, num_uses_failed, num_uses_succeeded
            ) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
                      '', '', '', '', '', '{}', '{}', 0, 0)
        """, [machine_id, now, now, guid, hostname])
    else:
        # 0.8.x schema without config column
        cursor.execute("""
            INSERT INTO machine_machine (
                id, created_at, modified_at, guid, hostname,
                hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
                os_arch, os_family, os_platform, os_release, os_kernel,
                stats, num_uses_failed, num_uses_succeeded
            ) VALUES (?, ?, ?, ?, ?, 0, 0, '', '', '',
                      '', '', '', '', '', '{}', 0, 0)
        """, [machine_id, now, now, guid, hostname])

    return machine_id
def get_or_create_binary(cursor, machine_id, name, abspath, version):
    """
    Get or create Binary record.

    Args:
        cursor: DB cursor
        machine_id: Machine FK
        name: Binary name (basename of command)
        abspath: Absolute path to binary (or just the bare name if the path
            is unknown; stored as-is either way)
        version: Version string

    Returns:
        binary_id (str)
    """
    import uuid
    from datetime import datetime

    # Check if binary exists with same machine, name, abspath, version
    cursor.execute("""
        SELECT id FROM machine_binary
        WHERE machine_id = ? AND name = ? AND abspath = ? AND version = ?
    """, [machine_id, name, abspath, version])
    row = cursor.fetchone()
    if row:
        return row[0]

    # Create new binary
    binary_id = str(uuid.uuid4())
    now = datetime.now().isoformat()

    # Check which columns exist (schema differs between 0.8.x and 0.9.x)
    cursor.execute("PRAGMA table_info(machine_binary)")
    binary_cols = {row[1] for row in cursor.fetchall()}

    # Use only columns that exist in current schema
    # 0.8.x schema: id, created_at, modified_at, machine_id, name, binprovider, abspath, version, sha256, num_uses_failed, num_uses_succeeded
    # 0.9.x schema adds: binproviders, overrides, status, retry_at, output_dir
    if 'binproviders' in binary_cols:
        # 0.9.x schema
        cursor.execute("""
            INSERT INTO machine_binary (
                id, created_at, modified_at, machine_id,
                name, binproviders, overrides, binprovider, abspath, version, sha256,
                status, retry_at, output_dir,
                num_uses_failed, num_uses_succeeded
            ) VALUES (?, ?, ?, ?, ?, 'env', '{}', 'env', ?, ?, '',
                      'succeeded', NULL, '', 0, 0)
        """, [binary_id, now, now, machine_id, name, abspath, version])
    else:
        # 0.8.x schema (simpler)
        cursor.execute("""
            INSERT INTO machine_binary (
                id, created_at, modified_at, machine_id,
                name, binprovider, abspath, version, sha256,
                num_uses_failed, num_uses_succeeded
            ) VALUES (?, ?, ?, ?, ?, 'env', ?, ?, '', 0, 0)
        """, [binary_id, now, now, machine_id, name, abspath, version])

    return binary_id
def map_status(old_status):
    """
    Map old ArchiveResult status to Process status and exit_code.

    Args:
        old_status: One of: queued, started, backoff, succeeded, failed, skipped

    Returns:
        (process_status, exit_code) tuple
    """
    if old_status == 'started':
        return ('running', None)
    if old_status == 'succeeded':
        return ('exited', 0)
    if old_status == 'failed':
        return ('exited', 1)
    if old_status == 'skipped':
        # Skipped = exited without error
        return ('exited', None)
    # 'queued', 'backoff', and any unrecognized value all go back to the queue
    return ('queued', None)
def create_process(cursor, machine_id, pwd, cmd, status, exit_code, started_at, ended_at, binary_id):
    """
    Create a Process record.

    Args:
        cursor: DB cursor
        machine_id: Machine FK
        pwd: working directory string (may be empty)
        cmd: command as a list of strings (serialized to JSON for storage)
        status: Process status ('queued', 'running', or 'exited')
        exit_code: integer exit code, or None if not exited cleanly/at all
        started_at: ISO timestamp string or None
        ended_at: ISO timestamp string or None
        binary_id: Binary FK or None if no binary could be determined

    Returns:
        process_id (str)
    """
    import uuid
    from datetime import datetime

    process_id = str(uuid.uuid4())
    now = datetime.now().isoformat()

    # Convert cmd array to JSON
    cmd_json = json.dumps(cmd)

    # Set retry_at to now for queued processes, NULL otherwise
    # (queued rows with a retry_at are eligible for pickup by workers)
    retry_at = now if status == 'queued' else None

    # parent_id/pid/iface_id/url are unknowable for historical records, so
    # they are hard-coded to NULL; process_type 'cli' and timeout 120 are
    # the defaults applied to all migrated rows.
    cursor.execute("""
        INSERT INTO machine_process (
            id, created_at, modified_at, machine_id, parent_id, process_type,
            pwd, cmd, env, timeout,
            pid, exit_code, stdout, stderr,
            started_at, ended_at,
            binary_id, iface_id, url,
            status, retry_at
        ) VALUES (?, ?, ?, ?, NULL, 'cli',
                  ?, ?, '{}', 120,
                  NULL, ?, '', '',
                  ?, ?,
                  ?, NULL, NULL,
                  ?, ?)
    """, [
        process_id, now, now, machine_id,
        pwd, cmd_json,
        exit_code,
        started_at, ended_at,
        binary_id,
        status, retry_at
    ])

    return process_id
def copy_archiveresult_data_to_process(apps, schema_editor):
    """
    Copy old ArchiveResult execution data (cmd, pwd, cmd_version) to Process records.

    For each ArchiveResult without a process_id:
    1. Parse cmd field (handle both JSON array and space-separated string)
    2. Extract binary name/path from cmd[0]
    3. Get or create Binary record with machine, name, abspath, version
    4. Create Process record with mapped fields
    5. Link ArchiveResult.process_id to new Process

    Status mapping:
    - queued    → queued (exit_code=None)
    - started   → running (exit_code=None)
    - backoff   → queued (exit_code=None)
    - succeeded → exited (exit_code=0)
    - failed    → exited (exit_code=1)
    - skipped   → exited (exit_code=None)

    Per-row errors are logged and counted but never abort the migration.
    """
    cursor = connection.cursor()

    # Check if old fields still exist (skip if fresh install or already migrated)
    cursor.execute("PRAGMA table_info(core_archiveresult)")
    cols = {row[1] for row in cursor.fetchall()}
    print(f'DEBUG 0027: Columns found: {sorted(cols)}')
    print(f'DEBUG 0027: Has cmd={("cmd" in cols)}, pwd={("pwd" in cols)}, cmd_version={("cmd_version" in cols)}, process_id={("process_id" in cols)}')
    if 'cmd' not in cols or 'pwd' not in cols or 'cmd_version' not in cols:
        print('✓ Fresh install or fields already removed - skipping data copy')
        return

    # Check if process_id field exists (should exist from 0026)
    if 'process_id' not in cols:
        print('✗ ERROR: process_id field not found. Migration 0026 must run first.')
        return

    # Get or create Machine.current()
    machine_id = get_or_create_current_machine(cursor)

    # Get ArchiveResults without process_id that have cmd data
    # Use plugin (extractor was renamed to plugin in migration 0025)
    cursor.execute("""
        SELECT id, snapshot_id, plugin, cmd, pwd, cmd_version,
               status, start_ts, end_ts, created_at
        FROM core_archiveresult
        WHERE process_id IS NULL
          AND (cmd IS NOT NULL OR pwd IS NOT NULL)
    """)
    results = cursor.fetchall()
    if not results:
        print('✓ No ArchiveResults need Process migration')
        return

    print(f'Migrating {len(results)} ArchiveResults to Process records...')
    migrated_count = 0
    skipped_count = 0
    error_count = 0
    for i, row in enumerate(results):
        ar_id, snapshot_id, plugin, cmd_raw, pwd, cmd_version, status, start_ts, end_ts, created_at = row
        # First-row DEBUG prints below give a sample trace in migration logs
        if i == 0:
            print(f'DEBUG 0027: First row: ar_id={ar_id}, plugin={plugin}, cmd={cmd_raw[:50] if cmd_raw else None}, status={status}')
        try:
            # Parse cmd field
            cmd_array = parse_cmd_field(cmd_raw)
            if i == 0:
                print(f'DEBUG 0027: Parsed cmd: {cmd_array}')

            # Extract binary info from cmd[0] if available
            binary_id = None
            if cmd_array and cmd_array[0]:
                binary_name = Path(cmd_array[0]).name or plugin  # Fallback to plugin name
                binary_abspath = cmd_array[0]
                binary_version = cmd_version or ''
                # Get or create Binary record
                binary_id = get_or_create_binary(
                    cursor, machine_id, binary_name, binary_abspath, binary_version
                )
                if i == 0:
                    print(f'DEBUG 0027: Created Binary: id={binary_id}, name={binary_name}')

            # Map status
            process_status, exit_code = map_status(status)

            # Set timestamps (created_at is the best available fallback for start)
            started_at = start_ts or created_at
            ended_at = end_ts if process_status == 'exited' else None

            # Create Process record
            process_id = create_process(
                cursor=cursor,
                machine_id=machine_id,
                pwd=pwd or '',
                cmd=cmd_array,
                status=process_status,
                exit_code=exit_code,
                started_at=started_at,
                ended_at=ended_at,
                binary_id=binary_id,
            )
            if i == 0:
                print(f'DEBUG 0027: Created Process: id={process_id}')

            # Link ArchiveResult to Process
            cursor.execute(
                "UPDATE core_archiveresult SET process_id = ? WHERE id = ?",
                [process_id, ar_id]
            )
            migrated_count += 1
            if i == 0:
                print(f'DEBUG 0027: Linked ArchiveResult to Process')
        except Exception as e:
            # Best-effort per-row migration: record the error and move on so
            # one malformed row cannot block the whole schema migration
            print(f'✗ Error migrating ArchiveResult {ar_id}: {e}')
            import traceback
            traceback.print_exc()
            error_count += 1
            continue

    print(f'✓ Migration complete: {migrated_count} migrated, {skipped_count} skipped, {error_count} errors')
class Migration(migrations.Migration):
    """Copy ArchiveResult cmd/pwd/cmd_version into Process rows, then drop the old columns."""

    # core 0026 adds ArchiveResult.process_id and machine 0007 adds the
    # Process type/parent fields - both must exist before the data copy runs.
    dependencies = [
        ('core', '0026_add_process_to_archiveresult'),
        ('machine', '0007_add_process_type_and_parent'),
    ]

    operations = [
        # First, copy data from old fields to Process
        # (reverse is a noop: the copied data is not deleted on rollback)
        migrations.RunPython(
            copy_archiveresult_data_to_process,
            reverse_code=migrations.RunPython.noop,
        ),
        # Now safe to remove old fields (moved from 0025)
        migrations.RemoveField(
            model_name='archiveresult',
            name='cmd',
        ),
        migrations.RemoveField(
            model_name='archiveresult',
            name='cmd_version',
        ),
        migrations.RemoveField(
            model_name='archiveresult',
            name='pwd',
        ),
    ]

View File

@@ -362,24 +362,22 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# Migrate filesystem if needed (happens automatically on save)
if self.pk and self.fs_migration_needed:
from django.db import transaction
with transaction.atomic():
# Walk through migration chain automatically
current = self.fs_version
target = self._fs_current_version()
# Walk through migration chain automatically
current = self.fs_version
target = self._fs_current_version()
while current != target:
next_ver = self._fs_next_version(current)
method = f'_fs_migrate_from_{current.replace(".", "_")}_to_{next_ver.replace(".", "_")}'
while current != target:
next_ver = self._fs_next_version(current)
method = f'_fs_migrate_from_{current.replace(".", "_")}_to_{next_ver.replace(".", "_")}'
# Only run if method exists (most are no-ops)
if hasattr(self, method):
getattr(self, method)()
# Only run if method exists (most are no-ops)
if hasattr(self, method):
getattr(self, method)()
current = next_ver
current = next_ver
# Update version (still in transaction)
self.fs_version = target
# Update version
self.fs_version = target
super().save(*args, **kwargs)
if self.url not in self.crawl.urls:
@@ -486,33 +484,58 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# Convert index.json to index.jsonl in the new directory
self.convert_index_json_to_jsonl()
# Create backwards-compat symlink (INSIDE transaction)
symlink_path = CONSTANTS.ARCHIVE_DIR / self.timestamp
if symlink_path.is_symlink():
symlink_path.unlink()
# Schedule cleanup AFTER transaction commits successfully
# This ensures DB changes are committed before we delete old files
from django.db import transaction
transaction.on_commit(lambda: self._cleanup_old_migration_dir(old_dir, new_dir))
if not symlink_path.exists() or symlink_path == old_dir:
symlink_path.symlink_to(new_dir, target_is_directory=True)
# Return cleanup info for manual cleanup if needed (when called directly)
return (old_dir, new_dir)
# Schedule old directory deletion AFTER transaction commits
transaction.on_commit(lambda: self._cleanup_old_migration_dir(old_dir))
def _cleanup_old_migration_dir(self, old_dir: Path):
def _cleanup_old_migration_dir(self, old_dir: Path, new_dir: Path):
"""
Delete old directory after successful migration.
Delete old directory and create symlink after successful migration.
Called via transaction.on_commit() after DB commit succeeds.
"""
import shutil
import logging
print(f"[DEBUG] _cleanup_old_migration_dir called: old_dir={old_dir}, new_dir={new_dir}")
# Delete old directory
if old_dir.exists() and not old_dir.is_symlink():
print(f"[DEBUG] Attempting to delete old directory: {old_dir}")
try:
shutil.rmtree(old_dir)
print(f"[DEBUG] Successfully deleted old directory: {old_dir}")
except Exception as e:
# Log but don't raise - migration succeeded, this is just cleanup
print(f"[DEBUG] Failed to delete old directory {old_dir}: {e}")
logging.getLogger('archivebox.migration').warning(
f"Could not remove old migration directory {old_dir}: {e}"
)
return # Don't create symlink if cleanup failed
else:
print(f"[DEBUG] Old directory doesn't exist or is already a symlink: {old_dir}")
# Create backwards-compat symlink (after old dir is deleted)
symlink_path = old_dir # Same path as old_dir
if symlink_path.is_symlink():
print(f"[DEBUG] Unlinking existing symlink: {symlink_path}")
symlink_path.unlink()
if not symlink_path.exists():
print(f"[DEBUG] Creating symlink: {symlink_path} -> {new_dir}")
try:
symlink_path.symlink_to(new_dir, target_is_directory=True)
print(f"[DEBUG] Successfully created symlink")
except Exception as e:
print(f"[DEBUG] Failed to create symlink: {e}")
logging.getLogger('archivebox.migration').warning(
f"Could not create symlink from {symlink_path} to {new_dir}: {e}"
)
else:
print(f"[DEBUG] Symlink path already exists: {symlink_path}")
# =========================================================================
# Path Calculation and Migration Helpers
@@ -1616,8 +1639,11 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
This enables step-based execution where all hooks in a step can run in parallel.
"""
from archivebox.hooks import discover_hooks
from archivebox.config.configset import get_config
hooks = discover_hooks('Snapshot')
# Get merged config with crawl-specific PLUGINS filter
config = get_config(crawl=self.crawl, snapshot=self)
hooks = discover_hooks('Snapshot', config=config)
archiveresults = []
for hook_path in hooks:
@@ -2212,22 +2238,19 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
started = State(value=Snapshot.StatusChoices.STARTED)
sealed = State(value=Snapshot.StatusChoices.SEALED, final=True)
# Tick Event
# Tick Event (polled by workers)
tick = (
queued.to.itself(unless='can_start') |
queued.to(started, cond='can_start') |
started.to.itself(unless='is_finished', on='on_started_to_started') |
started.to(sealed, cond='is_finished')
queued.to(started, cond='can_start')
)
# Manual event (triggered by last ArchiveResult finishing)
seal = started.to(sealed)
def can_start(self) -> bool:
can_start = bool(self.snapshot.url)
return can_start
def is_finished(self) -> bool:
"""Check if snapshot processing is complete - delegates to model method."""
return self.snapshot.is_finished_processing()
@queued.enter
def enter_queued(self):
self.snapshot.update_and_requeue(
@@ -2237,29 +2260,34 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
@started.enter
def enter_started(self):
# lock the snapshot while we create the pending archiveresults
self.snapshot.update_and_requeue(
retry_at=timezone.now() + timedelta(seconds=30), # if failed, wait 30s before retrying
)
import sys
print(f'[cyan] 🔄 SnapshotMachine.enter_started() - creating archiveresults for {self.snapshot.url}[/cyan]', file=sys.stderr)
# Run the snapshot - creates pending archiveresults for all enabled plugins
self.snapshot.run()
# unlock the snapshot after we're done + set status = started
self.snapshot.update_and_requeue(
retry_at=timezone.now() + timedelta(seconds=5), # check again in 5s
status=Snapshot.StatusChoices.STARTED,
)
# Check if any archiveresults were created
ar_count = self.snapshot.archiveresult_set.count()
print(f'[cyan] 🔄 ArchiveResult count: {ar_count}[/cyan]', file=sys.stderr)
def on_started_to_started(self):
"""Called when Snapshot stays in started state (archiveresults not finished yet)."""
# Bump retry_at so we check again in a few seconds
self.snapshot.update_and_requeue(
retry_at=timezone.now() + timedelta(seconds=5),
)
if ar_count == 0:
# No archiveresults created, seal immediately
print(f'[cyan] 🔄 No archiveresults created, sealing snapshot immediately[/cyan]', file=sys.stderr)
self.seal()
else:
# Set status = started with retry_at far future (so workers don't claim us - we're waiting for ARs)
# Last AR will manually call self.seal() when done
self.snapshot.update_and_requeue(
retry_at=timezone.now() + timedelta(days=365),
status=Snapshot.StatusChoices.STARTED,
)
print(f'[cyan] 🔄 {ar_count} archiveresults created, waiting for them to finish[/cyan]', file=sys.stderr)
@sealed.enter
def enter_sealed(self):
import sys
# Clean up background hooks
self.snapshot.cleanup()
@@ -2268,6 +2296,21 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
status=Snapshot.StatusChoices.SEALED,
)
print(f'[cyan] ✅ SnapshotMachine.enter_sealed() - sealed {self.snapshot.url}[/cyan]', file=sys.stderr)
# Check if this is the last snapshot for the parent crawl - if so, seal the crawl
if self.snapshot.crawl:
crawl = self.snapshot.crawl
remaining_active = Snapshot.objects.filter(
crawl=crawl,
status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED]
).count()
if remaining_active == 0:
print(f'[cyan]🔒 All snapshots sealed for crawl {crawl.id}, sealing crawl[/cyan]', file=sys.stderr)
# Seal the parent crawl
crawl.sm.seal()
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithStateMachine):
class StatusChoices(models.TextChoices):
@@ -3102,8 +3145,30 @@ class ArchiveResultMachine(BaseStateMachine, strict_states=True):
end_ts=None,
)
def _check_and_seal_parent_snapshot(self):
"""Check if this is the last ArchiveResult to finish - if so, seal the parent Snapshot."""
import sys
snapshot = self.archiveresult.snapshot
# Check if all archiveresults are finished (in final states)
remaining_active = snapshot.archiveresult_set.exclude(
status__in=[
ArchiveResult.StatusChoices.SUCCEEDED,
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
]
).count()
if remaining_active == 0:
print(f'[cyan] 🔒 All archiveresults finished for snapshot {snapshot.url}, sealing snapshot[/cyan]', file=sys.stderr)
# Seal the parent snapshot
snapshot.sm.seal()
@succeeded.enter
def enter_succeeded(self):
import sys
self.archiveresult.update_and_requeue(
retry_at=None,
status=ArchiveResult.StatusChoices.SUCCEEDED,
@@ -3113,8 +3178,15 @@ class ArchiveResultMachine(BaseStateMachine, strict_states=True):
# Update health stats for ArchiveResult, Snapshot, and Crawl cascade
self.archiveresult.cascade_health_update(success=True)
print(f'[cyan] ✅ ArchiveResult succeeded: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/cyan]', file=sys.stderr)
# Check if this is the last AR to finish - seal parent snapshot if so
self._check_and_seal_parent_snapshot()
@failed.enter
def enter_failed(self):
import sys
self.archiveresult.update_and_requeue(
retry_at=None,
status=ArchiveResult.StatusChoices.FAILED,
@@ -3124,16 +3196,25 @@ class ArchiveResultMachine(BaseStateMachine, strict_states=True):
# Update health stats for ArchiveResult, Snapshot, and Crawl cascade
self.archiveresult.cascade_health_update(success=False)
print(f'[red] ❌ ArchiveResult failed: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/red]', file=sys.stderr)
# Check if this is the last AR to finish - seal parent snapshot if so
self._check_and_seal_parent_snapshot()
@skipped.enter
def enter_skipped(self):
import sys
self.archiveresult.update_and_requeue(
retry_at=None,
status=ArchiveResult.StatusChoices.SKIPPED,
end_ts=timezone.now(),
)
def after_transition(self, event: str, source: State, target: State):
self.archiveresult.snapshot.update_and_requeue() # bump snapshot retry time so it picks up all the new changes
print(f'[dim] ⏭️ ArchiveResult skipped: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/dim]', file=sys.stderr)
# Check if this is the last AR to finish - seal parent snapshot if so
self._check_and_seal_parent_snapshot()
# =============================================================================