fix transition mid transition

This commit is contained in:
Nick Sweeting
2026-01-02 00:24:44 -08:00
parent 65ee09ceab
commit 3672174dad
3 changed files with 13 additions and 7 deletions

View File

@@ -2241,10 +2241,12 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
# Tick Event (polled by workers)
tick = (
queued.to.itself(unless='can_start') |
queued.to(started, cond='can_start')
queued.to(started, cond='can_start') |
started.to.itself(unless='is_finished') |
started.to(sealed, cond='is_finished')
)
# Manual event (triggered by last ArchiveResult finishing)
# Manual event (can also be triggered by last ArchiveResult finishing)
seal = started.to(sealed)
def can_start(self) -> bool:

View File

@@ -548,17 +548,20 @@ class CrawlMachine(BaseStateMachine, strict_states=True):
if root_snapshot:
print(f'[cyan]🔄 Created root snapshot: {root_snapshot.url}[/cyan]', file=sys.stderr)
# Update status to STARTED
# Set retry_at to far future so workers don't claim us (we're waiting for snapshots to finish)
# Set retry_at to None so workers don't claim us (we wait for snapshots to finish)
# Last snapshot will manually call self.seal() when done
self.crawl.update_and_requeue(
retry_at=timezone.now() + timedelta(days=365),
retry_at=None,
status=Crawl.StatusChoices.STARTED,
)
else:
# No snapshots (system crawl like archivebox://install)
print(f'[cyan]🔄 No snapshots created, sealing crawl immediately[/cyan]', file=sys.stderr)
# Seal immediately since there's no work to do
self.seal()
print(f'[cyan]🔄 No snapshots created, allowing immediate seal[/cyan]', file=sys.stderr)
# Set retry_at=now so next tick() will transition to sealed
self.crawl.update_and_requeue(
retry_at=timezone.now(),
status=Crawl.StatusChoices.STARTED,
)
except Exception as e:
print(f'[red]⚠️ Crawl {self.crawl.id} failed to start: {e}[/red]')

View File

@@ -1435,6 +1435,7 @@ function getTestEnv() {
MACHINE_TYPE: machineType,
LIB_DIR: libDir,
NODE_MODULES_DIR: nodeModulesDir,
NODE_PATH: nodeModulesDir, // Node.js uses NODE_PATH for module resolution
NPM_BIN_DIR: path.join(libDir, 'npm', '.bin'),
CHROME_EXTENSIONS_DIR: getExtensionsDir(),
};