mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
type and test fixes
This commit is contained in:
@@ -31,7 +31,7 @@ __package__ = 'archivebox.workers'
|
||||
import os
|
||||
import time
|
||||
from typing import Type
|
||||
from datetime import timedelta
|
||||
from datetime import datetime, timedelta
|
||||
from multiprocessing import Process as MPProcess
|
||||
from pathlib import Path
|
||||
|
||||
@@ -189,7 +189,7 @@ class Orchestrator:
|
||||
event='Shutting down',
|
||||
indent_level=0,
|
||||
pid=self.pid,
|
||||
error=error if error and not isinstance(error, KeyboardInterrupt) else None,
|
||||
error=error if isinstance(error, Exception) and not isinstance(error, KeyboardInterrupt) else None,
|
||||
)
|
||||
|
||||
def get_total_worker_count(self) -> int:
|
||||
@@ -567,7 +567,8 @@ class Orchestrator:
|
||||
status=ArchiveResult.StatusChoices.STARTED,
|
||||
).select_related('process')
|
||||
for ar in started_ars:
|
||||
if ar.process_id and ar.process and ar.process.status == Process.StatusChoices.RUNNING:
|
||||
process_id = getattr(ar, 'process_id', None)
|
||||
if process_id and ar.process and ar.process.status == Process.StatusChoices.RUNNING:
|
||||
try:
|
||||
ar.process.kill_tree(graceful_timeout=0.0)
|
||||
except Exception:
|
||||
@@ -904,28 +905,29 @@ class Orchestrator:
|
||||
size = ''
|
||||
stderr_tail = ''
|
||||
if ar:
|
||||
if ar.process_id and ar.process:
|
||||
process_id = getattr(ar, 'process_id', None)
|
||||
if process_id and ar.process:
|
||||
stderr_tail = _tail_stderr_line(ar.process)
|
||||
if ar.status == ArchiveResult.StatusChoices.STARTED:
|
||||
status = 'started'
|
||||
is_running = True
|
||||
is_pending = False
|
||||
start_ts = ar.start_ts or (ar.process.started_at if ar.process_id and ar.process else None)
|
||||
start_ts = ar.start_ts or (ar.process.started_at if process_id and ar.process else None)
|
||||
if start_ts:
|
||||
elapsed = _format_seconds((now - start_ts).total_seconds())
|
||||
hook_timeout = None
|
||||
if ar.process_id and ar.process and ar.process.timeout:
|
||||
if process_id and ar.process and ar.process.timeout:
|
||||
hook_timeout = ar.process.timeout
|
||||
hook_timeout = hook_timeout or hook_timeouts.get(hook_name)
|
||||
if hook_timeout:
|
||||
timeout = _format_seconds(hook_timeout)
|
||||
else:
|
||||
status = ar.status
|
||||
if ar.process_id and ar.process and ar.process.exit_code == 137:
|
||||
if process_id and ar.process and ar.process.exit_code == 137:
|
||||
status = 'failed'
|
||||
is_pending = False
|
||||
start_ts = ar.start_ts or (ar.process.started_at if ar.process_id and ar.process else None)
|
||||
end_ts = ar.end_ts or (ar.process.ended_at if ar.process_id and ar.process else None)
|
||||
start_ts = ar.start_ts or (ar.process.started_at if process_id and ar.process else None)
|
||||
end_ts = ar.end_ts or (ar.process.ended_at if process_id and ar.process else None)
|
||||
if start_ts and end_ts:
|
||||
elapsed = _format_seconds((end_ts - start_ts).total_seconds())
|
||||
size = _format_size(getattr(ar, 'output_size', None))
|
||||
@@ -1093,7 +1095,7 @@ class Orchestrator:
|
||||
from archivebox.core.models import Snapshot
|
||||
|
||||
# Get all started snapshots (optionally filtered by crawl_id)
|
||||
snapshot_filter = {'status': 'started'}
|
||||
snapshot_filter: dict[str, str | datetime] = {'status': 'started'}
|
||||
if self.crawl_id:
|
||||
snapshot_filter['crawl_id'] = self.crawl_id
|
||||
else:
|
||||
|
||||
@@ -335,6 +335,7 @@ def start_worker(supervisor, daemon, lazy=False):
|
||||
for added in added:
|
||||
supervisor.addProcessGroup(added)
|
||||
|
||||
procs = []
|
||||
for _ in range(25):
|
||||
procs = supervisor.getAllProcessInfo()
|
||||
for proc in procs:
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from datetime import timedelta
|
||||
from typing import cast
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import UserManager
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -12,7 +14,8 @@ from archivebox.workers.worker import CrawlWorker
|
||||
|
||||
class TestScheduledCrawlMaterialization(TestCase):
|
||||
def setUp(self):
|
||||
self.user = get_user_model().objects.create_user(
|
||||
user_manager = cast(UserManager, get_user_model().objects)
|
||||
self.user = user_manager.create_user(
|
||||
username='schedule-user',
|
||||
password='password',
|
||||
)
|
||||
@@ -52,6 +55,8 @@ class TestScheduledCrawlMaterialization(TestCase):
|
||||
self.assertEqual(scheduled_crawls.count(), 2)
|
||||
|
||||
queued_crawl = scheduled_crawls.last()
|
||||
self.assertIsNotNone(queued_crawl)
|
||||
assert queued_crawl is not None
|
||||
self.assertEqual(queued_crawl.status, Crawl.StatusChoices.QUEUED)
|
||||
self.assertEqual(queued_crawl.urls, 'https://example.com/feed.xml')
|
||||
self.assertEqual(queued_crawl.max_depth, 1)
|
||||
@@ -63,7 +68,7 @@ class TestScheduledCrawlMaterialization(TestCase):
|
||||
Orchestrator(exit_on_idle=True)._materialize_due_schedules()
|
||||
self.assertEqual(Crawl.objects.filter(schedule=schedule).count(), 1)
|
||||
|
||||
Orchestrator(exit_on_idle=False, crawl_id=str(schedule.template_id))._materialize_due_schedules()
|
||||
Orchestrator(exit_on_idle=False, crawl_id=str(schedule.template.id))._materialize_due_schedules()
|
||||
self.assertEqual(Crawl.objects.filter(schedule=schedule).count(), 1)
|
||||
|
||||
@patch.object(CrawlWorker, 'start')
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, cast
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.test import SimpleTestCase
|
||||
@@ -11,14 +12,14 @@ class TestSnapshotWorkerRetryForegroundHooks(SimpleTestCase):
|
||||
def _make_worker(self):
|
||||
worker = SnapshotWorker.__new__(SnapshotWorker)
|
||||
worker.pid = 12345
|
||||
worker.snapshot = SimpleNamespace(
|
||||
cast(Any, worker).snapshot = SimpleNamespace(
|
||||
status='started',
|
||||
refresh_from_db=lambda: None,
|
||||
)
|
||||
worker._snapshot_exceeded_hard_timeout = lambda: False
|
||||
worker._seal_snapshot_due_to_timeout = lambda: None
|
||||
worker._run_hook = lambda *args, **kwargs: SimpleNamespace()
|
||||
worker._wait_for_hook = lambda *args, **kwargs: None
|
||||
worker._wait_for_hook = lambda process, ar: None
|
||||
return worker
|
||||
|
||||
@patch('archivebox.workers.worker.log_worker_event')
|
||||
@@ -49,10 +50,10 @@ class TestSnapshotWorkerRetryForegroundHooks(SimpleTestCase):
|
||||
run_calls.append((args, kwargs))
|
||||
return SimpleNamespace()
|
||||
|
||||
def wait_for_hook(process, archive_result):
|
||||
wait_calls.append((process, archive_result))
|
||||
archive_result.status = 'succeeded'
|
||||
archive_result.output_files = {'singlefile.html': {}}
|
||||
def wait_for_hook(process, ar):
|
||||
wait_calls.append((process, ar))
|
||||
ar.status = 'succeeded'
|
||||
ar.output_files = {'singlefile.html': {}}
|
||||
|
||||
archive_result = SimpleNamespace(
|
||||
status='failed',
|
||||
|
||||
Reference in New Issue
Block a user