mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Tighten API typing and add return values
This commit is contained in:
@@ -25,8 +25,8 @@ class CLIScheduleAPITests(TestCase):
|
||||
def test_schedule_api_creates_schedule(self):
|
||||
request = RequestFactory().post('/api/v1/cli/schedule')
|
||||
request.user = self.user
|
||||
request.stdout = StringIO()
|
||||
request.stderr = StringIO()
|
||||
setattr(request, 'stdout', StringIO())
|
||||
setattr(request, 'stderr', StringIO())
|
||||
args = ScheduleCommandSchema(
|
||||
every='daily',
|
||||
import_path='https://example.com/feed.xml',
|
||||
|
||||
@@ -112,7 +112,7 @@ class RemoveCommandSchema(Schema):
|
||||
def cli_add(request: HttpRequest, args: AddCommandSchema):
|
||||
from archivebox.cli.archivebox_add import add
|
||||
|
||||
result = add(
|
||||
crawl, snapshots = add(
|
||||
urls=args.urls,
|
||||
tag=args.tag,
|
||||
depth=args.depth,
|
||||
@@ -125,9 +125,9 @@ def cli_add(request: HttpRequest, args: AddCommandSchema):
|
||||
created_by_id=request.user.pk,
|
||||
)
|
||||
|
||||
snapshot_ids = [str(snapshot_id) for snapshot_id in result.values_list('id', flat=True)]
|
||||
snapshot_ids = [str(snapshot_id) for snapshot_id in snapshots.values_list('id', flat=True)]
|
||||
result_payload = {
|
||||
"crawl_id": getattr(result, "crawl_id", None),
|
||||
"crawl_id": str(crawl.id),
|
||||
"num_snapshots": len(snapshot_ids),
|
||||
"snapshot_ids": snapshot_ids,
|
||||
"queued_urls": args.urls,
|
||||
|
||||
@@ -427,7 +427,7 @@ def get_any(request: HttpRequest, id: str):
|
||||
try:
|
||||
response = getter(request, id)
|
||||
if isinstance(response, Model):
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -435,7 +435,7 @@ def get_any(request: HttpRequest, id: str):
|
||||
from archivebox.api.v1_crawls import get_crawl
|
||||
response = get_crawl(request, id)
|
||||
if isinstance(response, Model):
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
|
||||
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.pk}?{request.META['QUERY_STRING']}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ from archivebox.config.permissions import USER, HOSTNAME
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.crawls.models import Crawl
|
||||
|
||||
|
||||
def _collect_input_urls(args: tuple[str, ...]) -> list[str]:
|
||||
@@ -53,7 +54,7 @@ def add(urls: str | list[str],
|
||||
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
|
||||
index_only: bool=False,
|
||||
bg: bool=False,
|
||||
created_by_id: int | None=None) -> QuerySet['Snapshot']:
|
||||
created_by_id: int | None=None) -> tuple['Crawl', QuerySet['Snapshot']]:
|
||||
"""Add a new URL or list of URLs to your archive.
|
||||
|
||||
The flow is:
|
||||
@@ -145,7 +146,7 @@ def add(urls: str | list[str],
|
||||
if tag:
|
||||
snapshot.save_tags(tag.split(','))
|
||||
snapshot.ensure_crawl_symlink()
|
||||
return crawl.snapshot_set.all()
|
||||
return crawl, crawl.snapshot_set.all()
|
||||
|
||||
# 5. Start the orchestrator to process the queue
|
||||
# The orchestrator will:
|
||||
@@ -210,8 +211,7 @@ def add(urls: str | list[str],
|
||||
|
||||
# 6. Return the list of Snapshots in this crawl
|
||||
snapshots = crawl.snapshot_set.all()
|
||||
snapshots.crawl_id = str(crawl.id)
|
||||
return snapshots
|
||||
return crawl, snapshots
|
||||
|
||||
|
||||
@click.command()
|
||||
|
||||
@@ -85,21 +85,21 @@ def read_stdin(stream: Optional[TextIO] = None) -> Iterator[Dict[str, Any]]:
|
||||
Yields parsed records as dicts.
|
||||
Supports both JSONL format and plain URLs (one per line).
|
||||
"""
|
||||
stream = stream or sys.stdin
|
||||
active_stream: TextIO = sys.stdin if stream is None else stream
|
||||
|
||||
# Don't block if stdin is a tty with no input
|
||||
if stream.isatty():
|
||||
if active_stream.isatty():
|
||||
return
|
||||
|
||||
try:
|
||||
ready, _, _ = select.select([stream], [], [], 0)
|
||||
ready, _, _ = select.select([active_stream], [], [], 0)
|
||||
except (OSError, ValueError):
|
||||
ready = [stream]
|
||||
ready = [active_stream]
|
||||
|
||||
if not ready:
|
||||
return
|
||||
|
||||
for line in stream:
|
||||
for line in active_stream:
|
||||
record = parse_line(line)
|
||||
if record:
|
||||
yield record
|
||||
@@ -142,9 +142,9 @@ def write_record(record: Dict[str, Any], stream: Optional[TextIO] = None) -> Non
|
||||
"""
|
||||
Write a single JSONL record to stdout (or provided stream).
|
||||
"""
|
||||
stream = stream or sys.stdout
|
||||
stream.write(json.dumps(record) + '\n')
|
||||
stream.flush()
|
||||
active_stream: TextIO = sys.stdout if stream is None else stream
|
||||
active_stream.write(json.dumps(record) + '\n')
|
||||
active_stream.flush()
|
||||
|
||||
|
||||
def write_records(records: Iterator[Dict[str, Any]], stream: Optional[TextIO] = None) -> int:
|
||||
|
||||
@@ -27,7 +27,7 @@ def bg_add(add_kwargs: dict) -> int:
|
||||
add_kwargs = add_kwargs.copy()
|
||||
add_kwargs['bg'] = True
|
||||
|
||||
result = add(**add_kwargs)
|
||||
_, result = add(**add_kwargs)
|
||||
|
||||
return len(result) if result else 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user