mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 06:47:57 +10:00
use full dotted paths for all archivebox imports, add migrations and more fixes
This commit is contained in:
@@ -56,7 +56,7 @@ def add(urls: str | list[str],
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
created_by_id = created_by_id or get_or_create_system_user_pk()
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ def discover_outlinks(
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.config import CONSTANTS
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
created_by_id = get_or_create_system_user_pk()
|
||||
is_tty = sys.stdout.isatty()
|
||||
|
||||
@@ -96,7 +96,7 @@ def run_plugins(
|
||||
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
|
||||
)
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
is_tty = sys.stdout.isatty()
|
||||
|
||||
|
||||
@@ -13,11 +13,9 @@ from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
@enforce_types
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
|
||||
"""Initialize a new ArchiveBox collection in the current directory"""
|
||||
|
||||
install = install or setup
|
||||
|
||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config.collection import write_config_file
|
||||
@@ -128,7 +126,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
||||
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
|
||||
|
||||
if pending_links:
|
||||
Snapshot.objects.create_from_dicts(list(pending_links.values()))
|
||||
for link_dict in pending_links.values():
|
||||
Snapshot.from_jsonl(link_dict)
|
||||
|
||||
# Hint for orphaned snapshot directories
|
||||
print()
|
||||
@@ -187,7 +186,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
|
||||
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
|
||||
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
|
||||
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
|
||||
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
|
||||
@docstring(init.__doc__)
|
||||
def main(**kwargs) -> None:
|
||||
init(**kwargs)
|
||||
|
||||
@@ -85,7 +85,7 @@ def install(dry_run: bool=False) -> None:
|
||||
print()
|
||||
|
||||
# Run the crawl synchronously (this triggers on_Crawl hooks)
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
orchestrator = Orchestrator(exit_on_idle=True)
|
||||
orchestrator.runloop()
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ def orchestrator(daemon: bool = False, watch: bool = False) -> int:
|
||||
0: All work completed successfully
|
||||
1: Error occurred
|
||||
"""
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
if Orchestrator.is_running():
|
||||
print('[yellow]Orchestrator is already running[/yellow]')
|
||||
|
||||
@@ -74,7 +74,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
|
||||
tail_multiple_worker_logs,
|
||||
is_port_in_use,
|
||||
)
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
import sys
|
||||
|
||||
# Check if port is already in use
|
||||
|
||||
@@ -163,7 +163,7 @@ def create_snapshots(
|
||||
|
||||
# If --plugins is passed, run the orchestrator for those plugins
|
||||
if plugins:
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
|
||||
orchestrator = Orchestrator(exit_on_idle=True)
|
||||
orchestrator.runloop()
|
||||
|
||||
@@ -160,7 +160,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
|
||||
total = Snapshot.objects.count()
|
||||
print(f'[*] Processing {total} snapshots from database...')
|
||||
|
||||
for snapshot in Snapshot.objects.iterator():
|
||||
for snapshot in Snapshot.objects.iterator(chunk_size=batch_size):
|
||||
# Reconcile index.json with DB
|
||||
snapshot.reconcile_with_index_json()
|
||||
|
||||
@@ -209,7 +209,7 @@ def process_filtered_snapshots(
|
||||
total = snapshots.count()
|
||||
print(f'[*] Found {total} matching snapshots')
|
||||
|
||||
for snapshot in snapshots.iterator():
|
||||
for snapshot in snapshots.iterator(chunk_size=batch_size):
|
||||
# Reconcile index.json with DB
|
||||
snapshot.reconcile_with_index_json()
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ TEST_CONFIG = {
|
||||
|
||||
'DATA_DIR': 'data.tests',
|
||||
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
||||
'SAVE_ARCHIVEDOTORG': 'False',
|
||||
'SAVE_TITLE': 'False',
|
||||
|
||||
'USE_CURL': 'False',
|
||||
|
||||
@@ -32,7 +32,7 @@ from unittest.mock import patch, MagicMock
|
||||
TEST_CONFIG = {
|
||||
'USE_COLOR': 'False',
|
||||
'SHOW_PROGRESS': 'False',
|
||||
'SAVE_ARCHIVE_DOT_ORG': 'False',
|
||||
'SAVE_ARCHIVEDOTORG': 'False',
|
||||
'SAVE_TITLE': 'True', # Fast extractor
|
||||
'SAVE_FAVICON': 'False',
|
||||
'SAVE_WGET': 'False',
|
||||
|
||||
Reference in New Issue
Block a user