mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 07:17:52 +10:00
remove Seed model in favor of Crawl as template
This commit is contained in:
@@ -27,10 +27,9 @@ TYPE_SNAPSHOT = 'Snapshot'
|
||||
TYPE_ARCHIVERESULT = 'ArchiveResult'
|
||||
TYPE_TAG = 'Tag'
|
||||
TYPE_CRAWL = 'Crawl'
|
||||
TYPE_SEED = 'Seed'
|
||||
TYPE_INSTALLEDBINARY = 'InstalledBinary'
|
||||
|
||||
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_SEED, TYPE_INSTALLEDBINARY}
|
||||
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_INSTALLEDBINARY}
|
||||
|
||||
|
||||
def parse_line(line: str) -> Optional[Dict[str, Any]]:
|
||||
@@ -206,7 +205,8 @@ def crawl_to_jsonl(crawl) -> Dict[str, Any]:
|
||||
return {
|
||||
'type': TYPE_CRAWL,
|
||||
'id': str(crawl.id),
|
||||
'seed_id': str(crawl.seed_id),
|
||||
'urls': crawl.urls,
|
||||
'extractor': crawl.extractor,
|
||||
'status': crawl.status,
|
||||
'max_depth': crawl.max_depth,
|
||||
'created_at': crawl.created_at.isoformat() if crawl.created_at else None,
|
||||
|
||||
@@ -13,9 +13,11 @@ from rich.console import Console
|
||||
from rich.highlighter import Highlighter
|
||||
|
||||
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
|
||||
CONSOLE = Console()
|
||||
STDERR = Console(stderr=True)
|
||||
IS_TTY = CONSOLE.is_interactive
|
||||
# Disable wrapping - use soft_wrap=True and large width so text flows naturally
|
||||
# Colors are preserved, just no hard line breaks inserted
|
||||
CONSOLE = Console(width=32768, soft_wrap=True, force_terminal=True)
|
||||
STDERR = Console(stderr=True, width=32768, soft_wrap=True, force_terminal=True)
|
||||
IS_TTY = sys.stdout.isatty()
|
||||
|
||||
class RainbowHighlighter(Highlighter):
|
||||
def highlight(self, text):
|
||||
|
||||
@@ -603,21 +603,17 @@ def log_worker_event(
|
||||
|
||||
# Build final message
|
||||
error_str = f' {type(error).__name__}: {error}' if error else ''
|
||||
# Build colored message - worker_label needs to be inside color tags
|
||||
# But first we need to format the color tags separately from the worker label
|
||||
from archivebox.misc.logging import CONSOLE
|
||||
from rich.text import Text
|
||||
|
||||
# Create a Rich Text object for proper formatting
|
||||
text = Text()
|
||||
text.append(indent) # Indentation
|
||||
# Append worker label and event with color
|
||||
text.append(indent)
|
||||
text.append(f'{worker_label} {event}{error_str}', style=color)
|
||||
# Append metadata without color (add separator if metadata exists)
|
||||
if metadata_str:
|
||||
text.append(f' | {metadata_str}')
|
||||
|
||||
CONSOLE.print(text)
|
||||
CONSOLE.print(text, soft_wrap=True)
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
__package__ = 'archivebox'
|
||||
|
||||
import sys
|
||||
import shutil
|
||||
import django
|
||||
import pydantic
|
||||
|
||||
@@ -20,14 +18,10 @@ timezone.utc = datetime.timezone.utc
|
||||
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
||||
|
||||
|
||||
# Install rich for pretty tracebacks in console logs
|
||||
# https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler
|
||||
|
||||
from rich.traceback import install # noqa
|
||||
|
||||
TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200
|
||||
# os.environ.setdefault('COLUMNS', str(TERM_WIDTH))
|
||||
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
|
||||
# Rich traceback handler disabled - it adds frames/boxes that wrap weirdly in log files
|
||||
# Standard Python tracebacks are used instead (full width, no frames)
|
||||
# from rich.traceback import install
|
||||
# install(show_locals=True, word_wrap=False, ...)
|
||||
|
||||
|
||||
# Hide site-packages/sonic/client.py:115: SyntaxWarning
|
||||
|
||||
Reference in New Issue
Block a user