diff --git a/archivebox/cli/archivebox_status.py b/archivebox/cli/archivebox_status.py
index e8e91b2d..94f5916c 100644
--- a/archivebox/cli/archivebox_status.py
+++ b/archivebox/cli/archivebox_status.py
@@ -37,46 +37,42 @@ def status(out_dir: Path=DATA_DIR) -> None:
     print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
     print(f'    > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
     print()
-    print('[green]\\[*] Scanning archive data directories...[/green]')
+    print('[green]\\[*] Scanning archive data from database...[/green]')
     print(f'[yellow]    {ARCHIVE_DIR}/*[/yellow]')
-    num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
+
+    # Get archive stats from the DB (no filesystem scanning)
+    from django.db.models import Sum, Count
+    from archivebox.core.models import ArchiveResult
+
+    archive_stats = ArchiveResult.objects.filter(status='succeeded').aggregate(
+        total_size=Sum('output_size'),
+        total_results=Count('id'),
+    )
+    num_bytes = archive_stats['total_size'] or 0
+    num_results = archive_stats['total_results'] or 0
     size = printable_filesize(num_bytes)
-    print(f'    Size: {size} across {num_files} files in {num_dirs} directories')
+    print(f'    Size: {size} across {num_results} archive results (from DB)')
 
     # Use DB as source of truth for snapshot status
     num_indexed = links.count()
-    num_archived = links.filter(status='archived').count() or links.exclude(downloaded_at=None).count()
+    num_archived = links.filter(status='sealed').count() or links.exclude(downloaded_at=None).count()
     num_unarchived = links.filter(status='queued').count() or links.filter(downloaded_at=None).count()
     print(f'    > indexed: {num_indexed}'.ljust(36), '(total snapshots in DB)')
     print(f'    > archived: {num_archived}'.ljust(36), '(snapshots with archived content)')
     print(f'    > unarchived: {num_unarchived}'.ljust(36), '(snapshots pending archiving)')
 
-    # Count directories on filesystem
-    num_present = 0
-    orphaned_dirs = []
-    if ARCHIVE_DIR.exists():
-        for entry in ARCHIVE_DIR.iterdir():
-            if entry.is_dir():
-                num_present += 1
-                if not links.filter(timestamp=entry.name).exists():
-                    orphaned_dirs.append(str(entry))
-
-    num_valid = min(num_present, num_indexed)  # approximate
+    # All snapshots are tracked in the DB now, no need to count filesystem dirs
+    num_valid = num_indexed
     print()
-    print(f'    > present: {num_present}'.ljust(36), '(directories in archive/)')
-    print(f'    > [green]valid:[/green] {num_valid}'.ljust(36), ' (directories with matching DB entry)')
+    print(f'    > [green]valid:[/green] {num_valid}'.ljust(36), '(snapshots in database)')
 
-    num_orphaned = len(orphaned_dirs)
-    print(f'    > [red]orphaned:[/red] {num_orphaned}'.ljust(36), ' (directories without matching DB entry)')
+    num_orphaned = 0  # orphan detection would require a filesystem scan; skipped for S3 compatibility
+    print(f'    > [red]orphaned:[/red] {num_orphaned}'.ljust(36), '(orphan detection skipped, requires filesystem scan)')
 
     if num_indexed:
         print('    [violet]Hint:[/violet] You can list snapshots by status like so:')
         print('        [green]archivebox list --status=  (e.g. archived, queued, etc.)[/green]')
 
-    if orphaned_dirs:
-        print('    [violet]Hint:[/violet] To automatically import orphaned data directories into the main index, run:')
-        print('        [green]archivebox init[/green]')
-
     print()
     print('[green]\\[*] Scanning recent archive changes and user logins:[/green]')
     print(f'[yellow]    {CONSTANTS.LOGS_DIR}/*[/yellow]')
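
Note on the aggregate above: Django's Sum() yields None (not 0) when no rows match, which is why both values are guarded with `or 0`. A minimal standalone sketch of the same query pattern, using only the model, status, and field names visible in this diff (the helper name is illustrative):

    from django.db.models import Count, Sum

    from archivebox.core.models import ArchiveResult

    def db_archive_stats() -> tuple[int, int]:
        """Return (total_bytes, num_results) for succeeded results, with no filesystem access."""
        stats = ArchiveResult.objects.filter(status='succeeded').aggregate(
            total_size=Sum('output_size'),      # None when the queryset is empty
            total_results=Count('id'),
        )
        return (stats['total_size'] or 0, stats['total_results'] or 0)
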
diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py
index 0af36faf..afa31175 100644
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -1,9 +1,6 @@
 __package__ = 'archivebox.core'
 
-import os
-from pathlib import Path
-
 from django.contrib import admin, messages
 from django.urls import path
 from django.utils.html import format_html, mark_safe
 
@@ -363,7 +360,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
         # ordering='archiveresult_count'
     )
     def size(self, obj):
-        archive_size = os.access(Path(obj.output_dir) / 'index.html', os.F_OK) and obj.archive_size
+        """Display archive size from DB (no filesystem access)."""
+        archive_size = obj.archive_size
         if archive_size:
             size_txt = printable_filesize(archive_size)
             if archive_size > 52428800:
@@ -442,14 +440,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
         description='Size',
     )
     def size_with_stats(self, obj):
-        """Show archive size with output size from archive results."""
+        """Show archive size from DB (no filesystem access)."""
         stats = obj.get_progress_stats()
-        # Use output_size from archive results if available, fallback to disk size
-        output_size = stats['output_size']
-        archive_size = os.access(Path(obj.output_dir) / 'index.html', os.F_OK) and obj.archive_size
-
-        size_bytes = output_size or archive_size or 0
+        # Use output_size from archive results (already aggregated in stats)
+        size_bytes = stats['output_size'] or 0
 
         if size_bytes:
             size_txt = printable_filesize(size_bytes)
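
Note: both admin columns above now derive their size from the DB; obj.archive_size (defined in models.py below) runs one aggregate query per row rendered in the changelist. If that ever shows up as an N+1 hotspot, one possible alternative (not part of this diff; archive_size_db is an illustrative name) is to annotate the whole queryset in a single query:

    from django.db.models import Q, Sum

    from archivebox.core.models import Snapshot

    # One aggregate for the whole page instead of one query per Snapshot row.
    # Assumes the default reverse accessor for ArchiveResult, matching the
    # self.archiveresult_set usage elsewhere in this diff.
    snapshots = Snapshot.objects.annotate(
        archive_size_db=Sum(
            'archiveresult__output_size',
            filter=Q(archiveresult__status='succeeded'),
        ),
    )
    for snapshot in snapshots:
        print(snapshot.pk, snapshot.archive_size_db or 0)  # Sum() is None with no matches
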
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index b8aa660c..b20ff67b 100755
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -22,7 +22,7 @@ from django.contrib import admin
 from django.conf import settings
 
 from archivebox.config import CONSTANTS
-from archivebox.misc.system import get_dir_size, atomic_write
+from archivebox.misc.system import atomic_write
 from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
 from archivebox.misc.hashing import get_dir_info
 from archivebox.hooks import (
@@ -1345,11 +1345,19 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
         return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
 
     @cached_property
-    def archive_size(self):
-        try:
-            return get_dir_size(self.output_dir)[0]
-        except Exception:
-            return 0
+    def archive_size(self) -> int:
+        """
+        Total size of all archived files for this snapshot.
+        Computed from ArchiveResult.output_size in the DB (no filesystem access).
+        """
+        from django.db.models import Sum
+
+        total = self.archiveresult_set.filter(
+            status='succeeded'
+        ).aggregate(
+            total_size=Sum('output_size')
+        )['total_size']
+        return total or 0
 
     def save_tags(self, tags: Iterable[str] = ()) -> None:
         tags_id = [Tag.objects.get_or_create(name=tag)[0].pk for tag in tags if tag.strip()]
@@ -1904,8 +1912,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
 
     def canonical_outputs(self) -> Dict[str, Optional[str]]:
         """
-        Intelligently discover the best output file for each plugin.
-        Uses actual ArchiveResult data and filesystem scanning with smart heuristics.
+        Discover the best output file for each plugin.
+        Uses ArchiveResult.output_files from the DB (no filesystem scanning).
         """
         FAVICON_PROVIDER = 'https://www.google.com/s2/favicons?domain={}'
@@ -1917,36 +1925,25 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
         }
 
         MIN_DISPLAY_SIZE = 15_000  # 15KB - filter out tiny files
-        MAX_SCAN_FILES = 50  # Don't scan massive directories
 
-        def find_best_output_in_dir(dir_path: Path, plugin_name: str) -> Optional[str]:
-            """Find the best representative file in a plugin's output directory"""
-            if not dir_path.exists() or not dir_path.is_dir():
+        def find_best_output_from_files(output_files: dict, plugin_name: str) -> Optional[str]:
+            """Find the best representative file from the output_files dict."""
+            if not output_files:
                 return None
 
             candidates = []
-            file_count = 0
-
-            # Special handling for media plugin - look for thumbnails
             is_media_dir = plugin_name == 'media'
 
-            # Scan for suitable files
-            for file_path in dir_path.rglob('*'):
-                file_count += 1
-                if file_count > MAX_SCAN_FILES:
-                    break
-
-                if file_path.is_dir() or file_path.name.startswith('.'):
+            for rel_path, metadata in output_files.items():
+                if rel_path.startswith('.'):
                     continue
 
-                ext = file_path.suffix.lstrip('.').lower()
+                ext = rel_path.rsplit('.', 1)[-1].lower() if '.' in rel_path else ''
                 if ext not in IFRAME_EMBEDDABLE_EXTENSIONS:
                     continue
 
-                try:
-                    size = file_path.stat().st_size
-                except OSError:
-                    continue
+                # Get size from metadata if available; otherwise assume it is large enough to pass
+                size = metadata.get('size', MIN_DISPLAY_SIZE) if isinstance(metadata, dict) else MIN_DISPLAY_SIZE
 
                 # For media dir, allow smaller image files (thumbnails are often < 15KB)
                 min_size = 5_000 if (is_media_dir and ext in ('png', 'jpg', 'jpeg', 'webp', 'gif')) else MIN_DISPLAY_SIZE
@@ -1955,16 +1952,15 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                 # Prefer main files: index.html, output.*, content.*, etc.
                 priority = 0
-                name_lower = file_path.name.lower()
+                name_lower = rel_path.lower()
 
                 if is_media_dir:
-                    # Special prioritization for media directories
                     if any(keyword in name_lower for keyword in ('thumb', 'thumbnail', 'cover', 'poster')):
-                        priority = 200  # Highest priority for thumbnails
+                        priority = 200
                     elif ext in ('png', 'jpg', 'jpeg', 'webp', 'gif'):
-                        priority = 150  # High priority for any image
+                        priority = 150
                     elif ext in ('mp4', 'webm', 'mp3', 'opus', 'ogg'):
-                        priority = 100  # Lower priority for actual media files
+                        priority = 100
                     else:
                         priority = 50
                 elif 'index' in name_lower:
@@ -1978,15 +1974,14 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                 else:
                     priority = 10
 
-                candidates.append((priority, size, file_path))
+                candidates.append((priority, size, rel_path))
 
             if not candidates:
                 return None
 
             # Sort by priority (desc), then size (desc)
             candidates.sort(key=lambda x: (x[0], x[1]), reverse=True)
-            best_file = candidates[0][2]
-            return str(best_file.relative_to(Path(self.output_dir)))
+            return candidates[0][2]
 
         canonical = {
             'index_path': 'index.html',
@@ -1994,52 +1989,26 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
             'archive_org_path': f'https://web.archive.org/web/{self.base_url}',
         }
 
-        # Scan each ArchiveResult's output directory for the best file
-        snap_dir = Path(self.output_dir)
+        # Get the best output from each ArchiveResult using output_files from the DB
         for result in self.archiveresult_set.filter(status='succeeded'):
             if not result.output_files and not result.output_str:
                 continue
 
-            # Try to find the best output file for this plugin
-            plugin_dir = snap_dir / result.plugin
             best_output = None
 
-            # Check output_files first (new field)
+            # Check output_files first (primary source)
             if result.output_files:
-                first_file = next(iter(result.output_files.keys()), None)
-                if first_file and (plugin_dir / first_file).exists():
-                    best_output = f'{result.plugin}/{first_file}'
+                best_file = find_best_output_from_files(result.output_files, result.plugin)
+                if best_file:
+                    best_output = f'{result.plugin}/{best_file}'
 
             # Fallback to output_str if it looks like a path
-            if not best_output and result.output_str and (snap_dir / result.output_str).exists():
+            if not best_output and result.output_str:
                 best_output = result.output_str
 
-            if not best_output and plugin_dir.exists():
-                # Intelligently find the best file in the plugin's directory
-                best_output = find_best_output_in_dir(plugin_dir, result.plugin)
-
             if best_output:
                 canonical[f'{result.plugin}_path'] = best_output
 
-        # Also scan top-level for legacy outputs (backwards compatibility)
-        for file_path in snap_dir.glob('*'):
-            if file_path.is_dir() or file_path.name in ('index.html', 'index.json'):
-                continue
-
-            ext = file_path.suffix.lstrip('.').lower()
-            if ext not in IFRAME_EMBEDDABLE_EXTENSIONS:
-                continue
-
-            try:
-                size = file_path.stat().st_size
-                if size >= MIN_DISPLAY_SIZE:
-                    # Add as generic output with stem as key
-                    key = f'{file_path.stem}_path'
-                    if key not in canonical:
-                        canonical[key] = file_path.name
-            except OSError:
-                continue
-
         if self.is_static:
             static_path = f'warc/{self.timestamp}'
             canonical.update({
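
Note: find_best_output_from_files() is now pure dict processing, so the ranking can be sanity-checked without a database or an archive on disk. A condensed sketch of the same pick-the-best logic, under the same assumptions the method makes (output_files maps relative paths to optional metadata dicts with a 'size' key; the extension allowlist and priority tiers here are toy stand-ins for IFRAME_EMBEDDABLE_EXTENSIONS and the full tier table):

    from typing import Optional

    EMBEDDABLE_EXTS = {'html', 'pdf', 'png', 'jpg'}  # toy stand-in for the real allowlist
    MIN_DISPLAY_SIZE = 15_000

    def pick_best(output_files: dict) -> Optional[str]:
        candidates = []
        for rel_path, metadata in output_files.items():
            ext = rel_path.rsplit('.', 1)[-1].lower() if '.' in rel_path else ''
            if rel_path.startswith('.') or ext not in EMBEDDABLE_EXTS:
                continue
            # Unknown sizes default to MIN_DISPLAY_SIZE so they pass the filter, as above
            size = metadata.get('size', MIN_DISPLAY_SIZE) if isinstance(metadata, dict) else MIN_DISPLAY_SIZE
            if size < MIN_DISPLAY_SIZE:
                continue
            priority = 100 if 'index' in rel_path.lower() else 10  # simplified tiers
            candidates.append((priority, size, rel_path))
        # Highest (priority, size) wins, mirroring the sort in the real method
        return max(candidates, default=(0, 0, None))[2]

    assert pick_best({'index.html': {'size': 64_000}, 'page.pdf': {'size': 20_000}}) == 'index.html'
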
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index f0410846..6666fb18 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -54,6 +54,7 @@ class SnapshotView(View):
 
     @staticmethod
     def render_live_index(request, snapshot):
+        """Render the live index page using DB data (no filesystem access)."""
        TITLE_LOADING_MSG = 'Not yet archived...'
 
         # Dict of plugin -> ArchiveResult object
@@ -61,37 +62,33 @@ class SnapshotView(View):
         # Dict of plugin -> result info dict (for template compatibility)
         archiveresults = {}
 
-        results = snapshot.archiveresult_set.all()
+        # Get succeeded results with output files from the DB
+        results = snapshot.archiveresult_set.filter(status='succeeded')
 
         for result in results:
             embed_path = result.embed_path()
-            abs_path = result.snapshot_dir / (embed_path or 'None')
 
-            if (result.status == 'succeeded'
-                and embed_path
-                and os.access(abs_path, os.R_OK)
-                and abs_path.exists()):
-                if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')):
-                    continue
+            # Check if the result has any output files (from the DB, not the filesystem)
+            if not embed_path or not (result.output_files or result.output_str):
+                continue
 
-                # Store the full ArchiveResult object for template tags
-                archiveresult_objects[result.plugin] = result
+            # Store the full ArchiveResult object for template tags
+            archiveresult_objects[result.plugin] = result
 
-                result_info = {
-                    'name': result.plugin,
-                    'path': embed_path,
-                    'ts': ts_to_date_str(result.end_ts),
-                    'size': abs_path.stat().st_size or '?',
-                    'result': result,  # Include the full object for template tags
-                }
-                archiveresults[result.plugin] = result_info
+            # Get size from the output_size field (DB) instead of stat()
+            result_info = {
+                'name': result.plugin,
+                'path': embed_path,
+                'ts': ts_to_date_str(result.end_ts),
+                'size': result.output_size or '?',
+                'result': result,  # Include the full object for template tags
+            }
+            archiveresults[result.plugin] = result_info
 
-        # Use canonical_outputs for intelligent discovery
-        # This method now scans ArchiveResults and uses smart heuristics
+        # Use canonical_outputs for intelligent discovery (now backed by the DB, not the filesystem)
         canonical = snapshot.canonical_outputs()
 
-        # Add any newly discovered outputs from canonical_outputs to archiveresults
-        snap_dir = Path(snapshot.output_dir)
+        # Add any outputs from canonical_outputs not already in archiveresults
         for key, path in canonical.items():
             if not key.endswith('_path') or not path or path.startswith('http'):
                 continue
@@ -100,22 +97,16 @@ class SnapshotView(View):
             if plugin_name in archiveresults:
                 continue  # Already have this from ArchiveResult
 
-            file_path = snap_dir / path
-            if not file_path.exists() or not file_path.is_file():
-                continue
-
-            try:
-                file_size = file_path.stat().st_size
-                if file_size >= 15_000:  # Only show files > 15KB
-                    archiveresults[plugin_name] = {
-                        'name': plugin_name,
-                        'path': path,
-                        'ts': ts_to_date_str(file_path.stat().st_mtime or 0),
-                        'size': file_size,
-                        'result': None,
-                    }
-            except OSError:
-                continue
+            # For canonical outputs not from an ArchiveResult, add with minimal info
+            # (derived from output_files/output_str in the DB; existence is not re-verified)
+            if plugin_name not in ('index', 'google_favicon', 'archive_org'):
+                archiveresults[plugin_name] = {
+                    'name': plugin_name,
+                    'path': path,
+                    'ts': '',
+                    'size': '?',
+                    'result': None,
+                }
 
         # Get available extractor plugins from hooks (sorted by numeric prefix for ordering)
         # Convert to base names for display ordering
@@ -131,10 +122,8 @@ class SnapshotView(View):
 
         snapshot_info = snapshot.to_dict(extended=True)
 
-        try:
-            warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
-        except IndexError:
-            warc_path = 'warc/'
+        # Get the warc path from canonical outputs (DB) instead of a filesystem glob
+        warc_path = canonical.get('wget_path', 'warc/')
 
         context = {
             **snapshot_info,
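
Note: the template-facing shape is unchanged by this diff; only the data source moved from stat() calls to DB fields. Each archiveresults entry still carries the same keys, roughly as follows (values shown are illustrative):

    # Illustrative entry, mirroring the dicts built in render_live_index() above:
    result_info = {
        'name': 'wget',                   # plugin name
        'path': 'wget/example.com.html',  # embed path relative to the snapshot dir
        'ts': '2024-01-01 12:00',         # formatted end_ts; '' for canonical-only outputs
        'size': 123_456,                  # ArchiveResult.output_size, or '?' when unknown
        'result': None,                   # the full ArchiveResult object when available
    }
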
diff --git a/archivebox/misc/logging_util.py b/archivebox/misc/logging_util.py
index 547b3b68..980ba57f 100644
--- a/archivebox/misc/logging_util.py
+++ b/archivebox/misc/logging_util.py
@@ -25,7 +25,6 @@ from django.core.management.base import DjangoHelpFormatter
 from archivebox.config import CONSTANTS, DATA_DIR, VERSION
 from archivebox.config.common import SHELL_CONFIG
 
-from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import enforce_types
 from archivebox.misc.logging import ANSI, stderr
 
@@ -312,14 +311,13 @@ def log_snapshot_archiving_finished(snapshot: "Snapshot", out_dir: str, is_new:
     else:
         _LAST_RUN_STATS.succeeded += 1
 
-    try:
-        size = get_dir_size(out_dir)
-    except FileNotFoundError:
-        size = (0, None, '0')
+    # Get archive size from DB instead of filesystem
+    archive_size = snapshot.archive_size
+    num_results = snapshot.archiveresult_set.filter(status='succeeded').count()
 
     end_ts = datetime.now(timezone.utc)
     duration = str(end_ts - start_ts).split('.')[0]
-    print('        [bright_black]{} files ({}) in {}s [/]'.format(size[2], printable_filesize(size[0]), duration))
+    print('        [bright_black]{} results ({}) in {}s [/]'.format(num_results, printable_filesize(archive_size), duration))
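
Note: every statistic in this diff silently excludes ArchiveResult rows whose output_size was never populated, since they contribute NULL to Sum(). For legacy rows created before output_size was recorded, a hypothetical one-off backfill (not part of this diff, and assuming output_size is nullable and each result has a snapshot FK) could reuse the get_dir_size() helper that the hot paths above no longer import; it assumes each result's files live under <snapshot.output_dir>/<plugin>/, the same layout canonical_outputs() relied on before this change:

    from pathlib import Path

    from archivebox.core.models import ArchiveResult
    from archivebox.misc.system import get_dir_size  # acceptable for a one-off local script

    # Hypothetical backfill so old rows show up in the DB-based stats above.
    for result in ArchiveResult.objects.filter(status='succeeded', output_size__isnull=True):
        plugin_dir = Path(result.snapshot.output_dir) / result.plugin
        if plugin_dir.is_dir():
            # get_dir_size() returns (num_bytes, num_dirs, num_files)
            result.output_size = get_dir_size(plugin_dir)[0]
            result.save(update_fields=['output_size'])
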