fix orchestrator state machine and ArchiveResult → Process data migrations

Nick Sweeting
2026-01-01 16:43:02 -08:00
parent 876feac522
commit 60422adc87
13 changed files with 378 additions and 96 deletions

View File

@@ -26,7 +26,8 @@
"Bash(grep:*)",
"WebFetch(domain:python-statemachine.readthedocs.io)",
"Bash(./bin/run_plugin_tests.sh:*)",
"Bash(done)"
"Bash(done)",
"Bash(coverage erase:*)"
]
}
}

.gitignore
View File

@@ -10,6 +10,7 @@ tests/out/
.coverage
.coverage.*
coverage.json
coverage/
htmlcov/
# Python and Node dependencies

View File

@@ -173,6 +173,8 @@ class ConstantsDict(Mapping):
CUSTOM_TEMPLATES_DIR_NAME,
CUSTOM_PLUGINS_DIR_NAME,
CRONTABS_DIR_NAME,
"invalid",
"users",
# Backwards compatibility with old directory names
"user_plugins", # old name for USER_PLUGINS_DIR (now 'plugins')
"user_templates", # old name for CUSTOM_TEMPLATES_DIR (now 'templates')

View File

@@ -57,18 +57,8 @@ class Migration(migrations.Migration):
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd',
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd_version',
),
migrations.RemoveField(
model_name='archiveresult',
name='pwd',
),
# NOTE: RemoveField for cmd, cmd_version, pwd moved to migration 0027
# to allow data migration to Process records first
migrations.AddField(
model_name='archiveresult',
name='config',

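For context on the note above: the RemoveField operations can only run once a data migration has copied cmd/cmd_version/pwd into Process (and Binary) rows. As a rough illustration only, with model names, app labels, and field names inferred from the tests further down rather than taken from the real migration 0027, such a backfill might look like:

from django.db import migrations

# Hypothetical backfill sketch, not the actual ArchiveBox migration 0027.
# Mirrors the status/exit_code mapping asserted by verify_process_migration().
STATUS_MAP = {
    'queued':    ('queued', None),
    'started':   ('running', None),
    'backoff':   ('queued', None),
    'succeeded': ('exited', 0),
    'failed':    ('exited', 1),
    'skipped':   ('exited', None),
}

def backfill_processes(apps, schema_editor):
    ArchiveResult = apps.get_model('core', 'ArchiveResult')
    Process = apps.get_model('machine', 'Process')   # app label assumed from the machine_process table
    for ar in ArchiveResult.objects.filter(process__isnull=True).iterator():
        status, exit_code = STATUS_MAP.get(ar.status, ('queued', None))
        proc = Process.objects.create(cmd=ar.cmd or [], status=status, exit_code=exit_code)
        ar.process = proc
        ar.save(update_fields=['process'])
        # Binary creation from ar.cmd_version is omitted here for brevity

class Migration(migrations.Migration):
    dependencies = [('core', '0026_previous')]        # placeholder dependency
    operations = [migrations.RunPython(backfill_processes, migrations.RunPython.noop)]

Only after a backfill of this shape has run can 0027 safely drop the legacy columns.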
View File

@@ -2208,7 +2208,7 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
tick = (
queued.to.itself(unless='can_start') |
queued.to(started, cond='can_start') |
started.to.itself(unless='is_finished') |
started.to.itself(unless='is_finished', on='on_started_to_started') |
started.to(sealed, cond='is_finished')
)
@@ -2243,6 +2243,13 @@ class SnapshotMachine(BaseStateMachine, strict_states=True):
status=Snapshot.StatusChoices.STARTED,
)
def on_started_to_started(self):
"""Called when Snapshot stays in started state (archiveresults not finished yet)."""
# Bump retry_at so we check again in a few seconds
self.snapshot.update_and_requeue(
retry_at=timezone.now() + timedelta(seconds=5),
)
@sealed.enter
def enter_sealed(self):
# Clean up background hooks

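The new on= callback is what keeps a not-yet-finished snapshot cycling: every tick that stays in started also pushes retry_at a few seconds into the future so the orchestrator re-checks it later. A toy, self-contained python-statemachine sketch of that pattern (not the ArchiveBox models):

from datetime import datetime, timedelta
from statemachine import StateMachine, State

class RetrySketch(StateMachine):
    # hypothetical stand-in for SnapshotMachine, for illustration only
    queued = State(initial=True)
    started = State()
    sealed = State(final=True)

    tick = (
        queued.to(started, cond='can_start')
        | started.to.itself(unless='is_finished', on='bump_retry_at')
        | started.to(sealed, cond='is_finished')
    )

    def __init__(self):
        self.retry_at = datetime.now()
        self.finished = False
        super().__init__()

    def can_start(self):
        return True

    def is_finished(self):
        return self.finished

    def bump_retry_at(self):
        # analogue of on_started_to_started(): check again in a few seconds
        self.retry_at = datetime.now() + timedelta(seconds=5)

Calling tick() repeatedly leaves the machine in started (bumping retry_at each time) until is_finished flips and it seals, which is the same shape CrawlMachine adopts in the next file.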
View File

@@ -502,7 +502,7 @@ class CrawlMachine(BaseStateMachine, strict_states=True):
tick = (
queued.to.itself(unless='can_start') |
queued.to(started, cond='can_start') |
started.to.itself(unless='is_finished') |
started.to.itself(unless='is_finished', on='on_started_to_started') |
started.to(sealed, cond='is_finished')
)

View File

@@ -1201,6 +1201,14 @@ def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any
# Dispatch to appropriate model's from_json() method
if record_type == 'Snapshot':
from archivebox.core.models import Snapshot
# Check if discovered snapshot exceeds crawl max_depth
snapshot_depth = record.get('depth', 0)
crawl = overrides.get('crawl')
if crawl and snapshot_depth > crawl.max_depth:
# Skip - this URL was discovered but exceeds max crawl depth
continue
obj = Snapshot.from_json(record.copy(), overrides)
if obj:
stats['Snapshot'] = stats.get('Snapshot', 0) + 1

View File

@@ -163,8 +163,10 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='Netscape bookmark file URL to parse')
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
@click.option('--crawl-id', required=False, help='Crawl UUID')
@click.option('--depth', type=int, default=0, help='Current depth level')
def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
"""Parse Netscape bookmark HTML and extract URLs."""
try:
@@ -188,7 +190,12 @@ def main(url: str, snapshot_id: str = None):
'type': 'Snapshot',
'url': unescape(bookmark_url),
'plugin': PLUGIN_NAME,
'depth': depth + 1,
}
if snapshot_id:
entry['parent_snapshot_id'] = snapshot_id
if crawl_id:
entry['crawl_id'] = crawl_id
if title:
entry['title'] = unescape(title)
if tags_str:

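With depth, parent snapshot, and crawl id now threaded through the CLI options, a discovered bookmark carries its lineage back to the hook runner. Purely for illustration (the plugin name and ids below are placeholders, not real values), a hook invoked with --depth 1 would print a JSONL record shaped like:

{"type": "Snapshot", "url": "https://example.com/page", "plugin": "<PLUGIN_NAME>", "depth": 2, "parent_snapshot_id": "<parent uuid>", "crawl_id": "<crawl uuid>", "title": "Example"}

process_hook_records() (earlier in this commit) then drops any such record whose depth exceeds the crawl's max_depth.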
View File

@@ -100,8 +100,10 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='URL to parse (file:// or https://)')
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
@click.option('--crawl-id', required=False, help='Crawl UUID')
@click.option('--depth', type=int, default=0, help='Current depth level')
def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
"""Parse plain text and extract URLs."""
try:
@@ -123,9 +125,12 @@ def main(url: str, snapshot_id: str = None):
'type': 'Snapshot',
'url': found_url,
'plugin': PLUGIN_NAME,
'depth': depth + 1,
}
if snapshot_id:
record['parent_snapshot_id'] = snapshot_id
if crawl_id:
record['crawl_id'] = crawl_id
print(json.dumps(record))
# Emit ArchiveResult record to mark completion

View File

@@ -30,6 +30,7 @@ from .test_migrations_helpers import (
verify_foreign_keys,
verify_all_snapshots_in_output,
verify_crawl_count,
verify_process_migration,
)
@@ -260,6 +261,54 @@ class TestMigrationFrom08x(unittest.TestCase):
self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
f"Version output missing expected content: {output[:500]}")
def test_migration_creates_process_records(self):
"""Migration should create Process records for all ArchiveResults."""
result = run_archivebox(self.work_dir, ['init'], timeout=45)
self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
# Verify Process records created
expected_count = len(self.original_data['archiveresults'])
ok, msg = verify_process_migration(self.db_path, expected_count)
self.assertTrue(ok, msg)
def test_migration_creates_binary_records(self):
"""Migration should create Binary records from cmd_version data."""
result = run_archivebox(self.work_dir, ['init'], timeout=45)
self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Check Binary records exist
cursor.execute("SELECT COUNT(*) FROM machine_binary")
binary_count = cursor.fetchone()[0]
# Should have at least one binary per unique extractor
extractors = set(ar['extractor'] for ar in self.original_data['archiveresults'])
self.assertGreaterEqual(binary_count, len(extractors),
f"Expected at least {len(extractors)} Binaries, got {binary_count}")
conn.close()
def test_migration_preserves_cmd_data(self):
"""Migration should preserve cmd data in Process.cmd field."""
result = run_archivebox(self.work_dir, ['init'], timeout=45)
self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Check that Process records have cmd arrays
cursor.execute("SELECT cmd FROM machine_process WHERE cmd != '[]'")
cmd_records = cursor.fetchall()
# All Processes should have non-empty cmd (test data has json.dumps([extractor, '--version']))
expected_count = len(self.original_data['archiveresults'])
self.assertEqual(len(cmd_records), expected_count,
f"Expected {expected_count} Processes with cmd, got {len(cmd_records)}")
conn.close()
class TestMigrationDataIntegrity08x(unittest.TestCase):
"""Comprehensive data integrity tests for 0.8.x migrations."""

View File

@@ -730,44 +730,26 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
tag_id = cursor.lastrowid
created_data['tags'].append({'id': tag_id, 'name': name, 'slug': name.lower()})
# Create Seeds first (required for 0.8.x Crawls)
test_seeds = [
('https://example.com', 'auto', 'Example Seed'),
('https://github.com/ArchiveBox', 'auto', 'GitHub Seed'),
]
created_data['seeds'] = []
for uri, extractor, label in test_seeds:
seed_id = generate_uuid()
cursor.execute("""
INSERT INTO crawls_seed (id, created_at, created_by_id, modified_at, uri,
extractor, tags_str, label, config, output_dir, notes,
num_uses_failed, num_uses_succeeded)
VALUES (?, datetime('now'), ?, datetime('now'), ?, ?, '', ?, '{}', '', '', 0, 0)
""", (seed_id, user_id, uri, extractor, label))
created_data['seeds'].append({'id': seed_id, 'uri': uri, 'label': label})
# Create 2 Crawls (linked to Seeds)
# Create 2 Crawls (0.9.0 schema - no seeds)
test_crawls = [
('https://example.com\nhttps://example.org', 0, 'Example Crawl', created_data['seeds'][0]['id']),
('https://github.com/ArchiveBox', 1, 'GitHub Crawl', created_data['seeds'][1]['id']),
('https://example.com\nhttps://example.org', 0, 'Example Crawl'),
('https://github.com/ArchiveBox', 1, 'GitHub Crawl'),
]
for i, (urls, max_depth, label, seed_id) in enumerate(test_crawls):
for i, (urls, max_depth, label) in enumerate(test_crawls):
crawl_id = generate_uuid()
cursor.execute("""
INSERT INTO crawls_crawl (id, created_at, created_by_id, modified_at, seed_id, urls,
INSERT INTO crawls_crawl (id, created_at, created_by_id, modified_at, urls,
config, max_depth, tags_str, label, status, retry_at,
num_uses_failed, num_uses_succeeded)
VALUES (?, datetime('now'), ?, datetime('now'), ?, ?, '{}', ?, '', ?, 'queued', datetime('now'), 0, 0)
""", (crawl_id, user_id, seed_id, urls, max_depth, label))
VALUES (?, datetime('now'), ?, datetime('now'), ?, '{}', ?, '', ?, 'queued', datetime('now'), 0, 0)
""", (crawl_id, user_id, urls, max_depth, label))
created_data['crawls'].append({
'id': crawl_id,
'urls': urls,
'max_depth': max_depth,
'label': label,
'seed_id': seed_id,
})
# Create 5 snapshots linked to crawls
@@ -1146,3 +1128,64 @@ def verify_crawl_count(db_path: Path, expected: int) -> Tuple[bool, str]:
if count == expected:
return True, f"Crawl count OK: {count}"
return False, f"Crawl count mismatch: expected {expected}, got {count}"
def verify_process_migration(db_path: Path, expected_archiveresult_count: int) -> Tuple[bool, str]:
"""
Verify that ArchiveResults were properly migrated to Process records.
Checks:
1. All ArchiveResults have process_id set
2. Process count matches ArchiveResult count
3. Binary records created for unique cmd_version values
4. Status mapping is correct
"""
conn = sqlite3.connect(str(db_path))
cursor = conn.cursor()
# Check all ArchiveResults have process_id
cursor.execute("SELECT COUNT(*) FROM core_archiveresult WHERE process_id IS NULL")
null_count = cursor.fetchone()[0]
if null_count > 0:
conn.close()
return False, f"Found {null_count} ArchiveResults without process_id"
# Check Process count
cursor.execute("SELECT COUNT(*) FROM machine_process")
process_count = cursor.fetchone()[0]
if process_count != expected_archiveresult_count:
conn.close()
return False, f"Expected {expected_archiveresult_count} Processes, got {process_count}"
# Check status mapping
cursor.execute("""
SELECT ar.status, p.status, p.exit_code
FROM core_archiveresult ar
JOIN machine_process p ON ar.process_id = p.id
""")
status_errors = []
for ar_status, p_status, p_exit_code in cursor.fetchall():
expected_p_status, expected_exit_code = {
'queued': ('queued', None),
'started': ('running', None),
'backoff': ('queued', None),
'succeeded': ('exited', 0),
'failed': ('exited', 1),
'skipped': ('exited', None),
}.get(ar_status, ('queued', None))
if p_status != expected_p_status:
status_errors.append(f"AR status {ar_status} → Process {p_status}, expected {expected_p_status}")
if p_exit_code != expected_exit_code:
status_errors.append(f"AR status {ar_status} → exit_code {p_exit_code}, expected {expected_exit_code}")
if status_errors:
conn.close()
return False, f"Status mapping errors: {'; '.join(status_errors[:5])}"
conn.close()
return True, f"Process migration verified: {process_count} Processes created"

View File

@@ -175,8 +175,50 @@ class Orchestrator:
"""Spawn a new worker process. Returns PID or None if spawn failed."""
try:
pid = WorkerClass.start(daemon=False)
# Worker spawning is logged by the worker itself in on_startup()
return pid
# CRITICAL: Block until worker registers itself in Process table
# This prevents race condition where orchestrator spawns multiple workers
# before any of them finish on_startup() and register
from archivebox.machine.models import Process
import time
timeout = 5.0 # seconds to wait for worker registration
poll_interval = 0.1 # check every 100ms
elapsed = 0.0
spawn_time = timezone.now()
while elapsed < timeout:
# Check if worker process is registered with strict criteria:
# 1. Correct PID
# 2. WORKER process type
# 3. RUNNING status
# 4. Parent is this orchestrator
# 5. Started recently (within last 10 seconds)
worker_process = Process.objects.filter(
pid=pid,
process_type=Process.TypeChoices.WORKER,
status=Process.StatusChoices.RUNNING,
parent_id=self.db_process.id,
started_at__gte=spawn_time - timedelta(seconds=10),
).first()
if worker_process:
# Worker successfully registered!
return pid
time.sleep(poll_interval)
elapsed += poll_interval
# Timeout - worker failed to register
log_worker_event(
worker_type='Orchestrator',
event='Worker failed to register in time',
indent_level=0,
pid=self.pid,
metadata={'worker_type': WorkerClass.name, 'worker_pid': pid, 'timeout': timeout},
)
return None
except Exception as e:
log_worker_event(
worker_type='Orchestrator',
@@ -266,48 +308,75 @@ class Orchestrator:
def runloop(self) -> None:
"""Main orchestrator loop."""
from rich.progress import Progress, BarColumn, TextColumn, TaskProgressColumn
from archivebox.misc.logging import IS_TTY
import archivebox.misc.logging as logging_module
from archivebox.misc.logging import IS_TTY, CONSOLE
import sys
import os
# Enable progress bars only in TTY + foreground mode
show_progress = IS_TTY and self.exit_on_idle
# Save original consoles
original_console = logging_module.CONSOLE
original_stderr = logging_module.STDERR
# Debug
print(f"[yellow]DEBUG: IS_TTY={IS_TTY}, exit_on_idle={self.exit_on_idle}, show_progress={show_progress}[/yellow]")
# Create Progress with the console it will control
progress = Progress(
TextColumn("[cyan]{task.description}"),
BarColumn(bar_width=40),
TaskProgressColumn(),
transient=False,
console=original_console, # Use the original console
) if show_progress else None
self.on_startup()
task_ids = {}
task_ids = {} # snapshot_id -> task_id
if not show_progress:
# No progress bars - just run normally
self._run_orchestrator_loop(None, task_ids, None, None)
else:
# Redirect worker subprocess output to /dev/null
devnull_fd = os.open(os.devnull, os.O_WRONLY)
# Wrapper to convert console.print() to console.log() for Rich Progress
class ConsoleLogWrapper:
def __init__(self, console):
self._console = console
def print(self, *args, **kwargs):
# Use log() instead of print() to work with Live display
self._console.log(*args)
def __getattr__(self, name):
return getattr(self._console, name)
# Save original stdout/stderr (make 2 copies - one for Console, one for restoring)
original_stdout = sys.stdout.fileno()
original_stderr = sys.stderr.fileno()
stdout_for_console = os.dup(original_stdout)
stdout_for_restore = os.dup(original_stdout)
stderr_for_restore = os.dup(original_stderr)
try:
# Redirect stdout/stderr to /dev/null (workers will inherit this)
os.dup2(devnull_fd, original_stdout)
os.dup2(devnull_fd, original_stderr)
# Create Console using saved stdout (not the redirected one)
from rich.console import Console
import archivebox.misc.logging as logging_module
orchestrator_console = Console(file=os.fdopen(stdout_for_console, 'w'), force_terminal=True)
# Update global CONSOLE so orchestrator logs appear too
original_console = logging_module.CONSOLE
logging_module.CONSOLE = orchestrator_console
# Now create Progress and run loop (DON'T restore stdout/stderr - workers need /dev/null)
with Progress(
TextColumn("[cyan]{task.description}"),
BarColumn(bar_width=40),
TaskProgressColumn(),
console=orchestrator_console,
) as progress:
self._run_orchestrator_loop(progress, task_ids, None, None)
# Restore original console
logging_module.CONSOLE = original_console
finally:
# Restore stdout/stderr
os.dup2(stdout_for_restore, original_stdout)
os.dup2(stderr_for_restore, original_stderr)
# Cleanup
try:
os.close(devnull_fd)
os.close(stdout_for_restore)
os.close(stderr_for_restore)
except:
pass
# stdout_for_console is closed by orchestrator_console
def _run_orchestrator_loop(self, progress, task_ids, read_fd, console):
"""Run the main orchestrator loop with optional progress display."""
try:
if progress:
progress.start()
# Wrap progress.console so print() calls become log() calls
wrapped_console = ConsoleLogWrapper(progress.console)
logging_module.CONSOLE = wrapped_console
logging_module.STDERR = wrapped_console
# Call on_startup AFTER redirecting consoles
self.on_startup()
while True:
# Check queues and spawn workers
queue_sizes = self.check_queues_and_spawn_workers()
@@ -333,12 +402,33 @@ class Orchestrator:
status__in=['succeeded', 'skipped', 'failed']
).count()
# Find currently running hook (ordered by hook_name to get lowest step number)
current_ar = snapshot.archiveresult_set.filter(status='started').order_by('hook_name').first()
if not current_ar:
# If nothing running, show next queued item (ordered to get next in sequence)
current_ar = snapshot.archiveresult_set.filter(status='queued').order_by('hook_name').first()
current_plugin = ''
if current_ar:
# Use hook_name if available, otherwise plugin name
hook_name = current_ar.hook_name or current_ar.plugin or ''
# Extract just the hook name without path (e.g., "on_Snapshot__50_wget.py" -> "wget")
if hook_name:
# Clean up the name: remove prefix and extension
clean_name = hook_name.split('__')[-1] if '__' in hook_name else hook_name
clean_name = clean_name.replace('.py', '').replace('.sh', '').replace('.bg', '')
current_plugin = f"{clean_name}"
# Build description with URL + current plugin
url = snapshot.url[:50] + '...' if len(snapshot.url) > 50 else snapshot.url
description = f"{url}{current_plugin}"
# Create or update task
if snapshot.id not in task_ids:
url = snapshot.url[:60] + '...' if len(snapshot.url) > 60 else snapshot.url
task_ids[snapshot.id] = progress.add_task(url, total=total, completed=completed)
task_ids[snapshot.id] = progress.add_task(description, total=total, completed=completed)
else:
progress.update(task_ids[snapshot.id], completed=completed)
# Update both progress and description
progress.update(task_ids[snapshot.id], description=description, completed=completed)
# Remove tasks for snapshots that are no longer active
for snapshot_id in list(task_ids.keys()):
@@ -373,12 +463,6 @@ class Orchestrator:
raise
else:
self.on_shutdown()
finally:
if progress:
# Restore original consoles
logging_module.CONSOLE = original_console
logging_module.STDERR = original_stderr
progress.stop()
def start(self) -> int:
"""

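The stdout/stderr juggling above is the subtle part of this change: the process-wide fds 1 and 2 are pointed at /dev/null so spawned workers inherit silence, while a duplicated copy of the original stdout keeps the Rich progress display attached to the real terminal. A standalone sketch of that pattern (illustrative, not the orchestrator code):

import os
import sys
from rich.console import Console

devnull_fd = os.open(os.devnull, os.O_WRONLY)
stdout_fd, stderr_fd = sys.stdout.fileno(), sys.stderr.fileno()

# two copies of the real stdout: one feeds the Console, one restores fd 1 later
stdout_for_console = os.dup(stdout_fd)
stdout_for_restore = os.dup(stdout_fd)
stderr_for_restore = os.dup(stderr_fd)

try:
    os.dup2(devnull_fd, stdout_fd)    # fd 1 → /dev/null, inherited by children
    os.dup2(devnull_fd, stderr_fd)    # fd 2 → /dev/null
    console = Console(file=os.fdopen(stdout_for_console, 'w'), force_terminal=True)
    console.log('still visible on the real terminal')
    # ... spawn worker subprocesses here; their output goes to /dev/null ...
finally:
    os.dup2(stdout_for_restore, stdout_fd)   # put the terminal back on fds 1 and 2
    os.dup2(stderr_for_restore, stderr_fd)
    for fd in (devnull_fd, stdout_for_restore, stderr_for_restore):
        os.close(fd)
    # stdout_for_console is owned (and eventually closed) by the Console's file object

The two saved copies matter: if the Console and the restore path shared a single duplicated fd, closing the Console's file would also close the descriptor needed to restore fd 1.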
View File

@@ -1,14 +1,20 @@
#!/bin/bash
# Run ArchiveBox plugin tests
# Run ArchiveBox plugin tests with coverage
#
# All plugin tests use pytest and are located in pluginname/tests/test_*.py
#
# Usage: ./bin/run_plugin_tests.sh [plugin_name]
# Usage: ./bin/test_plugins.sh [plugin_name] [--no-coverage]
#
# Examples:
# ./bin/run_plugin_tests.sh # Run all plugin tests
# ./bin/run_plugin_tests.sh chrome # Run chrome plugin tests
# ./bin/run_plugin_tests.sh parse_* # Run all parse_* plugin tests
# ./bin/test_plugins.sh # Run all plugin tests with coverage
# ./bin/test_plugins.sh chrome # Run chrome plugin tests with coverage
# ./bin/test_plugins.sh parse_* # Run all parse_* plugin tests with coverage
# ./bin/test_plugins.sh --no-coverage # Run all tests without coverage
#
# Coverage results are saved to .coverage and can be viewed with:
# coverage combine
# coverage report
# coverage json
set -e
@@ -18,11 +24,43 @@ RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Save root directory first
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
# Parse arguments
PLUGIN_FILTER="${1:-}"
PLUGIN_FILTER=""
ENABLE_COVERAGE=true
for arg in "$@"; do
if [ "$arg" = "--no-coverage" ]; then
ENABLE_COVERAGE=false
else
PLUGIN_FILTER="$arg"
fi
done
# Reset coverage data if collecting coverage
if [ "$ENABLE_COVERAGE" = true ]; then
echo "Resetting coverage data..."
cd "$ROOT_DIR" || exit 1
coverage erase
rm -rf "$ROOT_DIR/coverage/js" 2>/dev/null
mkdir -p "$ROOT_DIR/coverage/js"
# Enable Python subprocess coverage
export COVERAGE_PROCESS_START="$ROOT_DIR/pyproject.toml"
export PYTHONPATH="$ROOT_DIR:$PYTHONPATH" # For sitecustomize.py
# Enable Node.js V8 coverage (built-in, no packages needed)
export NODE_V8_COVERAGE="$ROOT_DIR/coverage/js"
echo "Python coverage: enabled (subprocess support)"
echo "JavaScript coverage: enabled (NODE_V8_COVERAGE)"
echo ""
fi
# Change to plugins directory
cd "$(dirname "$0")/../archivebox/plugins" || exit 1
cd "$ROOT_DIR/archivebox/plugins" || exit 1
echo "=========================================="
echo "ArchiveBox Plugin Tests"
@@ -34,6 +72,12 @@ if [ -n "$PLUGIN_FILTER" ]; then
else
echo "Running all plugin tests"
fi
if [ "$ENABLE_COVERAGE" = true ]; then
echo "Coverage: enabled"
else
echo "Coverage: disabled"
fi
echo ""
# Track results
@@ -67,7 +111,13 @@ for test_dir in $TEST_DIRS; do
echo -e "${YELLOW}[RUNNING]${NC} $plugin_name"
if python -m pytest "$test_dir" -p no:django -v --tb=short 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
# Build pytest command with optional coverage
PYTEST_CMD="python -m pytest $test_dir -p no:django -v --tb=short"
if [ "$ENABLE_COVERAGE" = true ]; then
PYTEST_CMD="$PYTEST_CMD --cov=$plugin_name --cov-append --cov-branch"
fi
if eval "$PYTEST_CMD" 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
echo -e "${GREEN}[PASSED]${NC} $plugin_name"
PASSED_PLUGINS=$((PASSED_PLUGINS + 1))
else
@@ -91,6 +141,41 @@ if [ $TOTAL_PLUGINS -eq 0 ]; then
exit 0
elif [ $FAILED_PLUGINS -eq 0 ]; then
echo -e "${GREEN}✓ All plugin tests passed!${NC}"
# Show coverage summary if enabled
if [ "$ENABLE_COVERAGE" = true ]; then
echo ""
echo "=========================================="
echo "Python Coverage Summary"
echo "=========================================="
# Coverage data is in ROOT_DIR, combine and report from there
cd "$ROOT_DIR" || exit 1
# Copy coverage data from plugins dir if it exists
if [ -f "$ROOT_DIR/archivebox/plugins/.coverage" ]; then
cp "$ROOT_DIR/archivebox/plugins/.coverage" "$ROOT_DIR/.coverage"
fi
coverage combine 2>/dev/null || true
coverage report --include="archivebox/plugins/*" --omit="*/tests/*" 2>&1 | head -50
echo ""
echo "=========================================="
echo "JavaScript Coverage Summary"
echo "=========================================="
if [ -d "$ROOT_DIR/coverage/js" ] && [ "$(ls -A "$ROOT_DIR/coverage/js" 2>/dev/null)" ]; then
node "$ROOT_DIR/bin/convert_v8_coverage.js" "$ROOT_DIR/coverage/js"
else
echo "No JavaScript coverage data collected"
echo "(JS hooks may not have been executed during tests)"
fi
echo ""
echo "For detailed coverage reports (from project root):"
echo " Python: coverage report --show-missing --include='archivebox/plugins/*' --omit='*/tests/*'"
echo " Python: coverage json # LLM-friendly format"
echo " Python: coverage html # Interactive HTML report"
echo " JavaScript: node bin/convert_v8_coverage.js coverage/js"
fi
exit 0
else
echo -e "${RED}✗ Some plugin tests failed${NC}"
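One note on the coverage setup earlier in this script: exporting COVERAGE_PROCESS_START and putting the project root on PYTHONPATH only measures subprocesses if a sitecustomize.py on that path calls coverage's process-startup hook. A minimal sketch of such a file, assuming coverage.py's documented subprocess mechanism (the repo's actual sitecustomize.py may differ):

# sitecustomize.py: imported automatically at interpreter startup when its
# directory is on PYTHONPATH. coverage.process_startup() is a no-op unless
# the COVERAGE_PROCESS_START environment variable points at a config file.
try:
    import coverage
    coverage.process_startup()
except ImportError:
    pass  # coverage not installed; run uninstrumented

Each measured subprocess writes its own .coverage.* data file, which is why the script runs coverage combine before coverage report (and why .coverage.* is ignored in the .gitignore change above).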