wip

2026-01-15 12:15:10 +00:00 · 2025-12-28 17:51:54 -08:00
parent 54f91c1339
commit f0aa19fa7d
157 changed files with 6774 additions and 5061 deletions
--- a/old/TODO_fs_migrations.md
+++ b/old/TODO_fs_migrations.md
@@ -743,7 +743,7 @@ def update(filter_patterns: Iterable[str] = (),
    from archivebox.config.django import setup_django
    setup_django()

-    from core.models import Snapshot
+    from archivebox.core.models import Snapshot
    from django.utils import timezone

    while True:
@@ -790,7 +790,7 @@ def import_orphans_from_archive(resume_from: str = None, batch_size: int = 100)
    Skip symlinks (already migrated).
    Create DB records and trigger migration on save().
    """
-    from core.models import Snapshot
+    from archivebox.core.models import Snapshot
    from archivebox.config import CONSTANTS
    from django.db import transaction

@@ -858,7 +858,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
    Process all snapshots in DB.
    Reconcile index.json and queue for archiving.
    """
-    from core.models import Snapshot
+    from archivebox.core.models import Snapshot
    from django.db import transaction
    from django.utils import timezone

@@ -896,7 +896,7 @@ def process_filtered_snapshots(
    batch_size: int
 ) -> dict:
    """Process snapshots matching filters (DB query only)."""
-    from core.models import Snapshot
+    from archivebox.core.models import Snapshot
    from django.db import transaction
    from django.utils import timezone
    from datetime import datetime
@@ -1042,7 +1042,7 @@ def search(filter_patterns: list[str] | None=None,
           with_headers: bool=False):
    """List, filter, and export information about archive entries"""

-    from core.models import Snapshot
+    from archivebox.core.models import Snapshot

    if with_headers and not (json or html or csv):
        stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
--- a/old/TODO_hook_architecture.md
+++ b/old/TODO_hook_architecture.md
@@ -658,7 +658,7 @@ def create_model_record(record: dict) -> Any:
    Returns:
        Created/updated model instance
    """
-    from machine.models import Binary, Dependency
+    from archivebox.machine.models import Binary, Dependency

    model_type = record.pop('type')

@@ -917,7 +917,7 @@ def find_binary_for_cmd(cmd: List[str], machine_id: str) -> Optional[str]:
    if not cmd:
        return None

-    from machine.models import Binary
+    from archivebox.machine.models import Binary

    bin_path_or_name = cmd[0]

@@ -977,7 +977,7 @@ def run_hook(
    """
    import time
    from datetime import datetime, timezone
-    from machine.models import Machine
+    from archivebox.machine.models import Machine

    start_time = time.time()

@@ -1125,7 +1125,7 @@ def run(self):
    """
    from django.utils import timezone
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook, find_binary_for_cmd, create_model_record
-    from machine.models import Machine
+    from archivebox.machine.models import Machine

    config_objects = [self.snapshot.crawl, self.snapshot] if self.snapshot.crawl else [self.snapshot]

@@ -1458,7 +1458,7 @@ def finalize_background_hook(archiveresult: 'ArchiveResult') -> None:
        archiveresult: ArchiveResult instance to finalize
    """
    from django.utils import timezone
-    from machine.models import Machine
+    from archivebox.machine.models import Machine

    extractor_dir = Path(archiveresult.pwd)
    stdout_file = extractor_dir / 'stdout.log'
--- a/old/TODO_hook_statemachine_cleanup.md
+++ b/old/TODO_hook_statemachine_cleanup.md
@@ -173,15 +173,15 @@ def process_hook_records(records: List[Dict], overrides: Dict = None) -> Dict[st

        # Dispatch to appropriate model
        if record_type == 'Snapshot':
-            from core.models import Snapshot
+            from archivebox.core.models import Snapshot
            Snapshot.from_jsonl(record, overrides)
            stats['Snapshot'] = stats.get('Snapshot', 0) + 1
        elif record_type == 'Tag':
-            from core.models import Tag
+            from archivebox.core.models import Tag
            Tag.from_jsonl(record, overrides)
            stats['Tag'] = stats.get('Tag', 0) + 1
        elif record_type == 'Binary':
-            from machine.models import Binary
+            from archivebox.machine.models import Binary
            Binary.from_jsonl(record, overrides)
            stats['Binary'] = stats.get('Binary', 0) + 1
        # ... etc
@@ -526,7 +526,7 @@ class Model:
            # Update children from filesystem
            child.update_from_output()

-    def update_for_workers(self, **fields):
+    def update_and_requeue(self, **fields):
        """Update fields and bump modified_at."""
        for field, value in fields.items():
            setattr(self, field, value)
@@ -575,7 +575,7 @@ All core models (Crawl, Snapshot, ArchiveResult) now follow the unified pattern:
 - State machines orchestrate transitions
 - `.run()` methods execute hooks and process JSONL
 - `.cleanup()` methods kill background hooks
- `.update_for_workers()` methods update state for worker coordination
+- `.update_and_requeue()` methods update state for worker coordination
 - Consistent use of `process_hook_records()` for JSONL dispatching

 ### ✅ Phases 7-8: Binary State Machine (Dependency Model Eliminated)