mirror of
https://github.com/zebrajr/ArchiveBox.git
synced 2026-01-15 12:15:10 +00:00
wip
This commit is contained in:
@@ -743,7 +743,7 @@ def update(filter_patterns: Iterable[str] = (),
|
||||
from archivebox.config.django import setup_django
|
||||
setup_django()
|
||||
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
from django.utils import timezone
|
||||
|
||||
while True:
|
||||
@@ -790,7 +790,7 @@ def import_orphans_from_archive(resume_from: str = None, batch_size: int = 100)
|
||||
Skip symlinks (already migrated).
|
||||
Create DB records and trigger migration on save().
|
||||
"""
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
from archivebox.config import CONSTANTS
|
||||
from django.db import transaction
|
||||
|
||||
@@ -858,7 +858,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
|
||||
Process all snapshots in DB.
|
||||
Reconcile index.json and queue for archiving.
|
||||
"""
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -896,7 +896,7 @@ def process_filtered_snapshots(
|
||||
batch_size: int
|
||||
) -> dict:
|
||||
"""Process snapshots matching filters (DB query only)."""
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
from datetime import datetime
|
||||
@@ -1042,7 +1042,7 @@ def search(filter_patterns: list[str] | None=None,
|
||||
with_headers: bool=False):
|
||||
"""List, filter, and export information about archive entries"""
|
||||
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
|
||||
if with_headers and not (json or html or csv):
|
||||
stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
|
||||
|
||||
@@ -658,7 +658,7 @@ def create_model_record(record: dict) -> Any:
|
||||
Returns:
|
||||
Created/updated model instance
|
||||
"""
|
||||
from machine.models import Binary, Dependency
|
||||
from archivebox.machine.models import Binary, Dependency
|
||||
|
||||
model_type = record.pop('type')
|
||||
|
||||
@@ -917,7 +917,7 @@ def find_binary_for_cmd(cmd: List[str], machine_id: str) -> Optional[str]:
|
||||
if not cmd:
|
||||
return None
|
||||
|
||||
from machine.models import Binary
|
||||
from archivebox.machine.models import Binary
|
||||
|
||||
bin_path_or_name = cmd[0]
|
||||
|
||||
@@ -977,7 +977,7 @@ def run_hook(
|
||||
"""
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from machine.models import Machine
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
@@ -1125,7 +1125,7 @@ def run(self):
|
||||
"""
|
||||
from django.utils import timezone
|
||||
from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook, find_binary_for_cmd, create_model_record
|
||||
from machine.models import Machine
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
config_objects = [self.snapshot.crawl, self.snapshot] if self.snapshot.crawl else [self.snapshot]
|
||||
|
||||
@@ -1458,7 +1458,7 @@ def finalize_background_hook(archiveresult: 'ArchiveResult') -> None:
|
||||
archiveresult: ArchiveResult instance to finalize
|
||||
"""
|
||||
from django.utils import timezone
|
||||
from machine.models import Machine
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
extractor_dir = Path(archiveresult.pwd)
|
||||
stdout_file = extractor_dir / 'stdout.log'
|
||||
|
||||
@@ -173,15 +173,15 @@ def process_hook_records(records: List[Dict], overrides: Dict = None) -> Dict[st
|
||||
|
||||
# Dispatch to appropriate model
|
||||
if record_type == 'Snapshot':
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
Snapshot.from_jsonl(record, overrides)
|
||||
stats['Snapshot'] = stats.get('Snapshot', 0) + 1
|
||||
elif record_type == 'Tag':
|
||||
from core.models import Tag
|
||||
from archivebox.core.models import Tag
|
||||
Tag.from_jsonl(record, overrides)
|
||||
stats['Tag'] = stats.get('Tag', 0) + 1
|
||||
elif record_type == 'Binary':
|
||||
from machine.models import Binary
|
||||
from archivebox.machine.models import Binary
|
||||
Binary.from_jsonl(record, overrides)
|
||||
stats['Binary'] = stats.get('Binary', 0) + 1
|
||||
# ... etc
|
||||
@@ -526,7 +526,7 @@ class Model:
|
||||
# Update children from filesystem
|
||||
child.update_from_output()
|
||||
|
||||
def update_for_workers(self, **fields):
|
||||
def update_and_requeue(self, **fields):
|
||||
"""Update fields and bump modified_at."""
|
||||
for field, value in fields.items():
|
||||
setattr(self, field, value)
|
||||
@@ -575,7 +575,7 @@ All core models (Crawl, Snapshot, ArchiveResult) now follow the unified pattern:
|
||||
- State machines orchestrate transitions
|
||||
- `.run()` methods execute hooks and process JSONL
|
||||
- `.cleanup()` methods kill background hooks
|
||||
- `.update_for_workers()` methods update state for worker coordination
|
||||
- `.update_and_requeue()` methods update state for worker coordination
|
||||
- Consistent use of `process_hook_records()` for JSONL dispatching
|
||||
|
||||
### ✅ Phases 7-8: Binary State Machine (Dependency Model Eliminated)
|
||||
|
||||
Reference in New Issue
Block a user