diff --git a/README.md b/README.md index 791c13c..3a94e6e 100644 --- a/README.md +++ b/README.md @@ -163,8 +163,20 @@ Settings are initialized by `openkb init`, and stored in `.openkb/config.yaml`: model: gpt-5.4 # LLM model (any LiteLLM-supported provider) language: en # Wiki output language pageindex_threshold: 20 # PDF pages threshold for PageIndex +storage_backend: sqlite # Storage backend: sqlite (default) or json ``` +### Storage Backend + +OpenKB supports two storage backends for the file hash registry: + +| Backend | Description | Use Case | +|---------|-------------|----------| +| `sqlite` | SQLite database (default) | Better concurrency, scalability, recommended for production | +| `json` | JSON file | Simple, human-readable, for small installations | + +Migration from JSON to SQLite happens automatically the first time you use the `sqlite` backend while a `hashes.json` file exists and no `hashes.db` has been created yet; it runs only once. The JSON file is preserved but no longer used. + Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/providers) (OpenAI models can omit the prefix): | Provider | Model example | diff --git a/openkb/cli.py b/openkb/cli.py index d91789f..82dd9d0 100644 --- a/openkb/cli.py +++ b/openkb/cli.py @@ -138,14 +138,15 @@ def _add_single_file(file_path: Path, kb_dir: Path) -> None: 4. Else: compile_short_doc. """ from openkb.agent.compiler import compile_long_doc, compile_short_doc - from openkb.state import HashRegistry + from openkb.state import get_registry logger = logging.getLogger(__name__) openkb_dir = kb_dir / ".openkb" config = load_config(openkb_dir / "config.yaml") _setup_llm_key(kb_dir) model: str = config.get("model", DEFAULT_CONFIG["model"]) - registry = HashRegistry(openkb_dir / "hashes.json") + backend = config.get("storage_backend", "sqlite") + registry = get_registry(openkb_dir, backend=backend) # 2.
Convert document click.echo(f"Adding: {file_path.name}") @@ -299,9 +300,10 @@ def init(): "model": model, "language": DEFAULT_CONFIG["language"], "pageindex_threshold": DEFAULT_CONFIG["pageindex_threshold"], + "storage_backend": DEFAULT_CONFIG["storage_backend"], } save_config(openkb_dir / "config.yaml", config) - (openkb_dir / "hashes.json").write_text(json.dumps({}), encoding="utf-8") + # The SQLite DB is created lazily by get_registry() on first access; no need to pre-create it # Write API key to KB-local .env (0600) if the user provided one if api_key: @@ -590,13 +592,13 @@ def list_cmd(ctx): click.echo("No knowledge base found. Run `openkb init` first.") return - openkb_dir = kb_dir / ".openkb" - hashes_file = openkb_dir / "hashes.json" - if not hashes_file.exists(): - click.echo("No documents indexed yet.") - return + from openkb.state import get_registry - hashes = json.loads(hashes_file.read_text(encoding="utf-8")) + openkb_dir = kb_dir / ".openkb" + config = load_config(openkb_dir / "config.yaml") + backend = config.get("storage_backend", "sqlite") + registry = get_registry(openkb_dir, backend=backend) + hashes = registry.all_entries() if not hashes: click.echo("No documents indexed yet.") return @@ -673,11 +675,14 @@ def status(ctx): click.echo(f" {'raw':<20} {raw_count:<10}") # Hash registry summary + from openkb.state import get_registry + openkb_dir = kb_dir / ".openkb" - hashes_file = openkb_dir / "hashes.json" - if hashes_file.exists(): - hashes = json.loads(hashes_file.read_text(encoding="utf-8")) - click.echo(f"\n Total indexed: {len(hashes)} document(s)") + config = load_config(openkb_dir / "config.yaml") + backend = config.get("storage_backend", "sqlite") + registry = get_registry(openkb_dir, backend=backend) + hashes = registry.all_entries() + click.echo(f"\n Total indexed: {len(hashes)} document(s)") # Last compile time: newest file in wiki/summaries/ summaries_dir = wiki_dir / "summaries" diff --git a/openkb/config.py b/openkb/config.py index b83e134..4c2169a 100644 --- a/openkb/config.py
+++ b/openkb/config.py @@ -9,6 +9,7 @@ "model": "gpt-5.4-mini", "language": "en", "pageindex_threshold": 20, + "storage_backend": "sqlite", } GLOBAL_CONFIG_DIR = Path.home() / ".config" / "openkb" diff --git a/openkb/converter.py b/openkb/converter.py index 3f5f529..51359a6 100644 --- a/openkb/converter.py +++ b/openkb/converter.py @@ -11,7 +11,7 @@ from openkb.config import load_config from openkb.images import copy_relative_images, extract_base64_images, convert_pdf_with_images -from openkb.state import HashRegistry +from openkb.state import get_registry logger = logging.getLogger(__name__) @@ -50,12 +50,13 @@ def convert_document(src: Path, kb_dir: Path) -> ConvertResult: openkb_dir = kb_dir / ".openkb" config = load_config(openkb_dir / "config.yaml") threshold: int = config.get("pageindex_threshold", 20) - registry = HashRegistry(openkb_dir / "hashes.json") + backend = config.get("storage_backend", "sqlite") + registry = get_registry(openkb_dir, backend=backend) # ------------------------------------------------------------------ # 1. 
Hash check # ------------------------------------------------------------------ - file_hash = HashRegistry.hash_file(src) + file_hash = registry.hash_file(src) if registry.is_known(file_hash): logger.info("Skipping already-known file: %s", src.name) return ConvertResult(skipped=True) diff --git a/openkb/state.py b/openkb/state.py index 9381606..dc9cd6a 100644 --- a/openkb/state.py +++ b/openkb/state.py @@ -2,7 +2,19 @@ import hashlib import json +import sqlite3 +from contextlib import contextmanager from pathlib import Path +from typing import Iterator + + +def _hash_file(path: Path) -> str: + """Return the SHA-256 hex digest (64 chars) of the file at path.""" + h = hashlib.sha256() + with path.open("rb") as fh: + for chunk in iter(lambda: fh.read(65536), b""): + h.update(chunk) + return h.hexdigest() class HashRegistry: @@ -57,8 +69,154 @@ def _persist(self) -> None: @staticmethod def hash_file(path: Path) -> str: """Return the SHA-256 hex digest (64 chars) of the file at path.""" - h = hashlib.sha256() - with path.open("rb") as fh: - for chunk in iter(lambda: fh.read(65536), b""): - h.update(chunk) - return h.hexdigest() + return _hash_file(path) + + +class DbRegistry: + """SQLite-backed registry mapping file SHA-256 hashes to metadata dicts. + + Provides better scalability, concurrency support, and extensibility + compared to JSON-backed HashRegistry. + """ + + def __init__(self, path: Path, migrate_from: Path | None = None) -> None: + """Initialize DbRegistry. + + Args: + path: Path to SQLite database file. + migrate_from: Optional path to JSON file to migrate from. + Migration only happens if DB doesn't exist yet. 
+ """ + self._path = path + should_migrate = migrate_from is not None and not path.exists() + self._init_db() + if should_migrate: + self._migrate_from_json(migrate_from) + + def _migrate_from_json(self, json_path: Path) -> None: + """Migrate data from JSON file to SQLite database.""" + if not json_path.exists(): + return + + with json_path.open("r", encoding="utf-8") as fh: + data: dict[str, dict] = json.load(fh) + + with self._connect() as conn: + for file_hash, metadata in data.items(): + metadata_json = json.dumps(metadata, ensure_ascii=False) + conn.execute(""" + INSERT OR REPLACE INTO registry (file_hash, metadata_json) + VALUES (?, ?) + """, (file_hash, metadata_json)) + + def _init_db(self) -> None: + """Initialize database schema if not exists.""" + self._path.parent.mkdir(parents=True, exist_ok=True) + + with self._connect() as conn: + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys=ON") + conn.execute(""" + CREATE TABLE IF NOT EXISTS registry ( + file_hash TEXT PRIMARY KEY, + metadata_json TEXT NOT NULL, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP + ) + """) + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_created_at ON registry(created_at) + """) + + @contextmanager + def _connect(self) -> Iterator[sqlite3.Connection]: + """Context manager for database connections.""" + conn = sqlite3.connect(str(self._path)) + try: + yield conn + conn.commit() + finally: + conn.close() + + def is_known(self, file_hash: str) -> bool: + """Return True if file_hash is already registered.""" + with self._connect() as conn: + cursor = conn.execute( + "SELECT 1 FROM registry WHERE file_hash = ?", + (file_hash,) + ) + return cursor.fetchone() is not None + + def get(self, file_hash: str) -> dict | None: + """Return metadata for file_hash, or None if not found.""" + with self._connect() as conn: + cursor = conn.execute( + "SELECT metadata_json FROM registry WHERE file_hash = ?", + (file_hash,) + ) + 
row = cursor.fetchone() + if row is None: + return None + return json.loads(row[0]) + + def all_entries(self) -> dict[str, dict]: + """Return a shallow copy of all hash -> metadata entries.""" + with self._connect() as conn: + cursor = conn.execute( + "SELECT file_hash, metadata_json FROM registry" + ) + return { + row[0]: json.loads(row[1]) + for row in cursor.fetchall() + } + + def add(self, file_hash: str, metadata: dict) -> None: + """Register file_hash with metadata and persist to disk. + + If file_hash already exists, updates the metadata. + """ + metadata_json = json.dumps(metadata, ensure_ascii=False) + with self._connect() as conn: + conn.execute(""" + INSERT INTO registry (file_hash, metadata_json, updated_at) + VALUES (?, ?, CURRENT_TIMESTAMP) + ON CONFLICT(file_hash) DO UPDATE SET + metadata_json = excluded.metadata_json, + updated_at = CURRENT_TIMESTAMP + """, (file_hash, metadata_json)) + + @staticmethod + def hash_file(path: Path) -> str: + """Return the SHA-256 hex digest (64 chars) of the file at path.""" + return _hash_file(path) + + +def get_registry( + openkb_dir: Path, + backend: str = "sqlite", +) -> HashRegistry | DbRegistry: + """Factory function to get the appropriate registry implementation. + + Args: + openkb_dir: Path to .openkb directory. + backend: Storage backend - "sqlite" or "json". + + Returns: + HashRegistry for "json" backend, DbRegistry for "sqlite" backend. + + When switching from json to sqlite and a JSON file exists, + automatically migrates the data. 
+ """ + if backend not in ("sqlite", "json"): + raise ValueError(f"Unknown storage_backend: {backend!r}") + + if backend == "json": + return HashRegistry(openkb_dir / "hashes.json") + + db_path = openkb_dir / "hashes.db" + json_path = openkb_dir / "hashes.json" + + if json_path.exists() and not db_path.exists(): + return DbRegistry(db_path, migrate_from=json_path) + + return DbRegistry(db_path) diff --git a/tests/test_cli.py b/tests/test_cli.py index afb961d..407896d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,6 +2,7 @@ from unittest.mock import patch import pytest +import yaml from click.testing import CliRunner from openkb.cli import cli @@ -30,11 +31,11 @@ def test_init_creates_structure(tmp_path): assert (cwd / "wiki" / "log.md").is_file() assert (cwd / "wiki" / "index.md").is_file() assert (cwd / ".openkb" / "config.yaml").is_file() - assert (cwd / ".openkb" / "hashes.json").is_file() + # SQLite DB 在首次访问时由 get_registry() 惰性创建 + assert not (cwd / ".openkb" / "hashes.json").exists() - # hashes.json is empty object - hashes = json.loads((cwd / ".openkb" / "hashes.json").read_text()) - assert hashes == {} + config = yaml.safe_load((cwd / ".openkb" / "config.yaml").read_text()) + assert config["storage_backend"] == "sqlite" # index.md header index_content = (cwd / "wiki" / "index.md").read_text() diff --git a/tests/test_config_storage_backend.py b/tests/test_config_storage_backend.py new file mode 100644 index 0000000..7a0e987 --- /dev/null +++ b/tests/test_config_storage_backend.py @@ -0,0 +1,37 @@ +"""Tests for storage_backend config option.""" +from __future__ import annotations + +from pathlib import Path + +from openkb.config import DEFAULT_CONFIG, load_config, save_config + + +def test_default_config_has_storage_backend(): + """DEFAULT_CONFIG should include storage_backend key.""" + assert "storage_backend" in DEFAULT_CONFIG + + +def test_default_storage_backend_is_sqlite(): + """Default storage_backend should be 'sqlite'.""" + assert 
DEFAULT_CONFIG["storage_backend"] == "sqlite" + + +def test_load_config_includes_storage_backend(tmp_path): + """load_config should return storage_backend from config file.""" + config_path = tmp_path / "config.yaml" + save_config(config_path, {"storage_backend": "json"}) + loaded = load_config(config_path) + assert loaded["storage_backend"] == "json" + + +def test_storage_backend_valid_values(tmp_path): + """storage_backend should accept 'sqlite' or 'json'.""" + config_path = tmp_path / "config.yaml" + + save_config(config_path, {"storage_backend": "sqlite"}) + loaded = load_config(config_path) + assert loaded["storage_backend"] == "sqlite" + + save_config(config_path, {"storage_backend": "json"}) + loaded = load_config(config_path) + assert loaded["storage_backend"] == "json" diff --git a/tests/test_converter.py b/tests/test_converter.py index 6c184fd..919819f 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -48,14 +48,15 @@ def test_md_file_copied_to_wiki_sources(self, kb_dir): def test_md_duplicate_skipped(self, kb_dir): """Second call with same file returns skipped=True when hash is registered.""" - from openkb.state import HashRegistry + from openkb.state import get_registry src = kb_dir / "raw" / "notes.md" src.write_text("# Notes\n\nSome content here.", encoding="utf-8") result1 = convert_document(src, kb_dir) # first call # Simulate CLI registering the hash after successful compilation - registry = HashRegistry(kb_dir / ".openkb" / "hashes.json") + openkb_dir = kb_dir / ".openkb" + registry = get_registry(openkb_dir, backend="sqlite") registry.add(result1.file_hash, {"name": src.name, "type": "md"}) result2 = convert_document(src, kb_dir) # second call diff --git a/tests/test_db_registry.py b/tests/test_db_registry.py new file mode 100644 index 0000000..491343e --- /dev/null +++ b/tests/test_db_registry.py @@ -0,0 +1,172 @@ +"""Tests for DbRegistry SQLite-backed storage.""" +from __future__ import annotations + +import json +import 
sqlite3 +from pathlib import Path + +import pytest + +from openkb.state import DbRegistry + + +def test_db_registry_creates_database_file(tmp_path): + """DbRegistry should create a .db file on init.""" + db_path = tmp_path / "hashes.db" + registry = DbRegistry(db_path) + assert db_path.exists() + + +def test_db_registry_creates_table(tmp_path): + """DbRegistry should create the registry table.""" + db_path = tmp_path / "hashes.db" + registry = DbRegistry(db_path) + + conn = sqlite3.connect(str(db_path)) + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='registry'" + ) + result = cursor.fetchone() + conn.close() + assert result is not None + + +def test_db_empty_registry_is_known_false(tmp_path): + """Empty DbRegistry should return False for is_known.""" + registry = DbRegistry(tmp_path / "hashes.db") + assert registry.is_known("abc123") is False + + +def test_db_empty_registry_get_returns_none(tmp_path): + """Empty DbRegistry should return None for get.""" + registry = DbRegistry(tmp_path / "hashes.db") + assert registry.get("abc123") is None + + +def test_db_add_and_is_known(tmp_path): + """After add, is_known should return True.""" + registry = DbRegistry(tmp_path / "hashes.db") + registry.add("deadbeef", {"filename": "test.pdf"}) + assert registry.is_known("deadbeef") is True + + +def test_db_add_and_get(tmp_path): + """After add, get should return the metadata.""" + registry = DbRegistry(tmp_path / "hashes.db") + metadata = {"filename": "doc.pdf", "pages": 10} + registry.add("cafebabe", metadata) + assert registry.get("cafebabe") == metadata + + +def test_db_persistence_across_instances(tmp_path): + """Data should persist across DbRegistry instances.""" + db_path = tmp_path / "hashes.db" + r1 = DbRegistry(db_path) + r1.add("hash1", {"file": "a.pdf"}) + + r2 = DbRegistry(db_path) + assert r2.is_known("hash1") is True + assert r2.get("hash1") == {"file": "a.pdf"} + + +def test_db_all_entries_returns_all(tmp_path): + 
"""all_entries should return all hash -> metadata mappings.""" + registry = DbRegistry(tmp_path / "hashes.db") + registry.add("h1", {"name": "one"}) + registry.add("h2", {"name": "two"}) + entries = registry.all_entries() + assert "h1" in entries + assert "h2" in entries + assert entries["h1"] == {"name": "one"} + assert entries["h2"] == {"name": "two"} + + +def test_db_all_entries_empty(tmp_path): + """all_entries on empty registry should return empty dict.""" + registry = DbRegistry(tmp_path / "hashes.db") + assert registry.all_entries() == {} + + +def test_db_hash_file_unchanged(tmp_path): + """DbRegistry.hash_file should work same as HashRegistry.""" + f = tmp_path / "sample.txt" + f.write_text("hello world") + digest = DbRegistry.hash_file(f) + assert len(digest) == 64 + assert all(c in "0123456789abcdef" for c in digest) + + +def test_db_update_existing_hash(tmp_path): + """Adding same hash twice should update metadata.""" + registry = DbRegistry(tmp_path / "hashes.db") + registry.add("hash1", {"version": 1}) + registry.add("hash1", {"version": 2}) + assert registry.get("hash1") == {"version": 2} + + +def test_db_metadata_with_nested_dict(tmp_path): + """Metadata can contain nested dictionaries.""" + registry = DbRegistry(tmp_path / "hashes.db") + metadata = { + "name": "doc.pdf", + "stats": {"pages": 10, "words": 5000}, + } + registry.add("hash1", metadata) + assert registry.get("hash1") == metadata + + +def test_db_wal_mode_enabled(tmp_path): + """Database should use WAL mode for concurrency.""" + db_path = tmp_path / "hashes.db" + DbRegistry(db_path) + + conn = sqlite3.connect(str(db_path)) + cursor = conn.execute("PRAGMA journal_mode") + result = cursor.fetchone() + conn.close() + assert result[0].lower() == "wal" + + +def test_migrate_from_json(tmp_path): + """DbRegistry should migrate existing JSON data on first access.""" + json_path = tmp_path / "hashes.json" + existing_data = { + "hash1": {"name": "doc1.pdf", "pages": 10}, + "hash2": {"name": 
"doc2.pdf", "pages": 20}, + } + json_path.write_text(json.dumps(existing_data), encoding="utf-8") + + db_path = tmp_path / "hashes.db" + registry = DbRegistry(db_path, migrate_from=json_path) + + assert registry.is_known("hash1") + assert registry.is_known("hash2") + assert registry.get("hash1") == {"name": "doc1.pdf", "pages": 10} + assert registry.get("hash2") == {"name": "doc2.pdf", "pages": 20} + + +def test_migrate_only_once(tmp_path): + """Migration should only happen once, not on subsequent loads.""" + json_path = tmp_path / "hashes.json" + existing_data = {"hash1": {"name": "doc1.pdf"}} + json_path.write_text(json.dumps(existing_data), encoding="utf-8") + + db_path = tmp_path / "hashes.db" + + r1 = DbRegistry(db_path, migrate_from=json_path) + assert r1.is_known("hash1") + + existing_data["hash2"] = {"name": "doc2.pdf"} + json_path.write_text(json.dumps(existing_data), encoding="utf-8") + + r2 = DbRegistry(db_path, migrate_from=json_path) + assert r2.is_known("hash1") + assert not r2.is_known("hash2") + + +def test_migrate_optional(tmp_path): + """DbRegistry should work without migration.""" + db_path = tmp_path / "hashes.db" + registry = DbRegistry(db_path) + registry.add("hash1", {"name": "doc.pdf"}) + assert registry.is_known("hash1") diff --git a/tests/test_migration.py b/tests/test_migration.py new file mode 100644 index 0000000..67e8996 --- /dev/null +++ b/tests/test_migration.py @@ -0,0 +1,74 @@ +"""Integration tests for JSON to SQLite migration.""" +from __future__ import annotations + +import json +import threading +from pathlib import Path + +import pytest + +from openkb.state import get_registry, DbRegistry + + +def test_full_migration_workflow(tmp_path): + """Test complete migration from JSON to SQLite.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + # Step 1: Start with JSON backend + json_registry = get_registry(openkb_dir, backend="json") + json_registry.add("hash1", {"name": "doc1.pdf", "pages": 10}) + 
json_registry.add("hash2", {"name": "doc2.pdf", "pages": 20}) + + # Verify JSON file exists + json_path = openkb_dir / "hashes.json" + assert json_path.exists() + + # Step 2: Switch to SQLite backend (triggers migration) + sqlite_registry = get_registry(openkb_dir, backend="sqlite") + + # Verify data was migrated + assert sqlite_registry.is_known("hash1") + assert sqlite_registry.is_known("hash2") + assert sqlite_registry.get("hash1") == {"name": "doc1.pdf", "pages": 10} + assert sqlite_registry.get("hash2") == {"name": "doc2.pdf", "pages": 20} + + # Step 3: Add new data via SQLite + sqlite_registry.add("hash3", {"name": "doc3.pdf", "pages": 30}) + + # Step 4: Create new SQLite instance - should have all data + sqlite_registry2 = get_registry(openkb_dir, backend="sqlite") + assert sqlite_registry2.is_known("hash1") + assert sqlite_registry2.is_known("hash2") + assert sqlite_registry2.is_known("hash3") + + +def test_concurrent_sqlite_access(tmp_path): + """Test that SQLite handles concurrent access correctly.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + registry = get_registry(openkb_dir, backend="sqlite") + errors = [] + + def add_entries(start: int, count: int) -> None: + try: + for i in range(start, start + count): + registry.add(f"hash{i}", {"index": i}) + except Exception as e: + errors.append(e) + + threads = [ + threading.Thread(target=add_entries, args=(0, 50)), + threading.Thread(target=add_entries, args=(50, 50)), + threading.Thread(target=add_entries, args=(100, 50)), + ] + + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors + entries = registry.all_entries() + assert len(entries) == 150 diff --git a/tests/test_state.py b/tests/test_state.py index 1b4371f..cc9c5ce 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -82,3 +82,50 @@ def test_load_existing_json(tmp_path): registry = HashRegistry(path) assert registry.is_known("existinghash") is True assert registry.get("existinghash") == {"file": 
"pre.pdf"} + + +# --------------------------------------------------------------------------- +# Factory function tests +# --------------------------------------------------------------------------- + +from openkb.state import get_registry + + +def test_get_registry_returns_db_registry_by_default(tmp_path): + """get_registry should return DbRegistry by default.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + registry = get_registry(openkb_dir) + assert type(registry).__name__ == "DbRegistry" + + +def test_get_registry_returns_hash_registry_for_json_backend(tmp_path): + """get_registry should return HashRegistry when backend is 'json'.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + registry = get_registry(openkb_dir, backend="json") + assert type(registry).__name__ == "HashRegistry" + + +def test_get_registry_returns_db_registry_for_sqlite_backend(tmp_path): + """get_registry should return DbRegistry when backend is 'sqlite'.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + registry = get_registry(openkb_dir, backend="sqlite") + assert type(registry).__name__ == "DbRegistry" + + +def test_get_registry_migrates_json_to_sqlite(tmp_path): + """get_registry should migrate existing JSON when switching to sqlite.""" + openkb_dir = tmp_path / ".openkb" + openkb_dir.mkdir() + + json_path = openkb_dir / "hashes.json" + json_path.write_text('{"hash1": {"name": "doc.pdf"}}', encoding="utf-8") + + registry = get_registry(openkb_dir, backend="sqlite") + assert registry.is_known("hash1") + assert registry.get("hash1") == {"name": "doc.pdf"}