zarr-developers
diff --git a/‎docs/api/storage.rst
Lines changed: 4 additions & 0 deletions b/‎docs/api/storage.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/release.rst
Lines changed: 4 additions & 0 deletions b/‎docs/release.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/tutorial.rst
Lines changed: 10 additions & 0 deletions b/‎docs/tutorial.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎zarr/__init__.py
Lines changed: 2 additions & 1 deletion b/‎zarr/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎zarr/storage.py
Lines changed: 207 additions & 0 deletions b/‎zarr/storage.py
Lines changed: 207 additions & 0 deletions
diff --git a/‎zarr/tests/test_core.py
Lines changed: 26 additions & 1 deletion b/‎zarr/tests/test_core.py
Lines changed: 26 additions & 1 deletion
diff --git a/‎zarr/tests/test_hierarchy.py
Lines changed: 18 additions & 2 deletions b/‎zarr/tests/test_hierarchy.py
Lines changed: 18 additions & 2 deletions
@@ -21,6 +21,10 @@ Storage (``zarr.storage``)
     .. automethod:: close
     .. automethod:: flush
 
+.. autoclass:: SQLiteStore
+
+    .. automethod:: close
+
 .. autoclass:: LRUStoreCache
 
     .. automethod:: invalidate
 
@@ -19,6 +19,10 @@ Enhancements
 * Support has been added for structured arrays with sub-array shape and/or nested fields. By
   :user:`Tarik Onalan <onalant>`, :issue:`111`, :issue:`296`.
 
+* Adds the SQLite-backed :class:`zarr.storage.SQLiteStore` class enabling an
+  SQLite database to be used as the backing store for an array or group.
+  By :user:`John Kirkham <jakirkham>`, :issue:`368`, :issue:`365`.
+
 Bug fixes
 ~~~~~~~~~
 
 
@@ -729,6 +729,16 @@ group (requires `lmdb <http://lmdb.readthedocs.io/>`_ to be installed)::
     >>> z[:] = 42
     >>> store.close()
 
+New in Zarr version 2.3 is the :class:`zarr.storage.SQLiteStore` class, which
+enables an SQLite database to be used for storing an array or group (requires
+that Python is built with SQLite support)::
+
+    >>> store = zarr.SQLiteStore('data/example.sqldb')
+    >>> root = zarr.group(store=store, overwrite=True)
+    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
+    >>> z[:] = 42
+    >>> store.close()
+
 Distributed/cloud storage
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
@@ -7,7 +7,8 @@
 from zarr.creation import (empty, zeros, ones, full, array, empty_like, zeros_like,
                            ones_like, full_like, open_array, open_like, create)
 from zarr.storage import (DictStore, DirectoryStore, ZipStore, TempStore,
-                          NestedDirectoryStore, DBMStore, LMDBStore, LRUStoreCache)
+                          NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore,
+                          LRUStoreCache)
 from zarr.hierarchy import group, open_group, Group
 from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
 from zarr.codecs import *
 
@@ -18,6 +18,7 @@
 from __future__ import absolute_import, print_function, division
 from collections import MutableMapping, OrderedDict
 import os
+import operator
 import tempfile
 import zipfile
 import shutil
@@ -26,6 +27,7 @@
 import sys
 import json
 import multiprocessing
+from pickle import PicklingError
 from threading import Lock, RLock
 import glob
 import warnings
@@ -1877,6 +1879,211 @@ def __delitem__(self, key):
             self._invalidate_value(key)
 
 
+class SQLiteStore(MutableMapping):
+    """Storage class using SQLite.
+
+    Keys and values are kept as rows of a single two-column table named
+    ``zarr`` (``k TEXT PRIMARY KEY, v BLOB``), which is created if missing
+    when the store is opened.
+
+    Parameters
+    ----------
+    path : string
+        Location of database file.
+    **kwargs
+        Keyword arguments passed through to the `sqlite3.connect` function.
+
+    Examples
+    --------
+    Store a single array::
+
+        >>> import zarr
+        >>> store = zarr.SQLiteStore('data/array.sqldb')
+        >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
+        >>> z[...] = 42
+        >>> store.close()  # don't forget to call this when you're done
+
+    Store a group::
+
+        >>> store = zarr.SQLiteStore('data/group.sqldb')
+        >>> root = zarr.group(store=store, overwrite=True)
+        >>> foo = root.create_group('foo')
+        >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5))
+        >>> bar[...] = 42
+        >>> store.close()  # don't forget to call this when you're done
+    """
+
+    # NOTE(review): every read below runs on the single shared ``self.cursor``
+    # and only the writing methods take ``self.lock``; issuing another query
+    # while a `keys`/`values`/`items` generator is partially consumed likely
+    # resets that generator's pending results -- confirm before relying on it.
+
+    def __init__(self, path, **kwargs):
+        """Open (or create) the database at `path` and ensure the table exists."""
+        import sqlite3
+
+        # normalize path (the special in-memory name must be passed verbatim)
+        if path != ':memory:':
+            path = os.path.abspath(path)
+
+        # store properties; these are also what gets pickled
+        self.path = path
+        self.kwargs = kwargs
+
+        # allow threading if SQLite connections are thread-safe
+        #
+        # ref: https://www.sqlite.org/releaselog/3_3_1.html
+        # ref: https://bugs.python.org/issue27190
+        check_same_thread = True
+        if sqlite3.sqlite_version_info >= (3, 3, 1):
+            check_same_thread = False
+
+        # keep a lock for serializing mutable operations
+        self.lock = Lock()
+
+        # open database; `isolation_level=None` puts the connection in
+        # autocommit mode, so each statement takes effect immediately
+        self.db = sqlite3.connect(
+            self.path,
+            detect_types=0,
+            isolation_level=None,
+            check_same_thread=check_same_thread,
+            **self.kwargs
+        )
+
+        # handle keys as `str`s
+        self.db.text_factory = str
+
+        # get a cursor to read/write to the database
+        self.cursor = self.db.cursor()
+
+        # initialize database with our table if missing
+        with self.lock:
+            self.cursor.execute(
+                'CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)'
+            )
+
+    def __getstate__(self):
+        """Return ``(path, kwargs)`` as the pickled state.
+
+        Raises
+        ------
+        PicklingError
+            If the store was opened on ``':memory:'``.
+        """
+        if self.path == ':memory:':
+            raise PicklingError('Cannot pickle in-memory SQLite databases')
+        return self.path, self.kwargs
+
+    def __setstate__(self, state):
+        """Restore from pickled state by re-running `__init__` on the path."""
+        path, kwargs = state
+        self.__init__(path=path, **kwargs)
+
+    def close(self):
+        """Closes the underlying database."""
+
+        # close cursor and db objects
+        self.cursor.close()
+        self.db.close()
+
+    def __getitem__(self, key):
+        """Return the value stored under `key`; raise `KeyError` if absent."""
+        value = self.cursor.execute('SELECT v FROM zarr WHERE (k = ?)', (key,))
+        # `k` is the primary key, so at most one row comes back
+        for v, in value:
+            return v
+        raise KeyError(key)
+
+    def __setitem__(self, key, value):
+        """Store `value` under `key` (delegates to `update`)."""
+        self.update({key: value})
+
+    def __delitem__(self, key):
+        """Delete `key`; raise `KeyError` if no row was removed."""
+        with self.lock:
+            self.cursor.execute('DELETE FROM zarr WHERE (k = ?)', (key,))
+            if self.cursor.rowcount < 1:
+                raise KeyError(key)
+
+    def __contains__(self, key):
+        """Return whether a row with key `key` exists."""
+        cs = self.cursor.execute(
+            'SELECT COUNT(*) FROM zarr WHERE (k = ?)', (key,)
+        )
+        for has, in cs:
+            return bool(has)
+
+    def items(self):
+        """Yield ``(key, value)`` pairs for every row in the table."""
+        kvs = self.cursor.execute('SELECT k, v FROM zarr')
+        for k, v in kvs:
+            yield k, v
+
+    def keys(self):
+        """Yield every key in the table."""
+        ks = self.cursor.execute('SELECT k FROM zarr')
+        for k, in ks:
+            yield k
+
+    def values(self):
+        """Yield every value in the table."""
+        vs = self.cursor.execute('SELECT v FROM zarr')
+        for v, in vs:
+            yield v
+
+    def __iter__(self):
+        """Iterate over keys."""
+        return self.keys()
+
+    def __len__(self):
+        """Return the number of rows in the table."""
+        cs = self.cursor.execute('SELECT COUNT(*) FROM zarr')
+        for c, in cs:
+            return c
+
+    def update(self, *args, **kwargs):
+        """Insert or replace many key/value pairs with one `executemany` call.
+
+        Accepts any number of mappings positionally plus keyword arguments;
+        each positional argument must provide ``.items()``.
+        """
+        args += (kwargs,)
+
+        kv_list = []
+        for dct in args:
+            for k, v in dct.items():
+                # Python 2 cannot store `memoryview`s, but it can store
+                # `buffer`s. However Python 2 won't return `bytes` then. So we
+                # coerce to `bytes`, which are handled correctly. Python 3
+                # doesn't have these issues.
+                if PY2:  # pragma: py3 no cover
+                    v = ensure_bytes(v)
+                else:  # pragma: py2 no cover
+                    v = ensure_contiguous_ndarray(v)
+
+                # Accumulate key-value pairs for storage
+                kv_list.append((k, v))
+
+        with self.lock:
+            self.cursor.executemany('REPLACE INTO zarr VALUES (?, ?)', kv_list)
+
+    def listdir(self, path=None):
+        """Return the sorted, distinct names of the entries directly under `path`.
+
+        For each matching key the part after the `path` prefix is taken and
+        cut at its first ``/``; duplicates are removed by the query.
+        """
+        path = normalize_storage_path(path)
+        # `_` is LIKE's single-character wildcard. It is only wanted for the
+        # root listing (any non-empty key); under a real prefix the separator
+        # must be a literal "/" so that e.g. "foo2/x" is not listed for "foo".
+        sep = '/' if path else '_'
+        keys = self.cursor.execute(
+            '''
+            SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM (
+                SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m
+                FROM zarr WHERE k LIKE (? || ? || "%")
+            ) ORDER BY l ASC
+            ''',
+            (path, path, sep)
+        )
+        keys = list(map(operator.itemgetter(0), keys))
+        return keys
+
+    def getsize(self, path=None):
+        """Return the summed length of the values stored directly under `path`.
+
+        Only keys whose remainder after the prefix contains no further ``/``
+        are counted; the result is 0 when nothing matches.
+        """
+        path = normalize_storage_path(path)
+        # NOTE(review): the prefix is matched with a bare `LIKE path || "%"`,
+        # so a sibling key that merely starts with the same characters (e.g.
+        # "foo2" for path "foo") is counted too -- confirm whether intended.
+        size = self.cursor.execute(
+            '''
+            SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
+            WHERE k LIKE (? || "%") AND
+                  0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/")
+            ''',
+            (path, path)
+        )
+        for s, in size:
+            return s
+
+    def rmdir(self, path=None):
+        """Delete every key under `path`; with an empty path, clear the store."""
+        path = normalize_storage_path(path)
+        if path:
+            with self.lock:
+                # match the literal "/" separator: the `_` wildcard would also
+                # delete keys under sibling prefixes such as "foo2/..."
+                self.cursor.execute(
+                    'DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)
+                )
+        else:
+            self.clear()
+
+    def clear(self):
+        """Remove all entries by dropping and re-creating the table."""
+        with self.lock:
+            self.cursor.executescript(
+                '''
+                BEGIN TRANSACTION;
+                    DROP TABLE zarr;
+                    CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB);
+                COMMIT TRANSACTION;
+                '''
+            )
+
+
 class ConsolidatedMetadataStore(MutableMapping):
     """A layer over other storage, where the metadata has been consolidated into
     a single key.
 
@@ -15,7 +15,7 @@
 
 
 from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore,
-                          DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob,
+                          DBMStore, LMDBStore, SQLiteStore, atexit_rmtree, atexit_rmglob,
                           LRUStoreCache)
 from zarr.core import Array
 from zarr.errors import PermissionError
@@ -1390,6 +1390,31 @@ def test_nbytes_stored(self):
         pass  # not implemented
 
 
+try:
+    import sqlite3
+except ImportError:  # pragma: no cover
+    sqlite3 = None
+
+
+@unittest.skipIf(sqlite3 is None, 'python built without sqlite')
+class TestArrayWithSQLiteStore(TestArray):
+    """Run the ``TestArray`` suite against an array held in a ``SQLiteStore``."""
+
+    @staticmethod
+    def create_array(read_only=False, **kwargs):
+        # split off the options meant for `Array`; the rest go to `init_array`
+        array_options = {
+            'cache_metadata': kwargs.pop('cache_metadata', True),
+            'cache_attrs': kwargs.pop('cache_attrs', True),
+        }
+        kwargs.setdefault('compressor', Zlib(1))
+
+        # temporary database file, with `atexit_rmtree` registered for it
+        db_path = mktemp(suffix='.db')
+        atexit.register(atexit_rmtree, db_path)
+        sqlite_store = SQLiteStore(db_path)
+
+        init_array(sqlite_store, **kwargs)
+        return Array(sqlite_store, read_only=read_only, **array_options)
+
+    def test_nbytes_stored(self):
+        pass  # not implemented
+
+
 class TestArrayWithNoCompressor(TestArray):
 
     def create_array(self, read_only=False, **kwargs):
 
@@ -17,8 +17,8 @@
 
 from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array,
                           array_meta_key, group_meta_key, atexit_rmtree,
-                          NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmglob,
-                          LRUStoreCache)
+                          NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore,
+                          atexit_rmglob, LRUStoreCache)
 from zarr.core import Array
 from zarr.compat import PY2, text_type
 from zarr.hierarchy import Group, group, open_group
@@ -928,6 +928,22 @@ def create_store():
         return store, None
 
 
+try:
+    import sqlite3
+except ImportError:  # pragma: no cover
+    sqlite3 = None
+
+
+@unittest.skipIf(sqlite3 is None, 'python built without sqlite')
+class TestGroupWithSQLiteStore(TestGroup):
+    """Run the ``TestGroup`` suite against a file-backed ``SQLiteStore``."""
+
+    def create_store(self):
+        # temporary database file, with `atexit_rmtree` registered for it
+        db_path = tempfile.mktemp(suffix='.db')
+        atexit.register(atexit_rmtree, db_path)
+        return SQLiteStore(db_path), None
+
+
 class TestGroupWithChunkStore(TestGroup):
 
     @staticmethod