
Commit aee96bb

wz337 authored and pytorchmergebot committed
[PT-D][Checkpointing] Move distributed checkpointing from torch.distributed._shard.checkpoint to torch.distributed.checkpoint (#88698)
Context in RFC: #86620. The .rst file will be finalized in subsequent PRs.
Pull Request resolved: #88698. Approved by: https://github.com/wanchaol
1 parent 6b521bb commit aee96bb
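
For orientation, here is a minimal save/load sketch against the new package path. It is illustrative only: the toy nn.Linear module, the /tmp/dcp_example directory, and the no_dist=True flag for a single-process run are assumptions made for the example, not part of this PR; a real job would run under an initialized process group and drop no_dist.

import torch.nn as nn
import torch.distributed.checkpoint as dist_cp  # new location after this PR

CHECKPOINT_DIR = "/tmp/dcp_example"  # hypothetical path for the example

model = nn.Linear(4, 4)  # any module's state_dict works the same way
state_dict = model.state_dict()

# Write the state_dict out as a directory of checkpoint files.
dist_cp.save_state_dict(
    state_dict=state_dict,
    storage_writer=dist_cp.FileSystemWriter(CHECKPOINT_DIR),
    no_dist=True,  # single-process example; omit under a real process group
)

# Read it back; load_state_dict restores tensors into the passed-in state_dict.
dist_cp.load_state_dict(
    state_dict=state_dict,
    storage_reader=dist_cp.FileSystemReader(CHECKPOINT_DIR),
    no_dist=True,
)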

20 files changed: +389 additions, -159 deletions
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+Distributed Checkpoint
+========================
+
+.. automodule:: torch.distributed.checkpoint

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ Features described in this documentation are classified by release status:
    torch.distributed.elastic <distributed.elastic>
    torch.distributed.fsdp <fsdp>
    torch.distributed.optim <distributed.optim>
+   torch.distributed.checkpoint <distributed.checkpoint>
    torch.distributions <distributions>
    torch.fft <fft>
    futures

test/distributed/_shard/checkpoint/test_checkpoint.py renamed to test/distributed/checkpoint/test_checkpoint.py

Lines changed: 3 additions & 3 deletions
@@ -20,17 +20,17 @@
 
 from torch.distributed._shard import sharded_tensor
 
-from torch.distributed._shard.checkpoint.default_planner import (
+from torch.distributed.checkpoint.default_planner import (
     _create_default_local_metadata,
 )
 
-from torch.distributed._shard.checkpoint.metadata import (
+from torch.distributed.checkpoint.metadata import (
     BytesStorageMetadata,
     Metadata,
     TensorStorageMetadata,
 )
 
-from torch.distributed._shard.checkpoint.planner import (
+from torch.distributed.checkpoint.planner import (
     SavePlan,
     SavePlanner,
     LoadPlan,

test/distributed/_shard/checkpoint/test_file_system_checkpoint.py renamed to test/distributed/checkpoint/test_file_system_checkpoint.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
     run_tests,
 )
 
-from torch.distributed._shard.checkpoint import (
+from torch.distributed.checkpoint import (
     FileSystemReader,
     FileSystemWriter,
     load_state_dict,

test/distributed/_shard/checkpoint/test_file_system_checkpoint_cpu.py renamed to test/distributed/checkpoint/test_file_system_checkpoint_cpu.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
     run_tests,
 )
 
-from torch.distributed._shard.checkpoint import (
+from torch.distributed.checkpoint import (
     FileSystemReader,
     FileSystemWriter,
     load_state_dict,

test/distributed/_shard/checkpoint/test_planner.py renamed to test/distributed/checkpoint/test_planner.py

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@
 import sys
 
 import torch
-from torch.distributed._shard.checkpoint.planner import LoadItemType, WriteItemType
+from torch.distributed.checkpoint.planner import LoadItemType, WriteItemType
 
 from torch.distributed._shard.sharded_tensor import (
     Shard,
@@ -18,13 +18,13 @@
     TEST_WITH_DEV_DBG_ASAN,
     run_tests,
 )
-from torch.distributed._shard.checkpoint.metadata import BytesStorageMetadata, MetadataIndex, TensorStorageMetadata
+from torch.distributed.checkpoint.metadata import BytesStorageMetadata, MetadataIndex, TensorStorageMetadata
 from torch.testing._internal.distributed.distributed_utils import (
     with_fake_comms,
     with_dist
 )
 
-from torch.distributed._shard.checkpoint.default_planner import (
+from torch.distributed.checkpoint.default_planner import (
     create_default_global_save_plan,
     create_default_local_save_plan,
     create_default_local_load_plan,

test/distributed/_shard/checkpoint/test_utils.py renamed to test/distributed/checkpoint/test_utils.py

Lines changed: 2 additions & 2 deletions
@@ -17,8 +17,8 @@
     TEST_WITH_DEV_DBG_ASAN,
     run_tests,
 )
-from torch.distributed._shard.checkpoint.utils import find_state_dict_object
-from torch.distributed._shard.checkpoint.metadata import MetadataIndex
+from torch.distributed.checkpoint.utils import find_state_dict_object
+from torch.distributed.checkpoint.metadata import MetadataIndex
 from torch.testing._internal.distributed.distributed_utils import (
     with_fake_comms
 )
Lines changed: 10 additions & 19 deletions
@@ -1,21 +1,12 @@
-from .metadata import (
-    TensorStorageMetadata,
-    BytesStorageMetadata,
-    ChunkStorageMetadata,
-    Metadata,
-)
-from .state_dict_loader import load_state_dict
-from .state_dict_saver import save_state_dict
-from .storage import StorageReader, StorageWriter
-from .filesystem import FileSystemReader, FileSystemWriter
-from .api import CheckpointException
-
+# Keep old package for BC purposes, this file should be removed once
+# everything moves to the `torch.distributed.checkpoint` package.
+import sys
+import torch
+import warnings
 
-from .planner import (
-    SavePlanner,
-    LoadPlanner,
-    SavePlan,
-    LoadPlan,
-    ReadItem,
-    WriteItem,
+from torch.distributed.checkpoint import * # noqa: F403
+warnings.warn(
+    "torch.distributed._shard.checkpoint will be deprecated, use torch.distributed.checkpoint instead",
+    DeprecationWarning
 )
+sys.modules['torch.distributed._shard.checkpoint'] = torch.distributed.checkpoint
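
With this shim in place, imports of the old path keep working while nudging users toward the new one; a quick sanity-check sketch (only meaningful on the first import of the old path in a fresh interpreter):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # The first import of the old path executes the shim above and emits the DeprecationWarning.
    import torch.distributed._shard.checkpoint as old_cp

import torch.distributed.checkpoint as new_cp

# After the sys.modules re-binding, both names expose the same objects, so existing
# call sites such as old_cp.FileSystemWriter(...) are unaffected.
assert old_cp.FileSystemWriter is new_cp.FileSystemWriter
print([str(w.message) for w in caught])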
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+from .metadata import (
+    TensorStorageMetadata,
+    BytesStorageMetadata,
+    ChunkStorageMetadata,
+    Metadata,
+)
+from .state_dict_loader import load_state_dict
+from .state_dict_saver import save_state_dict
+from .storage import StorageReader, StorageWriter
+from .filesystem import FileSystemReader, FileSystemWriter
+from .api import CheckpointException
+
+
+from .planner import (
+    SavePlanner,
+    LoadPlanner,
+    SavePlan,
+    LoadPlan,
+    ReadItem,
+    WriteItem,
+)

torch/distributed/_shard/checkpoint/api.py renamed to torch/distributed/checkpoint/api.py

Lines changed: 9 additions & 1 deletion
@@ -3,20 +3,28 @@
 
 WRAPPED_EXCEPTION = Tuple[BaseException, tb.StackSummary]
 
+__all__ = ["CheckpointException"]
+
+
 def _wrap_exception(exc: BaseException) -> WRAPPED_EXCEPTION:
     return (exc, tb.extract_tb(exc.__traceback__))
 
+
 def _is_wrapped_exception(obj: Any) -> bool:
     if not isinstance(obj, tuple):
         return False
     if len(obj) != 2:
         return False
-    return isinstance(obj[0], BaseException) and isinstance(obj[1], tb.StackSummary)
+    return isinstance(obj[0], BaseException) and isinstance(
+        obj[1], tb.StackSummary
+    )
+
 
 class CheckpointException(BaseException):
     """
     Exception raised if failure was detected as part of a checkpoint load or save.
     """
+
     def __init__(self, msg: str, failures: Dict[int, WRAPPED_EXCEPTION]):
         super().__init__(msg, failures)
         self._failures = failures
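
CheckpointException derives from BaseException, so a plain `except Exception` will not catch it; callers are expected to guard load/save calls explicitly. A hedged sketch, using a deliberately missing directory as the illustrative failure (whether this particular error surfaces as a CheckpointException depends on where the read fails):

import torch.distributed.checkpoint as dist_cp

state_dict = {}  # illustrative; normally a module's state_dict()

try:
    dist_cp.load_state_dict(
        state_dict=state_dict,
        storage_reader=dist_cp.FileSystemReader("/path/that/does/not/exist"),  # hypothetical path
        no_dist=True,
    )
except dist_cp.CheckpointException as e:
    # str(e) summarizes the per-rank failures collected in the wrapped-exception mapping.
    print(f"checkpoint load failed: {e}")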

torch/distributed/_shard/checkpoint/default_planner.py renamed to torch/distributed/checkpoint/default_planner.py

Lines changed: 61 additions & 21 deletions
@@ -24,37 +24,53 @@
     MetadataIndex,
     Metadata,
     STATE_DICT_TYPE,
-    STORAGE_TYPES
+    STORAGE_TYPES,
 )
 
 from .planner_helpers import (
     _create_read_items,
     _create_write_items,
-    _create_default_metadata_only_plan
+    _create_default_metadata_only_plan,
 )
 
-from .utils import (
-    find_state_dict_object
-)
+from .utils import find_state_dict_object
+
+__all__ = [
+    "DefaultSavePlanner",
+    "DefaultLoadPlanner",
+    "create_default_local_load_plan",
+    "create_default_global_load_plan",
+    "create_default_local_save_plan",
+    "create_default_global_save_plan",
+]
+
 
 class DefaultSavePlanner(SavePlanner):
     def init(self, state_dict: Dict[str, Any], is_coordinator: bool) -> None:
         self.state_dict = state_dict
         self.is_coordinator = is_coordinator
 
     def create_local_plan(self) -> SavePlan:
-        self.plan = create_default_local_save_plan(self.state_dict, self.is_coordinator)
+        self.plan = create_default_local_save_plan(
+            self.state_dict, self.is_coordinator
+        )
         return self.plan
 
-    def create_global_plan(self, all_plans: List[SavePlan]) -> Tuple[List[SavePlan], Metadata]:
-        self.global_plan, self.metadata = create_default_global_save_plan(all_plans)
+    def create_global_plan(
+        self, all_plans: List[SavePlan]
+    ) -> Tuple[List[SavePlan], Metadata]:
+        self.global_plan, self.metadata = create_default_global_save_plan(
+            all_plans
+        )
         return self.global_plan, self.metadata
 
     def finish_plan(self, new_plan: SavePlan) -> SavePlan:
         self.plan = new_plan
         return new_plan
 
-    def resolve_data(self, write_item: WriteItem) -> Union[torch.Tensor, io.BytesIO]:
+    def resolve_data(
+        self, write_item: WriteItem
+    ) -> Union[torch.Tensor, io.BytesIO]:
         object = self.lookup_object(write_item.index)
         return self.transform_object(write_item, object)
 
@@ -76,7 +92,12 @@ def transform_object(self, write_item: WriteItem, object: Any):
 
 
 class DefaultLoadPlanner(LoadPlanner):
-    def init(self, state_dict: STATE_DICT_TYPE, metadata: Metadata, is_coordinator: bool) -> None:
+    def init(
+        self,
+        state_dict: STATE_DICT_TYPE,
+        metadata: Metadata,
+        is_coordinator: bool,
+    ) -> None:
         self.state_dict = state_dict
         self.metadata = metadata
         self.is_coordinator = is_coordinator
@@ -110,7 +131,9 @@ def transform_tensor(self, read_item: ReadItem, tensor: torch.Tensor):
         """
         This is an extension from the planner interface to make it easy to extend the default planner
         """
-        return narrow_tensor_by_index(tensor, read_item.dest_offsets, read_item.lengths)
+        return narrow_tensor_by_index(
+            tensor, read_item.dest_offsets, read_item.lengths
+        )
 
 
 def create_default_local_load_plan(
@@ -133,7 +156,10 @@ def create_default_local_load_plan(
 
     return LoadPlan(requests)
 
-def create_default_global_load_plan(all_plans: List[LoadPlan]) -> List[LoadPlan]:
+
+def create_default_global_load_plan(
+    all_plans: List[LoadPlan],
+) -> List[LoadPlan]:
     """
     Create global load plan used by DefaultLoadPlanner.
 
@@ -142,7 +168,10 @@ def create_default_global_load_plan(all_plans: List[LoadPlan]) -> List[LoadPlan]
     """
     return all_plans
 
-def create_default_local_save_plan(state_dict: Dict[str, Any], is_coordinator: bool) -> SavePlan:
+
+def create_default_local_save_plan(
+    state_dict: Dict[str, Any], is_coordinator: bool
+) -> SavePlan:
     """
     Create the ``SavePlan`` used by DefaultSavePlanner.
 
@@ -157,7 +186,10 @@ def create_default_local_save_plan(state_dict: Dict[str, Any], is_coordinator: b
         requests += _create_write_items(fqn, obj)
     return SavePlan(requests)
 
-def create_default_global_save_plan(all_plans: List[SavePlan]) -> Tuple[List[SavePlan], Metadata]:
+
+def create_default_global_save_plan(
+    all_plans: List[SavePlan],
+) -> Tuple[List[SavePlan], Metadata]:
     """
     Create the global plan and metadata used by DefaultSavePlanner.
 
@@ -180,21 +212,29 @@ def create_default_global_save_plan(all_plans: List[SavePlan]) -> Tuple[List[Sav
             assert item.tensor_data is not None
             tensor_md = cast(
                 TensorStorageMetadata,
-                md.setdefault(item.index.fqn, TensorStorageMetadata(
-                    properties=item.tensor_data.properties,
-                    size=item.tensor_data.size,
-                    chunks=[],
-                ))
+                md.setdefault(
+                    item.index.fqn,
+                    TensorStorageMetadata(
+                        properties=item.tensor_data.properties,
+                        size=item.tensor_data.size,
+                        chunks=[],
+                    ),
+                ),
+            )
+            new_index = dataclasses.replace(
+                item.index, index=len(tensor_md.chunks)
            )
-            new_index = dataclasses.replace(item.index, index=len(tensor_md.chunks))
             new_item = dataclasses.replace(item, index=new_index)
             new_items.append(new_item)
 
-            assert item.tensor_data.chunk is not None, f"Cannot create MD for tensor without bounds. FQN: {item.index.fqn}"
+            assert (
+                item.tensor_data.chunk is not None
+            ), f"Cannot create MD for tensor without bounds. FQN: {item.index.fqn}"
             tensor_md.chunks.append(item.tensor_data.chunk)
         new_plans.append(dataclasses.replace(plan, items=new_items))
     return (new_plans, Metadata(md))
 
+
 def _create_default_local_metadata(state_dict: STATE_DICT_TYPE) -> Metadata:
     """
     Return the ``Metadata`` if DefaultSavePlanner was used to checkpoint ``state_dict``.
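
The default planner is meant to be extended (see the transform_tensor docstring above); a minimal subclass sketch, where the class name and the print are illustrative rather than part of this PR:

from torch.distributed.checkpoint.default_planner import DefaultSavePlanner
from torch.distributed.checkpoint.planner import SavePlan


class LoggingSavePlanner(DefaultSavePlanner):
    """Reuse the default planning logic, just add some visibility."""

    def create_local_plan(self) -> SavePlan:
        plan = super().create_local_plan()
        # Each WriteItem in plan.items describes one tensor or bytes object this rank will write.
        print(f"rank-local save plan has {len(plan.items)} write items")
        return plan

Such a planner would be passed via the planner= argument of save_state_dict.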
