Add autograd hook for python rpc call · pytorch/pytorch@9d4309d · GitHub
Commit 9d4309d
Add autograd hook for python rpc call
Pull Request resolved: #27576

1. Currently, if the autograd context is valid, an rpc is sent with autograd metadata even when its tensors do not require grads and no grad functions are attached. This is not ideal. This diff makes sure an rpc with autograd metadata is sent only if the autograd context is valid and the tensors require grads.
2. Meanwhile, create a utility to attach autograd info and functions as needed.
3. Add autograd send/recv functions for python rpc calls.
4. Make changes to support nested python rpc calls.
5. Disallow nested dist autograd contexts (was landed in #27022).

ghstack-source-id: 92154535
Differential Revision: [D17819153](https://our.internmc.facebook.com/intern/diff/D17819153/)
1 parent a5ac7f6 commit 9d4309d
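
The core rule of change 1, as a minimal standalone sketch (stand-in types, not the real torch/c10 API): autograd metadata is attached to an outgoing rpc only when a distributed autograd context is valid and at least one tensor requires grad.

#include <cstdint>
#include <vector>

// Stand-in for torch::Tensor; only the flag this rule inspects.
struct Tensor {
  bool requires_grad;
};

// Stand-in for torch::autograd::compute_requires_grad().
bool anyRequiresGrad(const std::vector<Tensor>& tensors) {
  for (const auto& t : tensors) {
    if (t.requires_grad) {
      return true;
    }
  }
  return false;
}

// Sentinel for "no context"; assumed here, mirroring kInvalidContextId below.
constexpr int64_t kInvalidContextId = -1;

// True when the rpc should be wrapped with autograd metadata.
bool shouldAttachAutograd(
    int64_t currentContextId, const std::vector<Tensor>& tensors) {
  return currentContextId != kInvalidContextId && anyRequiresGrad(tensors);
}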

File tree

11 files changed: +430 −178 lines


test/dist_autograd_test.py

Lines changed: 248 additions & 65 deletions
Large diffs are not rendered by default.

torch/csrc/distributed/autograd/context/dist_autograd_container.cpp

Lines changed: 11 additions & 0 deletions
@@ -156,6 +156,17 @@ int64_t DistAutogradContainer::getMaxId() {
   return max_id_;
 }
 
+void DistAutogradContainer::setCurrentContextId(int64_t contextId) {
+  TORCH_INTERNAL_ASSERT(
+      current_context_id_ == kInvalidContextId,
+      "Already have an autograd context id for this thread.");
+  current_context_id_ = contextId;
+}
+
+void DistAutogradContainer::clearCurrentContext() {
+  current_context_id_ = -1;
+}
+
 } // namespace autograd
 } // namespace distributed
 } // namespace torch
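
A standalone analogue of the per-thread protocol these two methods establish (assuming kInvalidContextId == -1, as clearCurrentContext() suggests): an id may be installed only when none is set, and clearing restores the invalid sentinel.

#include <cassert>
#include <cstdint>

// Hypothetical analogue of DistAutogradContainer's current-context
// bookkeeping for one thread; not the real class.
class CurrentContextId {
 public:
  static constexpr int64_t kInvalidContextId = -1;

  void set(int64_t contextId) {
    // Mirrors the TORCH_INTERNAL_ASSERT above: never overwrite a live id.
    assert(id_ == kInvalidContextId &&
           "Already have an autograd context id for this thread.");
    id_ = contextId;
  }

  void clear() {
    id_ = kInvalidContextId;
  }

  bool valid() const {
    return id_ != kInvalidContextId;
  }

 private:
  int64_t id_ = kInvalidContextId;
};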

torch/csrc/distributed/autograd/context/dist_autograd_container.h

Lines changed: 7 additions & 1 deletion
@@ -59,9 +59,15 @@ class TORCH_API DistAutogradContainer {
   // can be generated by this worker.
   int64_t getMaxId();
 
-  // retrieves the worker ID for this node
+  // Retrieves the worker ID for this node
   rpc::worker_id_t getWorkerId() const;
 
+  // Can set current context id if there is no valid context yet
+  void setCurrentContextId(int64_t contextId);
+
+  // Clear current context id
+  void clearCurrentContext();
+
  private:
   DistAutogradContainer();
   ~DistAutogradContainer() = default;

torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.cpp

Lines changed: 3 additions & 4 deletions
@@ -17,15 +17,14 @@ RpcWithAutograd::RpcWithAutograd(
     worker_id_t fromWorkerId,
     MessageType messageType,
     const AutogradMetadata& autogradMetadata,
-    std::unique_ptr<RpcCommandBase> wrappedRpc)
+    rpc::Message&& wrappedMessage)
     : fromWorkerId_(fromWorkerId),
       messageType_(messageType),
-      autogradMetadata_(autogradMetadata) {
-  TORCH_INTERNAL_ASSERT(wrappedRpc != nullptr, "wrappedRpc cannot be null!");
+      autogradMetadata_(autogradMetadata),
+      wrappedMessage_(std::move(wrappedMessage)) {
   TORCH_INTERNAL_ASSERT(
       messageType_ == MessageType::FORWARD_AUTOGRAD_REQ ||
       messageType_ == MessageType::FORWARD_AUTOGRAD_RESP);
-  wrappedMessage_ = std::move(*wrappedRpc).toMessage();
   tensors_ = wrappedMessage_.tensors();
   wrappedMessageType_ = wrappedMessage_.type();
 }

torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h

Lines changed: 11 additions & 1 deletion
@@ -18,7 +18,7 @@ class TORCH_API RpcWithAutograd final : public rpc::RpcCommandBase {
       rpc::worker_id_t fromWorkerId,
       rpc::MessageType messageType,
       const AutogradMetadata& autogradMetadata,
-      std::unique_ptr<rpc::RpcCommandBase> wrappedRpc);
+      rpc::Message&& wrappedMessage);
 
   // Used when receiving an RPC over the wire.
   RpcWithAutograd(
@@ -57,10 +57,20 @@ class TORCH_API RpcWithAutograd final : public rpc::RpcCommandBase {
   rpc::MessageType messageType_;
 
   AutogradMetadata autogradMetadata_;
+
+  // Since wrappedMessage_ is destructively constructed from wrappedRpc_,
+  // only one of the two is valid at a time. wrappedRpc_ is used when
+  // constructing a receive-side RpcWithAutograd; wrappedMessage_ is used
+  // when constructing a send-side RpcWithAutograd.
+
+  // This is valid when a receive-side RpcWithAutograd is constructed via
+  // fromMessage, and nullptr on the send side before toMessage is called.
   std::unique_ptr<RpcCommandBase> wrappedRpc_;
 
   // Serialized message representing wrappedRpc_. Used mostly as a cache to
   // avoid serializing the request twice.
+  // This is empty when a receive-side RpcWithAutograd is constructed via
+  // fromMessage, and valid on the send side before toMessage is called.
   rpc::Message wrappedMessage_;
 
   // message type of the wrappedMessage, this is stored separately since
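
The comments above describe two members that are never valid at the same time. A compilable sketch of that shape (stand-in types, not the real rpc classes), including the rvalue-qualified toMessage() that call sites invoke as std::move(*rpcWithAutograd).toMessage():

#include <memory>
#include <string>
#include <utility>

struct Message {  // stand-in for rpc::Message
  std::string payload;
};

struct RpcCommandBase {  // stand-in for rpc::RpcCommandBase
  virtual ~RpcCommandBase() = default;
};

class WithAutograd {
 public:
  // Send path: wraps an already-serialized message; wrappedRpc_ stays null.
  explicit WithAutograd(Message&& wrappedMessage)
      : wrappedMessage_(std::move(wrappedMessage)) {}

  // Receive path: wraps the deserialized command; wrappedMessage_ stays empty.
  explicit WithAutograd(std::unique_ptr<RpcCommandBase> wrappedRpc)
      : wrappedRpc_(std::move(wrappedRpc)) {}

  // Rvalue-qualified: consuming the wrapper hands out the cached message,
  // so the request is never serialized twice.
  Message toMessage() && {
    return std::move(wrappedMessage_);
  }

 private:
  std::unique_ptr<RpcCommandBase> wrappedRpc_;  // receive side only
  Message wrappedMessage_;                      // send side only
};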

torch/csrc/distributed/autograd/utils.cpp

Lines changed: 78 additions & 32 deletions
@@ -9,54 +9,100 @@ namespace torch {
 namespace distributed {
 namespace autograd {
 
+using torch::distributed::autograd::AutogradMetadata;
+using torch::distributed::autograd::RpcWithAutograd;
+using torch::distributed::rpc::FutureMessage;
 using torch::distributed::rpc::Message;
+using torch::distributed::rpc::MessageType;
+using torch::distributed::rpc::RpcAgent;
+using torch::distributed::rpc::RpcCommandBase;
+using torch::distributed::rpc::WorkerInfo;
 
 void addSendRpcBackward(
     DistAutogradContext& autogradContext,
     const AutogradMetadata& autogradMetadata,
     std::vector<torch::Tensor>& tensors,
     const rpc::worker_id_t dst) {
   // Attach the appropriate autograd edges.
-  if (torch::autograd::compute_requires_grad(tensors)) {
-    auto grad_fn = std::make_shared<SendRpcBackward>();
-    grad_fn->set_next_edges(torch::autograd::collect_next_edges(tensors));
-
-    // Add the appropriate input metadata for the grad_fn.
-    for (const auto& tensor : tensors) {
-      grad_fn->add_input_metadata(tensor);
-    }
-
-    // Record the send autograd function in our current context.
-    autogradContext.addSendFunction(
-        grad_fn, autogradMetadata.autogradMessageId);
-    // Record the workerID
-    autogradContext.addKnownWorkerId(dst);
+  auto grad_fn = std::make_shared<SendRpcBackward>();
+  grad_fn->set_next_edges(torch::autograd::collect_next_edges(tensors));
+
+  // Add the appropriate input metadata for the grad_fn.
+  for (const auto& tensor : tensors) {
+    grad_fn->add_input_metadata(tensor);
   }
+
+  // Record the send autograd function in our current context.
+  autogradContext.addSendFunction(grad_fn, autogradMetadata.autogradMessageId);
+  // Record the workerID
+  autogradContext.addKnownWorkerId(dst);
 }
 
 DistAutogradContext* addRecvRpcBackward(
     const AutogradMetadata& autogradMetadata,
     std::vector<torch::Tensor>& tensors,
     rpc::worker_id_t fromWorkerId) {
-  if (torch::autograd::compute_requires_grad(tensors)) {
-    // Initialize autograd context if necessary.
-    auto& autogradContainer = DistAutogradContainer::getInstance();
-    DistAutogradContext& autogradContext = autogradContainer.getOrCreateContext(
-        autogradMetadata.autogradContextId);
-
-    // Attach the tensors as inputs to the autograd function.
-    auto grad_fn = std::make_shared<RecvRpcBackward>(
-        autogradMetadata, autogradContext, fromWorkerId);
-    for (auto& tensor : tensors) {
-      torch::autograd::set_history(tensor, grad_fn);
-    }
-
-    // Now update the autograd context with the necessary information.
-    autogradContext.addRecvFunction(
-        grad_fn, autogradMetadata.autogradMessageId);
-    return &autogradContext;
+  TORCH_INTERNAL_ASSERT(
+      torch::autograd::compute_requires_grad(tensors),
+      "Received tensors do not require grad, addRecvRpcBackward should not be called");
+  // Initialize autograd context if necessary.
+  auto& autogradContainer = DistAutogradContainer::getInstance();
+  DistAutogradContext& autogradContext =
+      autogradContainer.getOrCreateContext(autogradMetadata.autogradContextId);
+
+  // Attach the tensors as inputs to the autograd function.
+  auto grad_fn = std::make_shared<RecvRpcBackward>(
+      autogradMetadata, autogradContext, fromWorkerId);
+  for (auto& tensor : tensors) {
+    torch::autograd::set_history(tensor, grad_fn);
+  }
+
+  // Now update the autograd context with the necessary information.
+  autogradContext.addRecvFunction(grad_fn, autogradMetadata.autogradMessageId);
+  return &autogradContext;
+}
+
+Message getMessageWithAutograd(
+    const rpc::worker_id_t dstId,
+    torch::distributed::rpc::Message&& wrappedRpcMsg,
+    MessageType msgType) {
+  auto& autogradContainer = DistAutogradContainer::getInstance();
+
+  // If there is no valid context or no tensor requires grad, send the
+  // original rpc message. Otherwise, attach grad info and grad functions
+  // and send an rpcWithAutograd message.
+  if (!autogradContainer.hasValidContext() ||
+      !torch::autograd::compute_requires_grad(wrappedRpcMsg.tensors())) {
+    return std::move(wrappedRpcMsg);
   }
-  return nullptr;
+
+  // Retrieve the appropriate context to modify.
+  auto& autogradContext = autogradContainer.currentContext();
+
+  // Wrap the original rpc with autograd information.
+  AutogradMetadata autogradMetadata(
+      autogradContext.contextId(), autogradContainer.newAutogradMessageId());
+  auto rpcWithAutograd = c10::guts::make_unique<RpcWithAutograd>(
+      RpcAgent::getDefaultRpcAgent()->getWorkerInfo().id_,
+      msgType,
+      autogradMetadata,
+      std::move(wrappedRpcMsg));
+
+  // Record autograd information for 'send'.
+  addSendRpcBackward(
+      autogradContext, autogradMetadata, rpcWithAutograd->tensors(), dstId);
+
+  return std::move(*rpcWithAutograd).toMessage();
+}
+
+std::shared_ptr<FutureMessage> sendMessageWithAutograd(
+    RpcAgent& agent,
+    const WorkerInfo& dst,
+    torch::distributed::rpc::Message&& wrappedRpcMsg) {
+  auto msg = getMessageWithAutograd(
+      dst.id_, std::move(wrappedRpcMsg), MessageType::FORWARD_AUTOGRAD_REQ);
+
+  return agent.send(dst, std::move(msg));
 }
 
 } // namespace autograd
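
getMessageWithAutograd() is effectively a decorator with a fast path, and sendMessageWithAutograd() is the single funnel call sites go through. A standalone sketch of that control flow (bools stand in for hasValidContext() and compute_requires_grad(); the types are not the real rpc classes):

#include <functional>
#include <string>
#include <utility>

struct Message {  // stand-in for rpc::Message
  std::string payload;
  bool wrappedWithAutograd = false;  // stand-in for FORWARD_AUTOGRAD_REQ
};

// Stand-in for getMessageWithAutograd(): wrap only when both conditions hold.
Message withAutogradIfNeeded(
    Message&& wrapped, bool hasValidContext, bool tensorsRequireGrad) {
  if (!hasValidContext || !tensorsRequireGrad) {
    return std::move(wrapped);  // fast path: forward the original message
  }
  wrapped.wrappedWithAutograd = true;  // stand-in for building RpcWithAutograd
  return std::move(wrapped);
}

// Stand-in for sendMessageWithAutograd(): every rpc call site funnels
// through here, so builtin and python-udf calls get identical handling.
void sendWithAutograd(
    const std::function<void(Message&&)>& send,
    Message&& msg,
    bool hasValidContext,
    bool tensorsRequireGrad) {
  send(withAutogradIfNeeded(
      std::move(msg), hasValidContext, tensorsRequireGrad));
}

This funnel is what lets pyRpcBuiltin and pyRpcPythonUdf below shrink to single sendMessageWithAutograd calls.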

torch/csrc/distributed/autograd/utils.h

Lines changed: 16 additions & 0 deletions
@@ -32,6 +32,22 @@ TORCH_API DistAutogradContext* addRecvRpcBackward(
     std::vector<torch::Tensor>& tensors,
     rpc::worker_id_t fromWorkerId);
 
+// This utility is used internally to wrap autograd info and attach an
+// autograd function for each type of rpc call. If the call has a valid
+// context and its tensors require grads, it returns an RpcWithAutograd
+// message; otherwise it returns the original rpc message.
+TORCH_API rpc::Message getMessageWithAutograd(
+    const rpc::worker_id_t dstId,
+    rpc::Message&& wrappedRpcMsg,
+    rpc::MessageType msgType);
+
+// Send message after autograd checking
+TORCH_API std::shared_ptr<torch::distributed::rpc::FutureMessage>
+sendMessageWithAutograd(
+    rpc::RpcAgent& agent,
+    const rpc::WorkerInfo& dst,
+    rpc::Message&& wrappedRpcMsg);
+
 } // namespace autograd
 } // namespace distributed
 } // namespace torch

torch/csrc/distributed/rpc/python_functions.cpp

Lines changed: 7 additions & 28 deletions
@@ -124,28 +124,8 @@ std::shared_ptr<FutureMessage> pyRpcBuiltin(
   Stack stack;
   auto op = matchBuiltinOp(opName, args, kwargs, stack);
   auto scriptCall = c10::guts::make_unique<ScriptCall>(op, std::move(stack));
-  auto& autogradContainer = DistAutogradContainer::getInstance();
-  if (autogradContainer.hasValidContext()) {
-    // Retrieve the appropriate context to modify.
-    auto& autogradContext = autogradContainer.currentContext();
-
-    // Wrap the original rpc with autograd information.
-    AutogradMetadata autogradMetadata(
-        autogradContext.contextId(), autogradContainer.newAutogradMessageId());
-    RpcWithAutograd rpcWithAutograd(
-        agent.getWorkerInfo().id_,
-        MessageType::FORWARD_AUTOGRAD_REQ,
-        autogradMetadata,
-        std::move(scriptCall));
-
-    // Record autograd information for 'send'.
-    addSendRpcBackward(
-        autogradContext, autogradMetadata, rpcWithAutograd.tensors(), dst.id_);
-
-    return agent.send(dst, std::move(rpcWithAutograd).toMessage());
-  } else {
-    return agent.send(dst, std::move(*scriptCall).toMessage());
-  }
+  return sendMessageWithAutograd(
+      agent, dst, std::move(*scriptCall).toMessage());
 }
 
 PyRRef pyRemoteBuiltin(
@@ -179,12 +159,11 @@ std::shared_ptr<FutureMessage> pyRpcPythonUdf(
     const WorkerInfo& dst,
     std::string& pickledPythonUDF,
     std::vector<torch::Tensor>& tensors) {
-  return agent.send(
-      dst,
-      PythonUDFCall(
-          std::vector<char>(pickledPythonUDF.begin(), pickledPythonUDF.end()),
-          tensors)
-          .toMessage());
+  auto pythonUDFCall = c10::guts::make_unique<PythonUDFCall>(
+      std::vector<char>(pickledPythonUDF.begin(), pickledPythonUDF.end()),
+      tensors);
+  return sendMessageWithAutograd(
+      agent, dst, std::move(*pythonUDFCall).toMessage());
 }
 
 PyRRef pyRemotePythonUdf(

torch/csrc/distributed/rpc/request_callback.cpp

Lines changed: 18 additions & 0 deletions
@@ -20,9 +20,27 @@ Message createException(const Message& request, const std::exception& e) {
       request.id());
 }
 
+// When the request message has autograd info, processMessage() will set up a
+// valid current context id. This struct is used to clean up the current
+// context id after processMessage() is done.
+struct ClearAutogradContextGuard {
+  ClearAutogradContextGuard() = default;
+  ~ClearAutogradContextGuard() {
+    clear();
+  }
+
+  void clear() {
+    auto& autogradContainer = DistAutogradContainer::getInstance();
+    autogradContainer.clearCurrentContext();
+  }
+};
+
 } // anonymous namespace
 
 Message RequestCallback::operator()(Message& request) const {
+  // For a recv thread, the current context id should be invalid outside
+  // processMessage().
+  ClearAutogradContextGuard guard;
   try {
     return processMessage(request);
   } catch (std::exception& e) {
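
ClearAutogradContextGuard is plain RAII: its destructor runs on both the normal return and the exception path through the catch block, so a server thread cannot leak a context id across requests. A self-contained sketch of the pattern (hypothetical names, not the PyTorch types):

#include <cstdint>

namespace sketch {

constexpr int64_t kInvalidContextId = -1;
// Stand-in for DistAutogradContainer's per-thread current context id.
thread_local int64_t currentContextId = kInvalidContextId;

struct ClearGuard {
  ~ClearGuard() {
    // Runs on normal exit and during stack unwinding alike.
    currentContextId = kInvalidContextId;
  }
};

void handleRequest(int64_t contextIdFromWire) {
  ClearGuard guard;                      // armed before any processing
  currentContextId = contextIdFromWire;  // what processMessage() would set
  // ... process the request; may throw ...
}  // guard's destructor resets the id here, throw or not

} // namespace sketch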

0 commit comments