Merge branch 'master' into develop/opinfo/repeat-tile · pytorch/pytorch@f7171b7 · GitHub

Commit f7171b7

Merge branch 'master' into develop/opinfo/repeat-tile
2 parents 1973767 + 3f052ba

File tree

88 files changed: +1120 −413 lines changed

Some content is hidden: large commits have some content hidden by default, so only a subset of the 88 changed files is shown below.

.circleci/scripts/binary_linux_test.sh

Lines changed: 19 additions & 19 deletions
@@ -39,27 +39,27 @@ fi
 # conda build scripts themselves. These should really be consolidated
 pkg="/final_pkgs/\$(ls /final_pkgs)"
 if [[ "$PACKAGE_TYPE" == conda ]]; then
-  conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
-  if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
-    retry conda install \${EXTRA_CONDA_FLAGS} -y cpuonly -c pytorch
-  fi
-  retry conda install \${EXTRA_CONDA_FLAGS} -yq future numpy protobuf six
-  if [[ "$DESIRED_CUDA" != 'cpu' ]]; then
-    # DESIRED_CUDA is in format cu90 or cu102
-    if [[ "${#DESIRED_CUDA}" == 4 ]]; then
-      cu_ver="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3}"
-    else
-      cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
+  (
+    # For some reason conda likes to re-activate the conda environment when attempting this install
+    # which means that a deactivate is run and some variables might not exist when that happens,
+    # namely CONDA_MKL_INTERFACE_LAYER_BACKUP from libblas so let's just ignore unbound variables when
+    # it comes to the conda installation commands
+    set +u
+    conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
+    if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
+      retry conda install \${EXTRA_CONDA_FLAGS} -y cpuonly -c pytorch
     fi
-    (
-      # For some reason conda likes to re-activate the conda environment when attempting this install
-      # which means that a deactivate is run and some variables might not exist when that happens,
-      # namely CONDA_MKL_INTERFACE_LAYER_BACKUP from libblas so let's just ignore unbound variables when
-      # it comes to the conda installation commands
-      set +u
+    retry conda install \${EXTRA_CONDA_FLAGS} -yq future numpy protobuf six
+    if [[ "$DESIRED_CUDA" != 'cpu' ]]; then
+      # DESIRED_CUDA is in format cu90 or cu102
+      if [[ "${#DESIRED_CUDA}" == 4 ]]; then
+        cu_ver="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3}"
+      else
+        cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
+      fi
       retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c pytorch "cudatoolkit=\${cu_ver}"
-    )
-  fi
+    fi
+  )
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
   pip install "\$pkg"
   retry pip install -q future numpy protobuf six
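For reference, a minimal standalone sketch (not part of this commit) of how the DESIRED_CUDA substring slicing above resolves to a cudatoolkit version string, assuming the cu90/cu102 naming described in the script's own comment:

#!/usr/bin/env bash
# Sketch of the DESIRED_CUDA -> cu_ver parsing used in the diff above.
# "cu90"  (4 chars): major "9"  + minor "0" -> "9.0"
# "cu102" (5 chars): major "10" + minor "2" -> "10.2"
for DESIRED_CUDA in cu90 cu102; do
  if [[ "${#DESIRED_CUDA}" == 4 ]]; then
    cu_ver="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3}"
  else
    cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
  fi
  echo "$DESIRED_CUDA -> $cu_ver"   # prints "cu90 -> 9.0" and "cu102 -> 10.2"
done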

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -30,8 +30,16 @@ endif()
 
 set(CMAKE_INSTALL_MESSAGE NEVER)
 
+# check and set CMAKE_CXX_STANDARD
+string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
+if(env_cxx_standard GREATER -1)
+  message(
+      WARNING "C++ standard version definition detected in environment variable."
+      "PyTorch requires -std=c++14. Please remove -std=c++ settings in your environment.")
+endif()
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_C_STANDARD 11)
+
 if(DEFINED GLIBCXX_USE_CXX11_ABI)
   if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
     set(CXX_STANDARD_REQUIRED ON)

aten/src/ATen/LegacyTHFunctionsCUDA.h

Lines changed: 2 additions & 2 deletions
@@ -29,8 +29,8 @@ Tensor & _th_put_(Tensor & self, const Tensor & index, const Tensor & source, bo
 Tensor & _th_index_fill_(Tensor & self, int64_t dim, const Tensor & index, Scalar value);
 std::tuple<Tensor &,Tensor &> _th_mode_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool keepdim);
 std::tuple<Tensor,Tensor> _th_mode(const Tensor & self, int64_t dim, bool keepdim);
-std::tuple<Tensor &,Tensor &> _th_sort_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool descending);
-std::tuple<Tensor,Tensor> _th_sort(const Tensor & self, int64_t dim, bool descending);
+std::tuple<Tensor &,Tensor &> _th_sort_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool descending, bool stable);
+std::tuple<Tensor,Tensor> _th_sort(const Tensor & self, int64_t dim, bool descending, bool stable);
 std::tuple<Tensor &,Tensor &> _th_topk_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted);
 std::tuple<Tensor,Tensor> _th_topk(const Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted);
 Tensor & _th_renorm_out(Tensor & result, const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);

aten/src/ATen/core/builtin_function.h

Lines changed: 5 additions & 1 deletion
@@ -1,7 +1,11 @@
 #pragma once
 
-#include <ATen/core/dispatch/Dispatcher.h>
 #include <ATen/core/function.h>
+#include <ATen/core/ivalue.h>
+#include <c10/util/Exception.h>
+#include <c10/util/intrusive_ptr.h>
+#include <functional>
+#include <utility>
 
 namespace torch {
 namespace jit {

aten/src/ATen/core/interned_strings.h

Lines changed: 1 addition & 0 deletions
@@ -103,6 +103,7 @@ namespace c10 {
   _(prim, Guard) \
   _(prim, BailOut) \
   _(prim, TypeCheck) \
+  _(prim, RequiresGradCheck) \
   _(prim, FallbackGraph) \
   _(prim, FusedConcat) \
   _(prim, ConstantChunk) \

aten/src/ATen/core/op_registration/op_registration.cpp

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 #include <c10/macros/Macros.h>
 
+#include <ATen/core/dispatch/Dispatcher.h>
 #include <ATen/core/op_registration/op_registration.h>
 #if !defined(CAFFE2_IS_XPLAT_BUILD)
 #include <torch/csrc/jit/frontend/function_schema_parser.h>

aten/src/ATen/core/op_registration/op_registration.h

Lines changed: 3 additions & 1 deletion
@@ -7,7 +7,9 @@
 
 #include <c10/core/DispatchKey.h>
 #include <c10/core/CompileTimeFunctionPointer.h>
-#include <ATen/core/dispatch/Dispatcher.h>
+#include <ATen/core/boxing/KernelFunction.h>
+#include <ATen/core/dispatch/CppSignature.h>
+#include <ATen/core/dispatch/RegistrationHandleRAII.h>
 #include <ATen/core/op_registration/infer_schema.h>
 #if defined(EXPOSE_C2_OPS) || !defined(CAFFE2_IS_XPLAT_BUILD)
 #include <torch/csrc/jit/frontend/function_schema_parser.h>

aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp

Lines changed: 7 additions & 2 deletions
@@ -698,10 +698,12 @@ std::tuple<Tensor,Tensor> _th_mode(const Tensor & self, int64_t dim, bool keepdi
     }
     return std::tuple<Tensor, Tensor>(values, indices);
 }
-std::tuple<Tensor &,Tensor &> _th_sort_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool descending) {
+std::tuple<Tensor &,Tensor &> _th_sort_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool descending, bool stable) {
     // DeviceGuard omitted
     auto dispatch_scalar_type = infer_scalar_type(self);
 
+    TORCH_CHECK(!stable, "stable=True is not implemented on CUDA yet.");
+
     switch (dispatch_scalar_type) {
         case ScalarType::Byte: {
             auto values_ = checked_dense_tensor_unwrap(values, "values", 0, "_th_sort_out", false, DeviceType::CUDA, dispatch_scalar_type);
@@ -764,8 +766,11 @@ std::tuple<Tensor &,Tensor &> _th_sort_out(Tensor & values, Tensor & indices, co
     }
     return std::tuple<Tensor &, Tensor &>(values, indices);
 }
-std::tuple<Tensor,Tensor> _th_sort(const Tensor & self, int64_t dim, bool descending) {
+std::tuple<Tensor,Tensor> _th_sort(const Tensor & self, int64_t dim, bool descending, bool stable) {
     // DeviceGuard omitted
+
+    TORCH_CHECK(!stable, "stable=True is not implemented on CUDA yet.");
+
     auto dispatch_scalar_type = infer_scalar_type(self);
     auto values_ = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(c10::Storage(c10::Storage::use_byte_size_t(), 0, allocator(), true),DispatchKey::CUDA, scalarTypeToTypeMeta(dispatch_scalar_type)).release();
     auto values = Tensor(c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl>::reclaim(values_));

aten/src/ATen/native/BinaryOps.cpp

Lines changed: 0 additions & 8 deletions
@@ -826,16 +826,12 @@ Tensor logical_xor(const Tensor& self, Scalar other) { return comparison_op(self
 Tensor& logical_xor_(Tensor& self, Scalar other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_xor_out)); }
 
 Tensor& maximum_out(Tensor& result, const Tensor& self, const Tensor& other) {
-  TORCH_CHECK(!self.is_complex() && !other.is_complex(), "maximum does not support complex inputs.");
-
   auto iter = TensorIterator::binary_op(result, self, other);
   maximum_stub(iter.device_type(), iter);
   return result;
 }
 
 Tensor maximum(const Tensor& self, const Tensor& other) {
-  TORCH_CHECK(!self.is_complex() && !other.is_complex(), "maximum does not support complex inputs.");
-
   Tensor result;
   auto iter = TensorIterator::binary_op(result, self, other);
   maximum_stub(iter.device_type(), iter);
@@ -852,16 +848,12 @@ Tensor max(const Tensor& self, const Tensor& other) {
 }
 
 Tensor& minimum_out(Tensor& result, const Tensor& self, const Tensor& other) {
-  TORCH_CHECK(!self.is_complex() && !other.is_complex(), "minimum does not support complex inputs.");
-
   auto iter = TensorIterator::binary_op(result, self, other);
   minimum_stub(iter.device_type(), iter);
   return result;
 }
 
 Tensor minimum(const Tensor& self, const Tensor& other) {
-  TORCH_CHECK(!self.is_complex() && !other.is_complex(), "minimum does not support complex inputs.");
-
   Tensor result;
   auto iter = TensorIterator::binary_op(result, self, other);
   minimum_stub(iter.device_type(), iter);

aten/src/ATen/native/CompositeRandomAccessorCommon.h

Lines changed: 3 additions & 0 deletions
@@ -122,6 +122,9 @@ class CompositeRandomAccessor {
   using difference_type = typename std::iterator_traits<KeyAccessor>::difference_type;
   using iterator_category = std::random_access_iterator_tag;
 
+  C10_HOST_DEVICE
+  CompositeRandomAccessor() = default;
+
   C10_HOST_DEVICE
   CompositeRandomAccessor(KeyAccessor keys, ValueAccessor values)
     : keys(keys), values(values)

0 commit comments
