8000 Merge commit for internal changes · CJavaScala/tensorflow@47e38b0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 47e38b0

Browse files
committed
Merge commit for internal changes
2 parents a2c5e74 + d45ed5c commit 47e38b0

File tree

39 files changed

+261
-217
lines changed

39 files changed

+261
-217
lines changed

configure

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ while true; do
6464
# Retry
6565
done
6666

67-
# Find out where the CUDNN library is installed
67+
# Find out where the cuDNN library is installed
6868
while true; do
6969
fromuser=""
7070
if [ -z "$CUDNN_INSTALL_PATH" ]; then
7171
default_cudnn_path=${CUDA_TOOLKIT_PATH}
72-
read -p "Please specify the location where CUDNN 6.5 V2 library is installed. Refer to README.md for more details. [Default is $default_cudnn_path]: " CUDNN_INSTALL_PATH
72+
read -p "Please specify the location where cuDNN v2 library is installed. Refer to README.md for more details. [Default is $default_cudnn_path]: " CUDNN_INSTALL_PATH
7373
fromuser="1"
7474
if [ -z "$CUDNN_INSTALL_PATH" ]; then
7575
CUDNN_INSTALL_PATH=$default_cudnn_path
@@ -81,7 +81,7 @@ while true; do
8181
if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so.6.5" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.6.5" ]; then
8282
break
8383
fi
84-
echo "Invalid path to CUDNN 6.5 V2 toolkit. Neither of the following two files can be found:"
84+
echo "Invalid path to cuDNN v2 toolkit. Neither of the following two files can be found:"
8585
echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.6.5"
8686
echo "$CUDNN_INSTALL_PATH/libcudnn.so.6.5"
8787
if [ -z "$fromuser" ]; then
@@ -96,7 +96,7 @@ cat > third_party/gpus/cuda/cuda.config <<EOF
9696
# at the moment.
9797
CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
9898
99-
# CUDNN_INSTALL_PATH refers to the CUDNN toolkit. The cudnn header and library
99+
# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
100100
# files can be either in this directory, or under include/ and lib64/
101101
# directories separately.
102102
CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"

tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
119119
GPUBFCAllocator a(0, 1 << 30);
120120

121121
float* first_ptr = a.Allocate<float>(1024);
122-
a.Deallocate(first_ptr);
122+
a.DeallocateRaw(first_ptr);
123123
for (int i = 0; i < 1024; ++i) {
124124
// Allocate several buffers of different sizes, and then clean them
125125
// all up. We should be able to repeat this endlessly without
@@ -130,18 +130,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
130130
double* t3 = a.Allocate<double>(2048);
131131
float* t4 = a.Allocate<float>(10485760);
132132

133-
a.Deallocate(t1);
134-
a.Deallocate(t2);
135-
a.Deallocate(t3);
136-
a.Deallocate(t4);
133+
a.DeallocateRaw(t1);
134+
a.DeallocateRaw(t2);
135+
a.DeallocateRaw(t3);
136+
a.DeallocateRaw(t4);
137137
}
138138

139139
// At the end, we should have coalesced all memory into one region
140140
// starting at the beginning, so validate that allocating a pointer
141141
// starts from this region.
142142
float* first_ptr_after = a.Allocate<float>(1024);
143143
EXPECT_EQ(first_ptr, first_ptr_after);
144-
a.Deallocate(first_ptr_after);
144+
a.DeallocateRaw(first_ptr_after);
145145
}
146146

147147
TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
@@ -160,7 +160,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
160160
float* t1 = a.Allocate<float>(1);
161161
EXPECT_EQ(4, a.RequestedSize(t1));
162162
EXPECT_EQ(256, a.AllocatedSize(t1));
163-
a.Deallocate(t1);
163+
a.DeallocateRaw(t1);
164164
}
165165

166166
TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
@@ -172,7 +172,7 @@ TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
172172

173173
EXPECT_NE(nullptr, first_ptr);
174174
EXPECT_EQ(nullptr, second_ptr);
175-
a.Deallocate(first_ptr);
175+
a.DeallocateRaw(first_ptr);
176176
}
177177

178178
} // namespace

tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
8181
stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &pi, sizeof(float)));
8282

8383
// Expect error on free.
84-
a.Deallocate(gpu_array);
84+
a.DeallocateRaw(gpu_array);
8585
},
8686
"");
8787
}
@@ -114,7 +114,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
114114
stream_exec->SynchronousMemcpy(&gpu_ftr_ptr, &pi, sizeof(float)));
115115

116116
// Expect error on free.
117-
a.Deallocate(gpu_array);
117+
a.DeallocateRaw(gpu_array);
118118
},
119119
"");
120120
}
@@ -149,7 +149,7 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
149149
ASSERT_EQ(1.0, cpu_array_result[0]);
150150

151151
// Free the array
152-
a.Deallocate(gpu_array);
152+
a.DeallocateRaw(gpu_array);
153153

154154
// All values should be reset to nan.
155155
ASSERT_TRUE(
@@ -192,7 +192,7 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
192192
ASSERT_EQ(1.0, cpu_array_result[0]);
193193

194194
// Free the array
195-
a.Deallocate(gpu_array);
195+
a.DeallocateRaw(gpu_array);
196196

197197
// All values should be reset to nan.
198198
ASSERT_TRUE(
@@ -214,7 +214,7 @@ TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
214214
float* t1 = a.Allocate<float>(1);
215215
EXPECT_EQ(4, a.RequestedSize(t1));
216216
EXPECT_EQ(256, a.AllocatedSize(t1));
217-
a.Deallocate(t1);
217+
a.DeallocateRaw(t1);
218218
}
219219

220220
} // namespace tensorflow

tensorflow/core/common_runtime/gpu/gpu_region_allocator_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ TEST(GPURegionAllocatorTest, Simple) {
4848
}
4949
float* t1 = a.Allocate<float>(1024);
5050
double* t2 = a.Allocate<double>(1048576);
51-
a.Deallocate(t1);
52-
a.Deallocate(t2);
51+
a.Deallocate(t1, 1024);
52+
a.Deallocate(t2, 1048576);
5353
}
5454

5555
TEST(GPURegionAllocatorTest, CheckMemLeak) {
@@ -77,7 +77,7 @@ TEST(GPURegionAllocatorTest, AllocatedVsRequested) {
7777
// Minimum allocation size if 256
7878
EXPECT_EQ(256, a.AllocatedSize(t1));
7979

80-
a.Deallocate(t1);
80+
a.Deallocate(t1, 1);
8181
}
8282

8383
} // namespace

tensorflow/core/common_runtime/gpu/gpu_util.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
100100
}
101101
tensor_ref.Unref();
102102
port::CopyFromArray(proto->mutable_tensor_content(), mb, num_bytes);
103-
alloc->Deallocate<char>(mb);
103+
alloc->Deallocate<char>(mb, num_bytes);
104104
done(Status::OK());
105105
});
106106
} else {

tensorflow/core/framework/allocator.h

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ limitations under the License.
2121

2222
#include <limits>
2323

24+
#include "tensorflow/core/framework/numeric_types.h"
25+
#include "tensorflow/core/framework/type_traits.h"
2426
#include "tensorflow/core/platform/logging.h"
2527
#include "tensorflow/core/platform/port.h"
2628

@@ -66,9 +68,11 @@ class Allocator {
6668
// REQUIRES: "ptr" was previously returned by a call to AllocateRaw
6769
virtual void DeallocateRaw(void* ptr) = 0;
6870

69-
// Convenience functions to do typed allocation. Note that these functions
70-
// do not invoke C++ constructors or destructors. May return NULL if the
71-
// tensor has too many elements to represent in a single allocation.
71+
// Convenience functions to do typed allocation. C++ constructors
72+
// and destructors are invoked for complex types if necessary,
73+
// depending on the concrete Allocator implementation. May return
74+
// NULL if the tensor has too many elements to represent in a single
75+
// allocation.
7276
template <typename T>
7377
T* Allocate(size_t num_elements) {
7478
return Allocate<T>(num_elements, AllocationAttributes());
@@ -86,12 +90,17 @@ class Allocator {
8690

8791
void* p = AllocateRaw(32 /* align to 32 byte boundary */,
8892
sizeof(T) * num_elements, allocation_attr);
89-
return reinterpret_cast<T*>(p);
93+
T* typed_p = reinterpret_cast<T*>(p);
94+
if (typed_p) RunCtor<T>(typed_p, num_elements);
95+
return typed_p;
9096
}
9197

9298
template <typename T>
93-
void Deallocate(T* ptr) {
94-
DeallocateRaw(ptr);
99+
void Deallocate(T* ptr, size_t num_elements) {
100+
if (ptr) {
101+
RunDtor<T>(ptr, num_elements);
102+
DeallocateRaw(ptr);
103+
}
95104
}
96105

97106
// Returns true if this allocator tracks the sizes of allocations.
@@ -122,11 +131,63 @@ class Allocator {
122131
// allocated by this allocator.
123132
virtual size_t AllocatedSize(void* ptr) { return RequestedSize(ptr); }
124133

134+
// is_simple<T>::value if T[] can be safely constructed and destructed
135+
// without running T() and ~T(). We do not use std::is_trivial<T>
136+
// directly because std::complex<float> is not trival but its array
137+
// can be constructed and destructed without running its default ctor
138+
// and dtor.
139+
template <typename T>
140+
struct is_simple {
141+
static const bool value = std::is_trivial<T>::value ||
142+
std::is_same<T, complex64>::value ||
143+
is_quantized<T>::value;
144+
};
145+
146+
private:
147+
// No constructors or destructors are run for simple types
148+
template <typename T>
149+
void RunCtor(T* p, size_t n) {
150+
static_assert(is_simple<T>::value, "T is not a simple type.");
151+
}
152+
153+
template <typename T>
154+
void RunDtor(T* p, size_t n) {}
155+
156+
// custom constructors and destructors that can be overridden for
157+
// non-standard allocators
158+
159+
// Runs string's default constructor for p[0], p[1], ..., p[n-1].
160+
virtual void RunStringCtor(string* p, size_t n) {
161+
for (size_t i = 0; i < n; ++p, ++i) new (p) string();
162+
}
163+
164+
// Runs string's default destructor for p[0], p[1], ..., p[n-1].
165+
virtual void RunStringDtor(string* p, size_t n) {
166+
for (size_t i = 0; i < n; ++p, ++i) p->~string();
167+
}
168+
125169
// TODO(jeff): Maybe provide some interface to give info about
126170
// current allocation state (total number of bytes available for
127171
// allocation, number of bytes free on device, etc.)
128172
};
129173

174+
template <>
175+
struct Allocator::is_simple<bfloat16> {
176+
static const bool value = true;
177+
};
178+
179+
// Allocator-specific constructors and destructors are used for
180+
// strings
181+
template <>
182+
inline void Allocator::RunCtor(string* p, size_t n) {
183+
RunStringCtor(p, n);
184+
}
185+
186+
template <>
187+
inline void Allocator::RunDtor(string* p, size_t n) {
188+
RunStringDtor(p, n);
189+
}
190+
130191
// A tensorflow Op may need access to different kinds of memory that
131192
// are not simply a function of the device to which the Op has been
132193
// assigned. For example, an Op executing on a GPU may still need

tensorflow/core/framework/allocator_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ TEST(CPUAllocatorTest, Simple) {
3535
}
3636
float* t1 = a->Allocate<float>(1024);
3737
double* t2 = a->Allocate<double>(1048576);
38-
a->Deallocate(t1);
39-
a->Deallocate(t2);
38+
a->Deallocate(t1, 1024);
39+
a->Deallocate(t2, 1048576);
4040
}
4141

4242
// Define a struct that we will use to observe behavior in the unit tests

tensorflow/core/framework/tensor.cc

Lines changed: 5 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -76,36 +76,13 @@ class Buffer : public TensorBuffer {
7676
TF_DISALLOW_COPY_AND_ASSIGN(Buffer);
7777
};
7878

79-
// is_simple<T>::value if T[] can be safely constructed and destructed
80-
// without running T() and ~T(). We do not use std::is_trivial<T>
81-
// directly because std::complex<float> is not trival but its array
82-
// can be constructed and destructed without running its default ctor
83-
// and dtor.
84-
template <typename T>
85-
struct is_simple {
86-
static const bool value = std::is_trivial<T>::value ||
87-
std::is_same<T, complex64>::value ||
88-
is_quantized<T>::value;
89-
};
90-
91-
template <>
92-
struct is_simple<bfloat16> {
93-
static const bool value = true;
94-
};
95-
9679
// A set of helper functions depending on T.
9780
template <typename T>
9881
struct Helper {
9982
// By default, we assume T is a simple type (float, int32, etc.)
100-
static_assert(is_simple<T>::value, "T is not a simple type.");
83+
static_assert(Allocator::is_simple<T>::value, "T is not a simple type.");
10184
typedef protobuf::RepeatedField<T> RepeatedFieldType;
10285

103-
// No constructor to run.
104-
static void RunCtor(T* p, int n) {}
105-
106-
// No destructor to run.
107-
static void RunDtor(T* p, int n) {}
108-
10986
// Encoder of simple type T to a string. We do a copy.
11087
template <typename Destination>
11188
static void Encode(TensorBuffer* in, int64 n, Destination* out) {
@@ -142,16 +119,6 @@ struct Helper<string> {
142119
// Proto message uses RepeatedFieldType to hold repeated T.
143120
typedef protobuf::RepeatedPtrField<string> RepeatedFieldType;
144121

145-
// Runs string's default constructor for p[0], p[1], ..., p[n-1].
146-
static void RunCtor(string* p, int n) {
147-
for (int i = 0; i < n; ++p, ++i) new (p) string();
148-
}
149-
150-
// Runs T's default destructor for p[0], p[1], ..., p[n-1].
151-
static void RunDtor(string* p, int n) {
152-
for (int i = 0; i < n; ++p, ++i) p->~string();
153-
}
154-
155122
// Encodes "n" elements of type string stored in "in" into Cord
156123
// "out", which is usually the TensorProto::tensor_content.
157124
template <typename Destination>
@@ -273,23 +240,16 @@ struct ProtoHelper<bfloat16> {
273240

274241
template <typename T>
275242
Buffer<T>::Buffer(Allocator* a, int64 n)
276-
: alloc_(a), data_(a->Allocate<T>(n)), elem_(n) {
277-
if (data_) Helper<T>::RunCtor(data_, elem_);
278-
}
243+
: alloc_(a), data_(a->Allocate<T>(n)), elem_(n) {}
279244

280245
template <typename T>
281246
Buffer<T>::Buffer(Allocator* a, int64 n,
282247
const AllocationAttributes& allocation_attr)
283-
: alloc_(a), data_(a->Allocate<T>(n, allocation_attr)), elem_(n) {
284-
if (data_) Helper<T>::RunCtor(data_, elem_);
285-
}
248+
: alloc_(a), data_(a->Allocate<T>(n, allocation_attr)), elem_(n) {}
286249

287250
template <typename T>
288251
Buffer<T>::~Buffer() {
289-
if (data_) {
290-
Helper<T>::RunDtor(data_, elem_);
291-
alloc_->Deallocate<T>(data_);
292-
}
252+
alloc_->Deallocate<T>(data_, elem_);
293253
}
294254

295255
// Allocates a T[n] buffer. Fills in the buffer with repeated values
@@ -542,7 +502,7 @@ size_t Tensor::TotalBytes() const {
542502
}
543503

544504
bool Tensor::CanUseDMA() const {
545-
CASES(dtype(), return is_simple<T>::value);
505+
CASES(dtype(), return Allocator::is_simple<T>::value);
546506
return false; // Makes compiler happy.
547507
}
548508

0 commit comments

Comments (0)