Integrate LLVM at llvm/llvm-project@70ef89b9137e by copybara-service[bot] · Pull Request #93560 · tensorflow/tensorflow · GitHub
[go: up one dir, main page]

Skip to content

Integrate LLVM at llvm/llvm-project@70ef89b9137e #93560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ absl::Status LowerKernelBodiesToLowLevelIr(mlir::ModuleOp module,
kernelPm.addPass(mlir::createGpuKernelToRocdlPass(architecture));
#elif GOOGLE_CUDA
kernelPm.addPass(mlir::createGpuKernelToNvvmPass());
kernelPm.addPass(mlir::NVVM::createOptimizeForTargetPass());
kernelPm.addPass(mlir::LLVM::createNVVMOptimizeForTargetPass());
#endif
// Remove all location information to prevent a debug build.
pm.addPass(::mlir::createStripDebugInfoPass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ func.func @abs_f32(%arg0: memref<*xf32>) -> memref<*xf32>
%12 = math.absf %arg1 : f32
linalg.yield %12 : f32
}
%10 = bufferization.to_memref %0 : tensor<?xindex> to memref<?xindex>
%10 = bufferization.to_buffer %0 : tensor<?xindex> to memref<?xindex>
%11 = memref.reshape %9(%10)
: (memref<?xf32>, memref<?xindex>) -> memref<*xf32>
func.return %11 : memref<*xf32>
Expand Down
245 changes: 201 additions & 44 deletions third_party/llvm/generated.patch
Original file line number Diff line number Diff line change
@@ -1,49 +1,206 @@
Auto generated patch. Do not edit or delete it, even if empty.
diff -ruN --strip-trailing-cr a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4755,8 +4755,16 @@
return;
}
} else {
- Diag(New->getLocation(), diag::warn_cxx_compat_tentative_definition) << New;
- Diag(Old->getLocation(), diag::note_previous_declaration);
+ // C++ may not have a tentative definition rule, but it has a different
+ // rule about what constitutes a definition in the first place. See
+ // [basic.def]p2 for details, but the basic idea is: if the old declaration
+ // contains the extern specifier and doesn't have an initializer, it's fine
+ // in C++.
+ if (Old->getStorageClass() != SC_Extern || Old->hasInit()) {
+ Diag(New->getLocation(), diag::warn_cxx_compat_tentative_definition)
+ << New;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
diff -ruN --strip-trailing-cr a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -430,8 +430,15 @@
if (!ReferenceBinding) {
#ifndef NDEBUG
auto Decay = [&](QualType T) {
- return (T->isArrayType() || T->isFunctionType()) ? C.getDecayedType(T)
- : T;
+ if (T->isArrayType() || T->isFunctionType())
+ T = C.getDecayedType(T);
+
+ // A function pointer type can be resolved to a member function type,
+ // which is still an identity conversion.
+ if (auto *N = T->getAs<MemberPointerType>();
+ N && N->isMemberFunctionPointer())
+ T = C.getDecayedType(N->getPointeeType());
+ return T;
};
// The types might differ if there is an array-to-pointer conversion
// an function-to-pointer conversion, or lvalue-to-rvalue conversion.
diff -ruN --strip-trailing-cr a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
--- a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
+++ b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
@@ -232,3 +232,45 @@
};

InitListAreNotPerfectCpy InitListAreNotPerfectCpy_test({InitListAreNotPerfectCpy{}});
+
+namespace PointerToMemFunc {
+template <typename>
+class A;
+struct N {
+ template <typename T>
+ void f(T);
+};
+template <typename T>
+struct E {
+ template <class = A<int>>
+ void g() = delete;
+ void g(void (T::*)(char));
+};
+void f() {
+ E<N> e;
+ e.g(&N::f);
+}
+}
+
+#if __cplusplus >= 201402
+namespace PointerToMemData {
+struct N {
+ int field;
+};
+template <typename It, typename T>
+struct B {
+ B(It, T);
+ template <typename It2>
+ B(B<It2, T>);
+};
+template <typename T>
+struct C {
+ auto g() { return B<int, T>(0, T{}); }
+};
+void f() {
+ using T = decltype(C<decltype(&N::field)>{}.g());
+}
+
+}
+
+#endif
diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5771,6 +5771,35 @@
return false;
}

+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
+// is the first instruction that will use Addr. So we need to find the first
+// user of Addr in current BB.
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
+ Value *SunkAddr) {
+ if (Addr->hasOneUse())
+ return MemoryInst->getIterator();
+
+ // We already have a SunkAddr in current BB, but we may need to insert cast
+ // instruction after it.
+ if (SunkAddr) {
+ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
+ return std::next(AddrInst->getIterator());
+ }
+
+ // Find the first user of Addr in current BB.
+ Instruction *Earliest = MemoryInst;
+ for (User *U : Addr->users()) {
+ Instruction *UserInst = dyn_cast<Instruction>(U);
+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
+ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
+ continue;
+ if (UserInst->comesBefore(Earliest))
+ Earliest = UserInst;
+ }
+ }
+ return Earliest->getIterator();
+}
+
/// Sink addressing mode computation immediate before MemoryInst if doing so
/// can be done without increasing register pressure. The need for the
/// register pressure constraint means this can end up being an all or nothing
@@ -5895,11 +5924,6 @@
return Modified;
}

if (haveIncompatibleLanguageLinkages(Old, New)) {
diff -ruN --strip-trailing-cr a/clang/test/Sema/warn-tentative-defn-compat.c b/clang/test/Sema/warn-tentative-defn-compat.c
--- a/clang/test/Sema/warn-tentative-defn-compat.c
+++ b/clang/test/Sema/warn-tentative-defn-compat.c
@@ -20,4 +20,7 @@
cxx-error {{redefinition of 'k'}}
- // Insert this computation right after this user. Since our caller is
- // scanning from the top of the BB to the bottom, reuse of the expr are
- // guaranteed to happen later.
- IRBuilder<> Builder(MemoryInst);
-
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this for some other load/store instr in this block. If so, reuse
@@ -5910,6 +5934,13 @@

Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+
+ // The current BB may be optimized multiple times, we can't guarantee the
+ // reuse of Addr happens later, call findInsertPos to find an appropriate
+ // insert position.
+ IRBuilder<> Builder(MemoryInst->getParent(),
+ findInsertPos(Addr, MemoryInst, SunkAddr));
+
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
diff -ruN --strip-trailing-cr a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare void @g(ptr)
+
+; %load and %load5 use the same address, %load5 is optimized first, %load is
+; optimized later and reuse the same address computation instruction. We must
+; make sure not to generate use before def error.
+
+define void @f(ptr %arg) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr i8, ptr %arg, i64 -64
+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
+ call void @g(ptr %getelementptr)
+ br label %bb3
+
+bb3:
+ %load = load ptr, ptr %getelementptr, align 8
+ %load4 = load i32, ptr %getelementptr1, align 8
+ %load5 = load ptr, ptr %getelementptr, align 8
+ %add = add i32 1, 0
+ %icmp = icmp eq i32 %add, 0
+ br i1 %icmp, label %bb7, label %bb7
+
+bb7:
+ ret void
+}
diff -ruN --strip-trailing-cr a/mlir/include/mlir/Query/Matcher/SliceMatchers.h b/mlir/include/mlir/Query/Matcher/SliceMatchers.h
--- a/mlir/include/mlir/Query/Matcher/SliceMatchers.h
+++ b/mlir/include/mlir/Query/Matcher/SliceMatchers.h
@@ -14,6 +14,7 @@
#define MLIR_TOOLS_MLIRQUERY_MATCHERS_SLICEMATCHERS_H

#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/IR/Operation.h"

// Cannot have two declarations with initializers, that is a redefinition in
-// both C and C++.
+// both C and C++. However, C++ does have a different definition of what makes
+// a declaration a definition.
+extern const int a;
+const int a = 12; // Okay in C and C++
diff -ruN --strip-trailing-cr a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp
--- a/mlir/lib/TableGen/Pattern.cpp
+++ b/mlir/lib/TableGen/Pattern.cpp
@@ -304,8 +304,8 @@
assert(index < 0);
auto *operand = cast<NamedTypeConstraint *>(op->getArg(getArgIndex()));
if (operand->isOptional()) {
- auto repl =
- formatv(fmt, formatv("({0}.empty() ? Value() : *{0}.begin())", name));
+ auto repl = formatv(
+ fmt, formatv("({0}.empty() ? ::mlir::Value() : *{0}.begin())", name));
LLVM_DEBUG(dbgs() << repl << " (OptionalOperand)\n");
return std::string(repl);
}
/// A matcher encapsulating `getBackwardSlice` method from SliceAnalysis.h.
/// Additionally, it limits the slice computation to a certain depth level using
diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12874,6 +12874,7 @@
),
includes = ["include"],
deps = [
+ ":Analysis",
":FuncDialect",
":IR",
":Reducer",
4 changes: 2 additions & 2 deletions third_party/llvm/workspace.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")

def repo(name):
"""Imports LLVM."""
LLVM_COMMIT = "52ed6791f87a3ef862f555f84ba88a7cdf8fe461"
LLVM_SHA256 = "5f4230b06dd2ff977919f26e2deb0b82da00f0a3265f60ac206743169693e933"
LLVM_COMMIT = "70ef89b9137e03b86cd49fd221cb8c0324984684"
LLVM_SHA256 = "1afc4d7133bd40c25ab3f5406db98a1e249cca744bddfc62cb6e91e21b1ba811"

tf_http_archive(
name = name,
Expand Down
Loading
0