Integrate LLVM at llvm/llvm-project@70ef89b9137e by copybara-service[bot] · Pull Request #93560 · tensorflow/tensorflow · GitHub
[go: up one dir, main page]

Skip to content

Integrate LLVM at llvm/llvm-project@70ef89b9137e #93560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ absl::Status LowerKernelBodiesToLowLevelIr(mlir::ModuleOp module,
kernelPm.addPass(mlir::createGpuKernelToRocdlPass(architecture));
#elif GOOGLE_CUDA
kernelPm.addPass(mlir::createGpuKernelToNvvmPass());
kernelPm.addPass(mlir::NVVM::createOptimizeForTargetPass());
kernelPm.addPass(mlir::LLVM::createNVVMOptimizeForTargetPass());
#endif
// Remove all location information to prevent a debug build.
pm.addPass(::mlir::createStripDebugInfoPass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ func.func @abs_f32(%arg0: memref<*xf32>) -> memref<*xf32>
%12 = math.absf %arg1 : f32
linalg.yield %12 : f32
}
%10 = bufferization.to_memref %0 : tensor<?xindex> to memref<?xindex>
%10 = bufferization.to_buffer %0 : tensor<?xindex> to memref<?xindex>
%11 = memref.reshape %9(%10)
: (memref<?xf32>, memref<?xindex>) -> memref<*xf32>
func.return %11 : memref<*xf32>
Expand Down
245 changes: 201 additions & 44 deletions third_party/llvm/generated.patch
Original file line number Diff line number Diff line change
@@ -1,49 +1,206 @@
Auto generated patch. Do not edit or delete it, even if empty.
diff -ruN --strip-trailing-cr a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4755,8 +4755,16 @@
return;
}
} else {
- Diag(New->getLocation(), diag::warn_cxx_compat_tentative_definition) << New;
- Diag(Old->getLocation(), diag::note_previous_declaration);
+ // C++ may not have a tentative definition rule, but it has a different
+ // rule about what constitutes a definition in the first place. See
+ // [basic.def]p2 for details, but the basic idea is: if the old declaration
+ // contains the extern specifier and doesn't have an initializer, it's fine
+ // in C++.
+ if (Old->getStorageClass() != SC_Extern || Old->hasInit()) {
+ Diag(New->getLocation(), diag::warn_cxx_compat_tentative_definition)
+ << New;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
diff -ruN --strip-trailing-cr a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -430,8 +430,15 @@
if (!ReferenceBinding) {
#ifndef NDEBUG
auto Decay = [&](QualType T) {
- return (T->isArrayType() || T->isFunctionType()) ? C.getDecayedType(T)
- : T;
+ if (T->isArrayType() || T->isFunctionType())
+ T = C.getDecayedType(T);
+
+ // A function pointer type can be resolved to a member function type,
+ // which is still an identity conversion.
+ if (auto *N = T->getAs<MemberPointerType>();
+ N && N->isMemberFunctionPointer())
+ T = C.getDecayedType(N->getPointeeType());
+ return T;
};
// The types might differ if there is an array-to-pointer conversion
// an function-to-pointer conversion, or lvalue-to-rvalue conversion.
diff -ruN --strip-trailing-cr a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
--- a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
+++ b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
@@ -232,3 +232,45 @@
};

InitListAreNotPerfectCpy InitListAreNotPerfectCpy_test({InitListAreNotPerfectCpy{}});
+
+namespace PointerToMemFunc {
+template <typename>
+class A;
+struct N {
+ template <typename T>
+ void f(T);
+};
+template <typename T>
+struct E {
+ template <class = A<int>>
+ void g() = delete;
+ void g(void (T::*)(char));
+};
+void f() {
+ E<N> e;
+ e.g(&N::f);
+}
+}
+
+#if __cplusplus >= 201402
+namespace PointerToMemData {
+struct N {
+ int field;
+};
+template <typename It, typename T>
+struct B {
+ B(It, T);
+ template <typename It2>
+ B(B<It2, T>);
+};
+template <typename T>
+struct C {
+ auto g() { return B<int, T>(0, T{}); }
+};
+void f() {
+ using T = decltype(C<decltype(&N::field)>{}.g());
+}
+
+}
+
+#endif
diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5771,6 +5771,35 @@
return false;
}

+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
+// is the first instruction that will use Addr. So we need to find the first
+// user of Addr in current BB.
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
+ Value *SunkAddr) {
+ if (Addr->hasOneUse())
+ return MemoryInst->getIterator();
+
+ // We already have a SunkAddr in current BB, but we may need to insert cast
+ // instruction after it.
+ if (SunkAddr) {
+ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
+ return std::next(AddrInst->getIterator());
+ }
+
+ // Find the first user of Addr in current BB.
+ Instruction *Earliest = MemoryInst;
+ for (User *U : Addr->users()) {
+ Instruction *UserInst = dyn_cast<Instruction>(U);
+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
+ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
+ continue;
+ if (UserInst->comesBefore(Earliest))
+ Earliest = UserInst;
+ }
+ }
+ return Earliest->getIterator();
+}
+
/// Sink addressing mode computation immediate before MemoryInst if doing so
/// can be done without increasing register pressure. The need for the
/// register pressure constraint means this can end up being an all or nothing
@@ -5895,11 +5924,6 @@
return Modified;
}

if (haveIncompatibleLanguageLinkages(Old, New)) {
diff -ruN --strip-trailing-cr a/clang/test/Sema/warn-tentative-defn-compat.c b/clang/test/Sema/warn-tentative-defn-compat.c
--- a/clang/test/Sema/warn-tentative-defn-compat.c
+++ b/clang/test/Sema/warn-tentative-defn-compat.c
@@ -20,4 +20,7 @@
cxx-error {{redefinition of 'k'}}
- // Insert this computation right after this user. Since our caller is
- // scanning from the top of the BB to the bottom, reuse of the expr are
- // guaranteed to happen later.
- IRBuilder<> Builder(MemoryInst);
-
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this for some other load/store instr in this block. If so, reuse
@@ -5910,6 +5934,13 @@

Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+
+ // The current BB may be optimized multiple times, we can't guarantee the
+ // reuse of Addr happens later, call findInsertPos to find an appropriate
+ // insert position.
+ IRBuilder<> Builder(MemoryInst->getParent(),
+ findInsertPos(Addr, MemoryInst, SunkAddr));
+
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
diff -ruN --strip-trailing-cr a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare void @g(ptr)
+
+; %load and %load5 use the same address, %load5 is optimized first, %load is
+; optimized later and reuse the same address computation instruction. We must
+; make sure not to generate use before def error.
+
+define void @f(ptr %arg) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr i8, ptr %arg, i64 -64
+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
+ call void @g(ptr %getelementptr)
+ br label %bb3
+
+bb3:
+ %load = load ptr, ptr %getelementptr, align 8
+ %load4 = load i32, ptr %getelementptr1, align 8
+ %load5 = load ptr, ptr %getelementptr, align 8
+ %add = add i32 1, 0
+ %icmp = icmp eq i32 %add, 0
+ br i1 %icmp, label %bb7, label %bb7
+
+bb7:
+ ret void
+}
diff -ruN --strip-trailing-cr a/mlir/include/mlir/Query/Matcher/SliceMatchers.h b/mlir/include/mlir/Query/Matcher/SliceMatchers.h
--- a/mlir/include/mlir/Query/Matcher/SliceMatchers.h
+++ b/mlir/include/mlir/Query/Matcher/SliceMatchers.h
@@ -14,6 +14,7 @@
#define MLIR_TOOLS_MLIRQUERY_MATCHERS_SLICEMATCHERS_H

#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/IR/Operation.h"

// Cannot have two declarations with initializers, that is a redefinition in
-// both C and C++.
+// both C and C++. However, C++ does have a different definition of what makes
+// a declaration a definition.
+extern const int a;
+const int a = 12; // Okay in C and C++
diff -ruN --strip-trailing-cr a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp
--- a/mlir/lib/TableGen/Pattern.cpp
+++ b/mlir/lib/TableGen/Pattern.cpp
@@ -304,8 +304,8 @@
assert(index < 0);
auto *operand = cast<NamedTypeConstraint *>(op->getArg(getArgIndex()));
if (operand->isOptional()) {
- auto repl =
- formatv(fmt, formatv("({0}.empty() ? Value() : *{0}.begin())", name));
+ auto repl = formatv(
+ fmt, formatv("({0}.empty() ? ::mlir::Value() : *{0}.begin())", name));
LLVM_DEBUG(dbgs() << repl << " (OptionalOperand)\n");
return std::string(repl);
}
/// A matcher encapsulating `getBackwardSlice` method from SliceAnalysis.h.
/// Additionally, it limits the slice computation to a certain depth level using
diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12874,6 +12874,7 @@
),
includes = ["include"],
deps = [
+ ":Analysis",
":FuncDialect",
":IR",
":Reducer",
4 changes: 2 additions & 2 deletions third_party/llvm/workspace.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")

def repo(name):
"""Imports LLVM."""
LLVM_COMMIT = "52ed6791f87a3ef862f555f84ba88a7cdf8fe461"
LLVM_SHA256 = "5f4230b06dd2ff977919f26e2deb0b82da00f0a3265f60ac206743169693e933"
LLVM_COMMIT = "70ef89b9137e03b86cd49fd221cb8c0324984684"
LLVM_SHA256 = "1afc4d7133bd40c25ab3f5406db98a1e249cca744bddfc62cb6e91e21b1ba811"

tf_http_archive(
name = name,
Expand Down
Loading
0