diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 796933a4eb5f6..55bc33e76e5f6 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -16,6 +16,7 @@ #include "flang/Common/Fortran.h" #include "flang/Lower/LoweringOptions.h" #include "flang/Lower/PFTDefs.h" +#include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/Builders.h" @@ -296,6 +297,9 @@ class AbstractConverter { return loweringOptions; } + virtual Fortran::lower::SymbolBox + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/include/flang/Lower/SymbolMap.h b/flang/include/flang/Lower/SymbolMap.h index a55e4b133fe0a..9f18d63ce3e7d 100644 --- a/flang/include/flang/Lower/SymbolMap.h +++ b/flang/include/flang/Lower/SymbolMap.h @@ -312,6 +312,7 @@ class SymMap { lookupVariableDefinition(semantics::SymbolRef sym) { if (auto symBox = lookupSymbol(sym)) return symBox.getIfFortranVariableOpInterface(); + return std::nullopt; } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 579f94ba75684..226792c9f346e 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1052,7 +1052,10 @@ class FirConverter : public Fortran::lower::AbstractConverter { if (sym.detailsIf()) return symMap->lookupSymbol(sym); - return {}; + // With delayed privatization, Fortran symbols might now be mapped to + // simple `mlir::Value`s (arguments to the `omp.private` ops in this + // case). Therefore, it is possible that none of the above cases applies. 
+ // return {}; } if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym)) return v; @@ -1070,7 +1073,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. Fortran::lower::SymbolBox - lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) { + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override { if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym)) return v; return {}; diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 0a68aba162618..81160086b1e83 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -32,6 +32,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Support/CommandLine.h" @@ -40,6 +41,12 @@ static llvm::cl::opt treatIndexAsSection( llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), llvm::cl::init(true)); +static llvm::cl::opt enableDelayedPrivatization( + "openmp-enable-delayed-privatization", + llvm::cl::desc( + "Emit `[first]private` variables as clauses on the MLIR ops."), + llvm::cl::init(false)); + using DeclareTargetCapturePair = std::pair; @@ -147,6 +154,14 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, //===----------------------------------------------------------------------===// class DataSharingProcessor { +public: + struct DelayedPrivatizationInfo { + llvm::SetVector privatizers; + llvm::SetVector hostAddresses; + llvm::SetVector hostSymbols; + }; + +private: bool hasLastPrivateOp; mlir::OpBuilder::InsertPoint lastPrivIP; mlir::OpBuilder::InsertPoint insPt; @@ -161,6 +176,12 @@ class DataSharingProcessor { const Fortran::parser::OmpClauseList &opClauseList; Fortran::lower::pft::Evaluation &eval; + bool 
useDelayedPrivatization; + llvm::SetVector existingPrivatizerNames; + Fortran::lower::SymMap *symTable; + + DelayedPrivatizationInfo delayedPrivatizationInfo; + bool needBarrier(); void collectSymbols(Fortran::semantics::Symbol::Flag flag); void collectOmpObjectListSymbol( @@ -171,6 +192,8 @@ class DataSharingProcessor { void collectDefaultSymbols(); void privatize(); void defaultPrivatize(); + void doPrivatize(const Fortran::semantics::Symbol *sym); + void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const Fortran::semantics::Symbol *sym); @@ -182,10 +205,20 @@ class DataSharingProcessor { public: DataSharingProcessor(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList, - Fortran::lower::pft::Evaluation &eval) + Fortran::lower::pft::Evaluation &eval, + bool useDelayedPrivatization = false, + Fortran::lower::SymMap *symTable = nullptr) : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList), - eval(eval) {} + eval(eval), useDelayedPrivatization(useDelayedPrivatization), + symTable(symTable) { + for (auto privateOp : converter.getModuleOp() + .getRegion() + .getOps()) { + existingPrivatizerNames.insert(privateOp.getSymName()); + } + } + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. 
Step1 is performed at the place where process/processStep1 @@ -204,6 +237,10 @@ class DataSharingProcessor { assert(!loopIV && "Loop iteration variable already set"); loopIV = iv; } + + const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const { + return delayedPrivatizationInfo; + } }; void DataSharingProcessor::processStep1() { @@ -488,16 +525,15 @@ void DataSharingProcessor::collectDefaultSymbols() { } void DataSharingProcessor::privatize() { + for (const Fortran::semantics::Symbol *sym : privatizedSymbols) { if (const auto *commonDet = sym->detailsIf()) { for (const auto &mem : commonDet->objects()) { - cloneSymbol(&*mem); - copyFirstPrivateSymbol(&*mem); + doPrivatize(&*mem); } } else { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + doPrivatize(sym); } } } @@ -523,12 +559,66 @@ void DataSharingProcessor::defaultPrivatize() { !symbolsInNestedRegions.contains(sym) && !symbolsInParentRegions.contains(sym) && !privatizedSymbols.contains(sym)) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + doPrivatize(sym); } } } +void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) { + if (useDelayedPrivatization) { + auto ip = firOpBuilder.saveInsertionPoint(); + + auto moduleOp = firOpBuilder.getInsertionBlock() + ->getParentOp() + ->getParentOfType(); + + firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(), + moduleOp.getBodyRegion().front().end()); + + Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + assert(hsb && "Host symbol box not found"); + + mlir::Type symType = hsb.getAddr().getType(); + mlir::Location symLoc = hsb.getAddr().getLoc(); + std::string privatizerName = sym->name().ToString() + ".privatizer"; + + unsigned uniquingCounter = 0; + auto uniquePrivatizerName = mlir::SymbolTable::generateSymbolName<64>( + privatizerName, + [&](auto &suggestedName) { + return existingPrivatizerNames.count(suggestedName); + }, + uniquingCounter); + + auto privatizerOp = firOpBuilder.create( + 
symLoc, symType, uniquePrivatizerName); + firOpBuilder.setInsertionPointToEnd(&privatizerOp.getBody().front()); + + symTable->pushScope(); + symTable->addSymbol(*sym, privatizerOp.getArgument(0)); + symTable->pushScope(); + + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + + firOpBuilder.create( + hsb.getAddr().getLoc(), symTable->shallowLookupSymbol(*sym).getAddr()); + + symTable->popScope(); + symTable->popScope(); + firOpBuilder.restoreInsertionPoint(ip); + + delayedPrivatizationInfo.privatizers.insert( + mlir::SymbolRefAttr::get(privatizerOp)); + delayedPrivatizationInfo.hostAddresses.insert(hsb.getAddr()); + delayedPrivatizationInfo.hostSymbols.insert(sym); + existingPrivatizerNames.insert(uniquePrivatizerName); + } else { + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + } +} + //===----------------------------------------------------------------------===// // ClauseProcessor //===----------------------------------------------------------------------===// @@ -2267,7 +2357,9 @@ static void createBodyOfOp( Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, Fortran::lower::pft::Evaluation &eval, bool genNested, const Fortran::parser::OmpClauseList *clauses = nullptr, - const llvm::SmallVector &args = {}, + std::function( + mlir::Operation *)> + genRegionEntryCB = nullptr, bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -2281,27 +2373,15 @@ static void createBodyOfOp( // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. 
- if (args.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = - fir::getBase(op.getRegion().front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + auto regionArgs = + [&]() -> llvm::SmallVector { + if (genRegionEntryCB != nullptr) { + return genRegionEntryCB(op); } - firOpBuilder.setInsertionPointAfter(storeOp); - } else { + firOpBuilder.createBlock(&op.getRegion()); - } + return {}; + }(); // Mark the earliest insertion point. 
mlir::Operation *marker = insertMarker(firOpBuilder); @@ -2399,8 +2479,8 @@ static void createBodyOfOp( assert(tempDsp.has_value()); tempDsp->processStep2(op, isLoop); } else { - if (isLoop && args.size() > 0) - dsp->setLoopIV(converter.getSymbolAddress(*args[0])); + if (isLoop && regionArgs.size() > 0) + dsp->setLoopIV(converter.getSymbolAddress(*regionArgs[0])); dsp->processStep2(op, isLoop); } } @@ -2476,16 +2556,19 @@ static void genBodyOfTargetDataOp( } template -static OpTy genOpWithBody(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, bool outerCombined, - const Fortran::parser::OmpClauseList *clauseList, - Args &&...args) { +static OpTy genOpWithBody( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, bool outerCombined, + const Fortran::parser::OmpClauseList *clauseList, + std::function( + mlir::Operation *)> + genRegionEntryCB, + DataSharingProcessor *dsp, Args &&...args) { auto op = converter.getFirOpBuilder().create( currentLocation, std::forward(args)...); createBodyOfOp(op, converter, currentLocation, eval, genNested, - clauseList, - /*args=*/{}, outerCombined); + clauseList, genRegionEntryCB, outerCombined, dsp); return op; } @@ -2493,11 +2576,12 @@ static mlir::omp::MasterOp genMasterOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation) { - return genOpWithBody(converter, eval, genNested, - currentLocation, - /*outerCombined=*/false, - /*clauseList=*/nullptr, - /*resultTypes=*/mlir::TypeRange()); + return genOpWithBody( + converter, eval, genNested, currentLocation, + /*outerCombined=*/false, + /*clauseList=*/nullptr, /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, + /*resultTypes=*/mlir::TypeRange()); } static mlir::omp::OrderedRegionOp @@ -2507,11 +2591,14 @@ 
genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, return genOpWithBody( converter, eval, genNested, currentLocation, /*outerCombined=*/false, - /*clauseList=*/nullptr, /*simd=*/false); + /*clauseList=*/nullptr, /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, + /*simd=*/false); } static mlir::omp::ParallelOp genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, @@ -2533,8 +2620,67 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, if (!outerCombined) cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols); + if (!enableDelayedPrivatization) { + return genOpWithBody( + converter, eval, genNested, currentLocation, outerCombined, &clauseList, + /*genRegionEntryCB=*/nullptr, /*dsp=*/nullptr, + /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, + numThreadsClauseOperand, allocateOperands, allocatorOperands, + reductionVars, + reductionDeclSymbols.empty() + ? 
nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + reductionDeclSymbols), + procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, + /*privatizers=*/nullptr); + } + + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) { + dsp.processStep1(); + } + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + llvm::SmallVector privateSymAddresses( + delayedPrivatizationInfo.hostAddresses.begin(), + delayedPrivatizationInfo.hostAddresses.end()); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + auto privateVars = parallelOp.getPrivateVars(); + auto &region = parallelOp.getRegion(); + llvm::SmallVector privateVarTypes; + llvm::SmallVector privateVarLocs; + + for (auto privateVar : privateVars) { + privateVarTypes.push_back(privateVar.getType()); + privateVarLocs.push_back(privateVar.getLoc()); + } + + converter.getFirOpBuilder().createBlock(&region, {}, privateVarTypes, + privateVarLocs); + + int argIdx = 0; + for (const auto *sym : delayedPrivatizationInfo.hostSymbols) { + converter.bindSymbol(*sym, region.getArgument(argIdx)); + ++argIdx; + } + + return llvm::SmallVector( + delayedPrivatizationInfo.hostSymbols.begin(), + delayedPrivatizationInfo.hostSymbols.end()); + }; + return genOpWithBody( converter, eval, genNested, currentLocation, outerCombined, &clauseList, + genRegionEntryCB, &dsp, /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, numThreadsClauseOperand, allocateOperands, allocatorOperands, reductionVars, @@ -2542,7 +2688,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, ? 
nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols), - procBindKindAttr); + procBindKindAttr, privateSymAddresses, + privatizers.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + privatizers)); } static mlir::omp::SectionOp @@ -2554,7 +2704,9 @@ genSectionOp(Fortran::lower::AbstractConverter &converter, // all privatization is done within `omp.section` operations. return genOpWithBody( converter, eval, genNested, currentLocation, - /*outerCombined=*/false, &sectionsClauseList); + /*outerCombined=*/false, &sectionsClauseList, + /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr); } static mlir::omp::SingleOp @@ -2575,8 +2727,8 @@ genSingleOp(Fortran::lower::AbstractConverter &converter, return genOpWithBody( converter, eval, genNested, currentLocation, - /*outerCombined=*/false, &beginClauseList, allocateOperands, - allocatorOperands, nowaitAttr); + /*outerCombined=*/false, &beginClauseList, /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, allocateOperands, allocatorOperands, nowaitAttr); } static mlir::omp::TaskOp @@ -2608,8 +2760,9 @@ genTaskOp(Fortran::lower::AbstractConverter &converter, return genOpWithBody( converter, eval, genNested, currentLocation, - /*outerCombined=*/false, &clauseList, ifClauseOperand, finalClauseOperand, - untiedAttr, mergeableAttr, + /*outerCombined=*/false, &clauseList, /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, ifClauseOperand, finalClauseOperand, untiedAttr, + mergeableAttr, /*in_reduction_vars=*/mlir::ValueRange(), /*in_reductions=*/nullptr, priorityClauseOperand, dependTypeOperands.empty() @@ -2631,7 +2784,8 @@ genTaskGroupOp(Fortran::lower::AbstractConverter &converter, currentLocation, llvm::omp::Directive::OMPD_taskgroup); return genOpWithBody( converter, eval, genNested, currentLocation, - /*outerCombined=*/false, &clauseList, + /*outerCombined=*/false, &clauseList, /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, 
/*task_reduction_vars=*/mlir::ValueRange(), /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); } @@ -3015,6 +3169,8 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter, return genOpWithBody( converter, eval, genNested, currentLocation, outerCombined, &clauseList, + /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, /*num_teams_lower=*/nullptr, numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand, allocateOperands, allocatorOperands, reductionVars, @@ -3211,6 +3367,33 @@ static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, } } +static llvm::SmallVector genCodeForIterVar( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + const llvm::SmallVector &args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto &region = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(&region, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. 
+ mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); + + return args; +} + static void createSimdLoop(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, @@ -3258,9 +3441,14 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter, auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(loopOpClauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + return genCodeForIterVar(op, converter, loc, iv); + }; + createBodyOfOp(simdLoopOp, converter, loc, *nestedEval, /*genNested=*/true, &loopOpClauseList, - iv, /*outer=*/false, &dsp); + ivCallback, /*outer=*/false, &dsp); } static void createWsLoop(Fortran::lower::AbstractConverter &converter, @@ -3333,8 +3521,14 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(beginClauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + return genCodeForIterVar(op, converter, loc, iv); + }; + createBodyOfOp(wsLoopOp, converter, loc, *nestedEval, - /*genNested=*/true, &beginClauseList, iv, + /*genNested=*/true, &beginClauseList, + ivCallback, /*outer=*/false, &dsp); } @@ -3413,8 +3607,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; - genParallelOp(converter, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, + genParallelOp(converter, symTable, eval, /*genNested=*/false, + currentLocation, loopOpClauseList, /*outerCombined=*/true); } } @@ -3502,8 +3696,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, genOrderedRegionOp(converter, eval, /*genNested=*/true, 
currentLocation); break; case llvm::omp::Directive::OMPD_parallel: - genParallelOp(converter, eval, /*genNested=*/true, currentLocation, - beginClauseList); + genParallelOp(converter, symTable, eval, /*genNested=*/true, + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, eval, /*genNested=*/true, currentLocation, @@ -3562,8 +3756,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, .test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; - genParallelOp(converter, eval, /*genNested=*/false, currentLocation, - beginClauseList, outerCombined); + genParallelOp(converter, symTable, eval, /*genNested=*/false, + currentLocation, beginClauseList, outerCombined); combinedDirective = true; } if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) @@ -3646,7 +3840,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Parallel wrapper of PARALLEL SECTIONS construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - genParallelOp(converter, eval, + genParallelOp(converter, symTable, eval, /*genNested=*/false, currentLocation, sectionsClauseList, /*outerCombined=*/true); } else { @@ -3663,6 +3857,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, /*genNested=*/false, currentLocation, /*outerCombined=*/false, /*clauseList=*/nullptr, + /*genRegionEntryCB=*/nullptr, + /*dsp=*/nullptr, /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, allocateOperands, allocatorOperands, nowaitClauseOperand); diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index f89f28c006dec..e285a9a72bd9b 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3505,6 +3505,18 @@ struct ZeroOpConversion : public FIROpConversion { } }; +class DeclareOpConversion : public FIROpConversion { +public: + using FIROpConversion::FIROpConversion; + + mlir::LogicalResult + 
matchAndRewrite(fir::DeclareOp declareOp, OpAdaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOp(declareOp, declareOp.getMemref()); + return mlir::success(); + } +}; + /// `fir.unreachable` --> `llvm.unreachable` struct UnreachableOpConversion : public FIROpConversion { using FIROpConversion::FIROpConversion; @@ -3856,6 +3868,7 @@ class RenameMSVCLibmFuncs return mlir::success(); } }; + } // namespace namespace { @@ -3949,7 +3962,7 @@ class FIRToLLVMLowering UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion, UnrealizedConversionCastOpConversion, XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion, - ZeroOpConversion>(typeConverter, options); + ZeroOpConversion, DeclareOpConversion>(typeConverter, options); mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern); mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern); mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern); @@ -4002,7 +4015,8 @@ class FIRToLLVMLowering signalPassFailure(); } - // Run pass to add comdats to functions that have weak linkage on relevant platforms + // Run pass to add comdats to functions that have weak linkage on relevant + // platforms if (fir::getTargetTriple(mod).supportsCOMDAT()) { mlir::OpPassManager comdatPM("builtin.module"); comdatPM.addPass(mlir::LLVM::createLLVMAddComdats()); diff --git a/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90 b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90 new file mode 100644 index 0000000000000..d17e3c6da3caf --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90 @@ -0,0 +1,183 @@ +! TODO Convert this file into a bunch of lit tests for each conversion step. + +! 
RUN: bbc -fopenmp -emit-fir --openmp-enable-delayed-privatization -hlfir=false %s -o - + +subroutine delayed_privatization() + implicit none + integer :: var1 + integer :: var2 + + var1 = 111 + var2 = 222 + +!$OMP PARALLEL FIRSTPRIVATE(var1, var2) + var1 = var1 + var2 + 2 +!$OMP END PARALLEL + +end subroutine + +! ----------------------------------------- +! ## This is what flang emits with the PoC: +! ----------------------------------------- +! +! ---------------------------- +! ### Conversion to FIR + OMP: +! ---------------------------- +!module { +! func.func @_QPdelayed_privatization() { +! %0 = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatizationEvar1"} +! %1 = fir.alloca i32 {bindc_name = "var2", uniq_name = "_QFdelayed_privatizationEvar2"} +! %c111_i32 = arith.constant 111 : i32 +! fir.store %c111_i32 to %0 : !fir.ref +! %c222_i32 = arith.constant 222 : i32 +! fir.store %c222_i32 to %1 : !fir.ref +! omp.parallel private(@var1.privatizer %0, @var2.privatizer %1 : !fir.ref, !fir.ref) { +! ^bb0(%arg0: !fir.ref, %arg1: !fir.ref): +! %2 = fir.load %arg0 : !fir.ref +! %3 = fir.load %arg1 : !fir.ref +! %4 = arith.addi %2, %3 : i32 +! %c2_i32 = arith.constant 2 : i32 +! %5 = arith.addi %4, %c2_i32 : i32 +! fir.store %5 to %arg0 : !fir.ref +! omp.terminator +! } +! return +! } +! "omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "var1.privatizer"}> ({ +! ^bb0(%arg0: !fir.ref): +! %0 = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatizationEvar1"} +! %1 = fir.load %arg0 : !fir.ref +! fir.store %1 to %0 : !fir.ref +! omp.yield(%0 : !fir.ref) +! }) : () -> () +! "omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "var2.privatizer"}> ({ +! ^bb0(%arg0: !fir.ref): +! %0 = fir.alloca i32 {bindc_name = "var2", pinned, uniq_name = "_QFdelayed_privatizationEvar2"} +! %1 = fir.load %arg0 : !fir.ref +! fir.store %1 to %0 : !fir.ref +! omp.yield(%0 : !fir.ref) +! }) : () -> () +! +! 
----------------------------- +! ### Conversion to LLVM + OMP: +! ----------------------------- +!module { +! llvm.func @_QPdelayed_privatization() { +! %0 = llvm.mlir.constant(1 : i64) : i64 +! %1 = llvm.alloca %0 x i32 {bindc_name = "var1"} : (i64) -> !llvm.ptr +! %2 = llvm.mlir.constant(1 : i64) : i64 +! %3 = llvm.alloca %2 x i32 {bindc_name = "var2"} : (i64) -> !llvm.ptr +! %4 = llvm.mlir.constant(111 : i32) : i32 +! llvm.store %4, %1 : i32, !llvm.ptr +! %5 = llvm.mlir.constant(222 : i32) : i32 +! llvm.store %5, %3 : i32, !llvm.ptr +! omp.parallel private(@var1.privatizer %1, @var2.privatizer %3 : !llvm.ptr, !llvm.ptr) { +! ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): +! %6 = llvm.load %arg0 : !llvm.ptr -> i32 +! %7 = llvm.load %arg1 : !llvm.ptr -> i32 +! %8 = llvm.add %6, %7 : i32 +! %9 = llvm.mlir.constant(2 : i32) : i32 +! %10 = llvm.add %8, %9 : i32 +! llvm.store %10, %arg0 : i32, !llvm.ptr +! omp.terminator +! } +! llvm.return +! } +! "omp.private"() <{function_type = (!llvm.ptr) -> !llvm.ptr, sym_name = "var1.privatizer"}> ({ +! ^bb0(%arg0: !llvm.ptr): +! %0 = llvm.mlir.constant(1 : i64) : i64 +! %1 = llvm.alloca %0 x i32 {bindc_name = "var1", pinned} : (i64) -> !llvm.ptr +! %2 = llvm.load %arg0 : !llvm.ptr -> i32 +! llvm.store %2, %1 : i32, !llvm.ptr +! omp.yield(%1 : !llvm.ptr) +! }) : () -> () +! "omp.private"() <{function_type = (!llvm.ptr) -> !llvm.ptr, sym_name = "var2.privatizer"}> ({ +! ^bb0(%arg0: !llvm.ptr): +! %0 = llvm.mlir.constant(1 : i64) : i64 +! %1 = llvm.alloca %0 x i32 {bindc_name = "var2", pinned} : (i64) -> !llvm.ptr +! %2 = llvm.load %arg0 : !llvm.ptr -> i32 +! llvm.store %2, %1 : i32, !llvm.ptr +! omp.yield(%1 : !llvm.ptr) +! }) : () -> () +!} +! +! -------------------------- +! ### Conversion to LLVM IR: +! 
-------------------------- +!%struct.ident_t = type { i32, i32, i32, i32, ptr } + +!@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +!@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + +!define void @_QPdelayed_privatization() { +! %structArg = alloca { ptr, ptr }, align 8 +! %1 = alloca i32, i64 1, align 4 +! %2 = alloca i32, i64 1, align 4 +! store i32 111, ptr %1, align 4 +! store i32 222, ptr %2, align 4 +! br label %entry + +!entry: ; preds = %0 +! %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @1) +! br label %omp_parallel + +!omp_parallel: ; preds = %entry +! %gep_ = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 0 +! store ptr %1, ptr %gep_, align 8 +! %gep_2 = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 1 +! store ptr %2, ptr %gep_2, align 8 +! call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @_QPdelayed_privatization..omp_par, ptr %structArg) +! br label %omp.par.outlined.exit + +!omp.par.outlined.exit: ; preds = %omp_parallel +! br label %omp.par.exit.split + +!omp.par.exit.split: ; preds = %omp.par.outlined.exit +! ret void +!} + +!; Function Attrs: nounwind +!define internal void @_QPdelayed_privatization..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { +!omp.par.entry: +! %gep_ = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 0 +! %loadgep_ = load ptr, ptr %gep_, align 8 +! %gep_1 = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 1 +! %loadgep_2 = load ptr, ptr %gep_1, align 8 +! %tid.addr.local = alloca i32, align 4 +! %1 = load i32, ptr %tid.addr, align 4 +! store i32 %1, ptr %tid.addr.local, align 4 +! %tid = load i32, ptr %tid.addr.local, align 4 +! %2 = alloca i32, i64 1, align 4 +! %3 = load i32, ptr %loadgep_, align 4 +! store i32 %3, ptr %2, align 4 +! %4 = alloca i32, i64 1, align 4 +! %5 = load i32, ptr %loadgep_2, align 4 +! store i32 %5, ptr %4, align 4 +! 
br label %omp.par.region + +!omp.par.region: ; preds = %omp.par.entry +! br label %omp.par.region1 + +!omp.par.region1: ; preds = %omp.par.region +! %6 = load i32, ptr %2, align 4 +! %7 = load i32, ptr %4, align 4 +! %8 = add i32 %6, %7 +! %9 = add i32 %8, 2 +! store i32 %9, ptr %2, align 4 +! br label %omp.region.cont + +!omp.region.cont: ; preds = %omp.par.region1 +! br label %omp.par.pre_finalize + +!omp.par.pre_finalize: ; preds = %omp.region.cont +! br label %omp.par.outlined.exit.exitStub + +!omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +! ret void +!} + +!; Function Attrs: nounwind +!declare i32 @__kmpc_global_thread_num(ptr) #0 + +!; Function Attrs: nounwind +!declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...) #0 diff --git a/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90 b/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90 new file mode 100644 index 0000000000000..48022d95aa673 --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90 @@ -0,0 +1,71 @@ +! TODO Convert this file into a bunch of lit tests for each conversion step. + +! RUN: bbc -fopenmp -emit-hlfir --openmp-enable-delayed-privatization %s -o - + +subroutine delayed_privatization() + implicit none + integer :: var1 + integer :: var2 + + var1 = 111 + var2 = 222 + +!$OMP PARALLEL FIRSTPRIVATE(var1, var2) + var1 = var1 + var2 + 2 +!$OMP END PARALLEL + +end subroutine + + +! ----------------------------------------- +! ## This is what flang emits with the PoC: +! ----------------------------------------- +! +! ---------------------------- +! ### Conversion to HLFIR + OMP: +! ---------------------------- +!module { +! func.func @_QPdelayed_privatization() { +! %0 = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatizationEvar1"} +! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
%2 = fir.alloca i32 {bindc_name = "var2", uniq_name = "_QFdelayed_privatizationEvar2"} +! %3:2 = hlfir.declare %2 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! %c111_i32 = arith.constant 111 : i32 +! hlfir.assign %c111_i32 to %1#0 : i32, !fir.ref +! %c222_i32 = arith.constant 222 : i32 +! hlfir.assign %c222_i32 to %3#0 : i32, !fir.ref +! omp.parallel private(@var1.privatizer_0 %1#0, @var2.privatizer_0 %3#0 : !fir.ref, !fir.ref) { +! ^bb0(%arg0: !fir.ref, %arg1: !fir.ref): +! %4:2 = hlfir.declare %arg0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! %5:2 = hlfir.declare %arg1 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! %6 = fir.load %4#0 : !fir.ref +! %7 = fir.load %5#0 : !fir.ref +! %8 = arith.addi %6, %7 : i32 +! %c2_i32 = arith.constant 2 : i32 +! %9 = arith.addi %8, %c2_i32 : i32 +! hlfir.assign %9 to %4#0 : i32, !fir.ref +! omp.terminator +! } +! return +! } +! "omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "var1.privatizer_0"}> ({ +! ^bb0(%arg0: !fir.ref): +! %0 = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatizationEvar1"} +! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! %2 = fir.load %arg0 : !fir.ref +! hlfir.assign %2 to %1#0 temporary_lhs : i32, !fir.ref +! omp.yield(%1#0 : !fir.ref) +! }) : () -> () +! "omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "var2.privatizer_0"}> ({ +! ^bb0(%arg0: !fir.ref): +! %0 = fir.alloca i32 {bindc_name = "var2", pinned, uniq_name = "_QFdelayed_privatizationEvar2"} +! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! %2 = fir.load %arg0 : !fir.ref +! hlfir.assign %2 to %1#0 temporary_lhs : i32, !fir.ref +! omp.yield(%1#0 : !fir.ref) +! }) : () -> () +!} +! +! +! 
### After lowering `hlfir` to `fir`, conversion to LLVM + OMP -> LLVM IR produces the exact same result as for +! `delayed_privatization.f90`. diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index ca36350548577..3ee3f8fe5df8a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -16,6 +16,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/IR/OpBase.td" +include "mlir/Interfaces/FunctionInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/IR/SymbolInterfaces.td" @@ -179,7 +180,9 @@ def ParallelOp : OpenMP_Op<"parallel", [ Variadic:$allocators_vars, Variadic:$reduction_vars, OptionalAttr:$reductions, - OptionalAttr:$proc_bind_val); + OptionalAttr:$proc_bind_val, + Variadic:$private_vars, + OptionalAttr:$privatizers); let regions = (region AnyRegion:$region); @@ -203,6 +206,10 @@ def ParallelOp : OpenMP_Op<"parallel", [ $allocators_vars, type($allocators_vars) ) `)` | `proc_bind` `(` custom($proc_bind_val) `)` + | `private` `(` + custom( + $private_vars, type($private_vars), $privatizers + ) `)` ) $region attr-dict }]; let hasVerifier = 1; @@ -612,7 +619,7 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments, def YieldOp : OpenMP_Op<"yield", [Pure, ReturnLike, Terminator, ParentOneOf<["WsLoopOp", "ReductionDeclareOp", - "AtomicUpdateOp", "SimdLoopOp"]>]> { + "AtomicUpdateOp", "SimdLoopOp", "PrivateClauseOp"]>]> { let summary = "loop yield and termination operation"; let description = [{ "omp.yield" yields SSA values from the OpenMP dialect op region and @@ -1479,6 +1486,38 @@ def Target_UpdateDataOp: OpenMP_Op<"target_update_data", //===----------------------------------------------------------------------===// // 2.14.5 target construct //===----------------------------------------------------------------------===// +def PrivateClauseOp : OpenMP_Op<"private", [ +
IsolatedFromAbove, FunctionOpInterface + ]> { + let summary = "TODO"; + let description = [{}]; + + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttrOf:$function_type); + + let regions = (region AnyRegion:$body); + + let builders = [OpBuilder<(ins + "::mlir::Type":$privateVarType, + "::llvm::StringRef":$privatizerName + )>]; + + let extraClassDeclaration = [{ + ::mlir::Region *getCallableRegion() { + return &getBody(); + } + + /// Returns the argument types of this function. + ArrayRef getArgumentTypes() { + return getFunctionType().getInputs(); + } + + /// Returns the result types of this function. + ArrayRef getResultTypes() { + return getFunctionType().getResults(); + } + }]; +} def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface, OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> { diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 730858ffc67a7..d4ccbdf608293 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -46,6 +46,17 @@ struct RegionOpConversion : public ConvertOpToLLVMPattern { *this->getTypeConverter()))) return failure(); + if constexpr (std::is_same_v) { + auto llvmType = this->getTypeConverter()->convertType( + adaptor.getFunctionType().getInput(0)); + + if (!llvmType) + return rewriter.notifyMatchFailure(curOp, + "signature conversion failed"); + newOp.setFunctionType( + FunctionType::get(rewriter.getContext(), {llvmType}, {llvmType})); + } + rewriter.eraseOp(curOp); return success(); } @@ -231,11 +242,12 @@ void mlir::configureOpenMPToLLVMConversionLegality( mlir::omp::DataOp, mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp, mlir::omp::WsLoopOp, mlir::omp::SimdLoopOp, mlir::omp::MasterOp, mlir::omp::SectionOp, mlir::omp::SectionsOp, mlir::omp::SingleOp, - mlir::omp::TaskGroupOp, mlir::omp::TaskOp>([&](Operation *op) { - return 
typeConverter.isLegal(&op->getRegion(0)) && - typeConverter.isLegal(op->getOperandTypes()) && - typeConverter.isLegal(op->getResultTypes()); - }); + mlir::omp::TaskGroupOp, mlir::omp::TaskOp, mlir::omp::PrivateClauseOp>( + [&](Operation *op) { + return typeConverter.isLegal(&op->getRegion(0)) && + typeConverter.isLegal(op->getOperandTypes()) && + typeConverter.isLegal(op->getResultTypes()); + }); target.addDynamicallyLegalOp< mlir::omp::AtomicReadOp, mlir::omp::AtomicWriteOp, mlir::omp::FlushOp, mlir::omp::ThreadprivateOp, mlir::omp::YieldOp, mlir::omp::EnterDataOp, @@ -275,6 +287,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter, RegionOpConversion, RegionOpConversion, RegionOpConversion, RegionOpConversion, RegionOpConversion, RegionOpConversion, + RegionOpConversion, RegionLessOpWithVarOperandsConversion, RegionOpWithVarOperandsConversion, RegionLessOpWithVarOperandsConversion, diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 2f8b3f7e11de1..889aa755d8ba4 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -420,7 +420,9 @@ struct ParallelOpLowering : public OpRewritePattern { /* allocators_vars = */ llvm::SmallVector{}, /* reduction_vars = */ llvm::SmallVector{}, /* reductions = */ ArrayAttr{}, - /* proc_bind_val = */ omp::ClauseProcBindKindAttr{}); + /* proc_bind_val = */ omp::ClauseProcBindKindAttr{}, + /*private_vars=*/mlir::ValueRange{}, + /*privatizers=*/nullptr); { OpBuilder::InsertionGuard guard(rewriter); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 381f17d080419..5d4be49369ce5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -989,8 +989,10 @@ void ParallelOp::build(OpBuilder &builder, OperationState &state, ParallelOp::build( builder, state, 
/*if_expr_var=*/nullptr, /*num_threads_var=*/nullptr, /*allocate_vars=*/ValueRange(), /*allocators_vars=*/ValueRange(), - /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, - /*proc_bind_val=*/nullptr); + /*reduction_vars=*/ValueRange(), + /*reductions=*/nullptr, + /*proc_bind_val=*/nullptr, /*private_vars=*/ValueRange(), + /*privatizers*/ nullptr); state.addAttributes(attributes); } @@ -1594,6 +1596,93 @@ LogicalResult DataBoundsOp::verify() { return success(); } +void PrivateClauseOp::build(OpBuilder &odsBuilder, OperationState &odsState, + Type privateVarType, StringRef privatizerName) { + FunctionType privatizerType = FunctionType::get( + odsBuilder.getContext(), {privateVarType}, {privateVarType}); + + build(odsBuilder, odsState, privatizerName, privatizerType); + + mlir::Block &block = odsState.regions.front()->emplaceBlock(); + block.addArgument(privateVarType, odsState.location); +} + +static ParseResult parsePrivateVarList( + OpAsmParser &parser, + llvm::SmallVector &privateVarsOperands, + llvm::SmallVector &privateVarsTypes, ArrayAttr &privatizersAttr) { + SymbolRefAttr privatizerSym; + OpAsmParser::UnresolvedOperand arg; + OpAsmParser::UnresolvedOperand blockArg; + Type argType; + + SmallVector privatizersVec; + + auto parsePrivatizers = [&]() -> ParseResult { + if (parser.parseAttribute(privatizerSym) || parser.parseOperand(arg)) { + return failure(); + } + + privatizersVec.push_back(privatizerSym); + privateVarsOperands.push_back(arg); + return success(); + }; + + auto parseTypes = [&]() -> ParseResult { + if (parser.parseType(argType)) + return failure(); + privateVarsTypes.push_back(argType); + return success(); + }; + + if (parser.parseCommaSeparatedList(parsePrivatizers)) + return failure(); + + SmallVector privatizers(privatizersVec.begin(), + privatizersVec.end()); + privatizersAttr = ArrayAttr::get(parser.getContext(), privatizers); + + if (parser.parseColon()) + return failure(); + + if (parser.parseCommaSeparatedList(parseTypes)) + 
return failure(); + + return success(); +} + +static void printPrivateVarList(OpAsmPrinter &printer, Operation *op, + OperandRange privateVars, + TypeRange privateVarTypes, + std::optional privatizersAttr) { + unsigned argIndex = 0; + // TODO Add an op verifier instead of this assertion. + assert( + privateVars.size() == privateVarTypes.size() && + ((privateVars.empty()) || + (*privatizersAttr && (privatizersAttr->size() == privateVars.size())))); + + for (const auto &privateVar : privateVars) { + assert(privatizersAttr); + const auto &privatizerSym = (*privatizersAttr)[argIndex]; + printer << privatizerSym << " " << privateVar; + + argIndex++; + if (argIndex < privateVars.size()) + printer << ", "; + } + + printer << " : "; + + argIndex = 0; + for (const auto &mapType : privateVarTypes) { + printer << mapType; + argIndex++; + if (argIndex < privateVarTypes.size()) + printer << ", "; + } +} + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc" diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 79956f82ed141..4de3f32c17908 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1000,6 +1000,29 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } +/// Replace the region arguments of the parallel op (which correspond to private +/// variables) with the actual private variables they correspond to. This +/// prepares the parallel op so that it matches what is expected by the +/// OMPIRBuilder.
+static void prepareOmpParallel(omp::ParallelOp opInst) { + auto &region = opInst.getRegion(); + auto privateVars = opInst.getPrivateVars(); + + auto privateVarsIt = privateVars.begin(); + for (size_t argIdx = 0; argIdx < region.getNumArguments(); + ++argIdx, ++privateVarsIt) { + for (auto &block : region) { + for (auto &op : block) { + op.replaceUsesOfWith(region.getArgument(argIdx), *privateVarsIt); + } + } + } + + // Erase the front argument until none remain; indices shift on each erase. + while (region.getNumArguments() > 0) + region.eraseArgument(0); +} + /// Converts the OpenMP parallel operation to LLVM IR. static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, @@ -1008,6 +1031,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. LogicalResult bodyGenStatus = success(); + prepareOmpParallel(opInst); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { @@ -1092,6 +1116,75 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, llvm::Value *&replacementValue) -> InsertPointTy { replacementValue = &vPtr; + // If this is a private value, this lambda will return the corresponding + // mlir value and its `PrivateClauseOp`. Otherwise, empty values are + // returned.
+ auto [privVar, + privInit] = [&]() -> std::pair { + if (!opInst.getPrivateVars().empty()) { + auto privVars = opInst.getPrivateVars(); + auto privInits = opInst.getPrivatizers(); + assert(privInits && privInits->size() == privVars.size()); + + const auto *privInitIt = privInits->begin(); + for (auto privVarIt = privVars.begin(); privVarIt != privVars.end(); + ++privVarIt, ++privInitIt) { + auto *llvmPrivVarOp = moduleTranslation.lookupValue(*privVarIt); + if (llvmPrivVarOp != &vPtr) { + continue; + } + + auto privSym = llvm::cast(*privInitIt); + auto privOp = + SymbolTable::lookupNearestSymbolFrom( + opInst, privSym); + + return {*privVarIt, privOp}; + } + } + + return {mlir::Value(), omp::PrivateClauseOp()}; + }(); + + if (privVar) { + + // Replace the privatizer block argument with mlir value being privatized. + // This way, the body of the privatizer will be changed from using the + // region/block argument to the value being privatized. + assert(privInit->getRegions().front().getNumArguments() == 1); + + auto arg = privInit->getRegions().front().getArgument(0); + for (auto &op : privInit->getRegions().front().front()) { + op.replaceUsesOfWith(arg, privVar); + } + + auto oldIP = builder.saveIP(); + builder.restoreIP(allocaIP); + + // Temporarily unlink the terminator from its parent since + // `inlineConvertOmpRegions` expects the insertion block to **not** + // contain a terminator. + auto &allocaTerminator = builder.GetInsertBlock()->back(); + assert(allocaTerminator.isTerminator()); + allocaTerminator.removeFromParent(); + + SmallVector yieldedValues; + if (failed(inlineConvertOmpRegions(privInit->getRegion(0), + "omp.privatizer", builder, + moduleTranslation, &yieldedValues))) { + // TODO proper error-handling. 
+ builder.restoreIP(oldIP); + return codeGenIP; + } + + allocaTerminator.insertAfter(&builder.GetInsertBlock()->back()); + + assert(yieldedValues.size() == 1); + replacementValue = yieldedValues.front(); + + builder.restoreIP(oldIP); + } + return codeGenIP; }; @@ -3009,12 +3102,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::TargetOp) { return convertOmpTarget(*op, builder, moduleTranslation); }) - .Case([&](auto op) { - // No-op, should be handled by relevant owning operations e.g. - // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then - // discarded - return success(); - }) + .Case( + [&](auto op) { + // No-op, should be handled by relevant owning operations e.g. + // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then + // discarded + return success(); + }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 65a704d18107b..0335e5c951f24 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -59,7 +59,7 @@ func.func @omp_parallel(%data_var : memref, %if_cond : i1, %num_threads : i // CHECK: omp.parallel num_threads(%{{.*}} : i32) allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel"(%num_threads, %data_var, %data_var) ({ omp.terminator - }) {operandSegmentSizes = array} : (i32, memref, memref) -> () + }) {operandSegmentSizes = array} : (i32, memref, memref) -> () // CHECK: omp.barrier omp.barrier @@ -68,22 +68,22 @@ func.func @omp_parallel(%data_var : memref, %if_cond : i1, %num_threads : i // CHECK: omp.parallel if(%{{.*}}) allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel"(%if_cond, %data_var, %data_var) ({ omp.terminator - }) {operandSegmentSizes = array} : (i1, memref, memref) -> () + }) {operandSegmentSizes = array} : (i1, memref, memref) -> () // test without allocate // CHECK: omp.parallel if(%{{.*}}) 
num_threads(%{{.*}} : i32) "omp.parallel"(%if_cond, %num_threads) ({ omp.terminator - }) {operandSegmentSizes = array} : (i1, i32) -> () + }) {operandSegmentSizes = array} : (i1, i32) -> () omp.terminator - }) {operandSegmentSizes = array, proc_bind_val = #omp} : (i1, i32, memref, memref) -> () + }) {operandSegmentSizes = array, proc_bind_val = #omp} : (i1, i32, memref, memref) -> () // test with multiple parameters for single variadic argument // CHECK: omp.parallel allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel" (%data_var, %data_var) ({ omp.terminator - }) {operandSegmentSizes = array} : (memref, memref) -> () + }) {operandSegmentSizes = array} : (memref, memref) -> () return } diff --git a/mlir/test/Dialect/OpenMP/roundtrip.mlir b/mlir/test/Dialect/OpenMP/roundtrip.mlir new file mode 100644 index 0000000000000..c6e9fab6f7f98 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/roundtrip.mlir @@ -0,0 +1,36 @@ +// RUN: fir-opt -verify-diagnostics %s | fir-opt | FileCheck %s + +// CHECK-LABEL: _QPprivate_clause +func.func @_QPprivate_clause() { + %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFprivate_clause_allocatableEx"} + %1 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFprivate_clause_allocatableEy"} + + // CHECK: omp.parallel private(@x.privatizer %0, @y.privatizer %1 : !fir.ref, !fir.ref) + omp.parallel private(@x.privatizer %0, @y.privatizer %1: !fir.ref, !fir.ref) { + omp.terminator + } + return +} + +// CHECK: "omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "x.privatizer"}> ({ +"omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "x.privatizer"}> ({ +// CHECK: ^bb0(%arg0: {{.*}}): +^bb0(%arg0: !fir.ref): + + // CHECK: %0 = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFprivate_clause_allocatableEx"} + %0 = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFprivate_clause_allocatableEx"} + + // CHECK: omp.yield(%0 : !fir.ref) + omp.yield(%0 : !fir.ref) +}) : () -> () + +// CHECK: 
"omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "y.privatizer"}> ({ +"omp.private"() <{function_type = (!fir.ref) -> !fir.ref, sym_name = "y.privatizer"}> ({ +^bb0(%arg0: !fir.ref): + + // CHECK: %0 = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFprivate_clause_allocatableEy"} + %0 = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFprivate_clause_allocatableEy"} + + // CHECK: omp.yield(%0 : !fir.ref) + omp.yield(%0 : !fir.ref) +}) : () -> ()