Hierarchical scheduling visualization · verilator/verilator@af6a8f4 · GitHub

Commit af6a8f4

Hierarchical scheduling visualization
Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com>
1 parent 6b42d78 commit af6a8f4

2 files changed (+114, -43 lines)

src/V3ExecGraph.cpp

Lines changed: 113 additions & 39 deletions
@@ -97,7 +97,8 @@ class ThreadSchedule final {
     VL_UNCOPYABLE(ThreadSchedule);

     // Debugging
-    void dumpDotFile(const V3Graph& graph, const string& filename) const {
+    static void dumpDotFile(const std::vector<ThreadSchedule>& schedules, const V3Graph& graph,
+                            const string& filename, uint32_t nThreads) {
         // This generates a file used by graphviz, https://www.graphviz.org
         const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
         if (logp->fail()) v3fatal("Can't write file: " << filename);
@@ -109,52 +110,122 @@ class ThreadSchedule final {

         // Thread labels
         *logp << "\n  // Threads\n";
-        const int threadBoxWidth = 2;
-        for (int i = 0; i < v3Global.opt.threads(); i++) {
-            *logp << "  t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth
-                  << " pos=\"" << (-threadBoxWidth / 2) << "," << -i
-                  << "!\" style=\"filled\" fillcolor=\"grey\"] \n";
+
+        const auto emitBlock
+            = [&](const string& name, const string& label, double width, double height,
+                  double xPos, double yPos, const string& fillColor) {
+                  *logp << "  " << name << " [label=\"" << label << "\" width=" << width
+                        << " height=" << height << " pos=\"" << xPos << "," << yPos
+                        << "!\" style=\"filled\" fillcolor=\"" << fillColor << "\"]\n";
+              };
+
+        constexpr double threadBoxWidth = 2.0;
+        constexpr double threadBoxHeight = 1.5;
+        constexpr double horizontalGap = threadBoxWidth / 2;
+        for (uint32_t i = 0; i < nThreads; ++i) {
+            const string name = "t" + std::to_string(i);
+            const string label = "Thread " + std::to_string(i);
+            constexpr double posX = -horizontalGap;
+            const double posY = -i * threadBoxHeight;
+            emitBlock(name, label, threadBoxWidth, threadBoxHeight, posX, posY, "grey");
         }

         // MTask nodes
         *logp << "\n  // MTasks\n";

-        // Find minimum cost MTask for scaling MTask node widths
-        uint32_t minCost = UINT32_MAX;
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
+        const auto emitMTask = [&](const ExecMTask* const mtaskp, int index,
+                                   const ThreadSchedule& schedule) {
+            for (int i = 0; i < mtaskp->threads(); ++i) {
+                // Keep original name for the original thread of hierarchical task to keep
+                // dependency tracking, add '_' for the rest to differentiate them.
+                const string name
+                    = i == 0 ? mtaskp->name() : mtaskp->name() + '_' + std::to_string(i);
+                const string label = mtaskp->name() + " (" + std::to_string(startTime(mtaskp))
+                                     + ':' + std::to_string(endTime(mtaskp)) + ')'
+                                     + "\\ncost=" + std::to_string(mtaskp->cost())
+                                     + "\\npriority=" + std::to_string(mtaskp->priority());
+                const double xPos = (threadBoxWidth + horizontalGap) * index + horizontalGap;
+                const double yPos
+                    = -threadBoxHeight
+                      * static_cast<double>(threadId(mtaskp) + i * schedule.threads.size());
+                const string fillColor = i == 0 ? "white" : "lightgreen";
+                emitBlock(name, label, threadBoxWidth, threadBoxHeight, xPos, yPos, fillColor);
             }
-        }
-        const double minWidth = 2.0;
-        const auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
-            const double startPosX = (minWidth * startTime(mtaskp)) / minCost;
-            return nodeWidth / minWidth + startPosX;
         };

-        const auto emitMTask = [&](const ExecMTask* mtaskp) {
-            const int thread = threadId(mtaskp);
-            const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost);
-            const double x = mtaskXPos(mtaskp, nodeWidth);
-            const int y = -thread;
-            const string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp))
-                                 + ":" + std::to_string(endTime(mtaskp)) + ")" + "\"";
-            *logp << "  " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\""
-                  << x << "," << y << "!\"]\n";
+        const auto emitFork = [&](int index) {
+            const string& name = "fork_" + std::to_string(index);
+            constexpr double width = threadBoxWidth / 8;
+            const double height = threadBoxHeight * nThreads;
+            const double xPos = index * (threadBoxWidth + horizontalGap) - horizontalGap / 2;
+            const double yPos
+                = -static_cast<double>(nThreads) / 2 * threadBoxHeight + threadBoxHeight / 2;
+            emitBlock(name, "", width, height, xPos, yPos, "black");
         };

-        // Emit MTasks
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) emitMTask(mtaskp);
-        }
+        // Create columns of tasks whose execution intervals overlap
+        int offset = 0;
+        for (const ThreadSchedule& schedule : schedules) {
+            std::vector<std::vector<const ExecMTask*>> columns = {{}};
+
+            // Order tasks based on their start time
+            struct Cmp final {
+                bool operator()(const ExecMTask* const a, const ExecMTask* const b) const {
+                    if (startTime(a) == startTime(b)) return threadId(a) < threadId(b);
+                    return startTime(a) < startTime(b);
+                }
+            };
+            const std::multiset<const ExecMTask*, Cmp> tasks(schedule.mtasks.begin(),
+                                                             schedule.mtasks.end());
+            UASSERT(!tasks.empty(), "Thread schedule should have tasks");
+
+            for (const ExecMTask* const mtaskp : tasks) {
+                std::vector<const ExecMTask*>& column = columns.back();
+                UASSERT(column.size() <= nThreads, "Invalid partitioning");
+                bool intersects = true;
+                for (const ExecMTask* const earlierMtask : column) {
+                    if (endTime(mtaskp) <= startTime(earlierMtask)
+                        || startTime(mtaskp) >= endTime(earlierMtask)) {
+                        intersects = false;
+                        break;
+                    }
+                }
+                if (intersects) {
+                    column.emplace_back(mtaskp);
+                } else {
+                    columns.emplace_back(std::vector<const ExecMTask*>{mtaskp});
+                }
+            }
+
+            UASSERT(!columns.front().empty(), "Should be populated by mtasks");
+
+            for (const auto& column : columns) {
+                for (const ExecMTask* const mtask : column) emitMTask(mtask, offset, schedule);
+                ++offset;
+            }
+            emitFork(offset);
+
+            // Emit MTask dependency edges
+            *logp << "\n  // MTask dependencies\n";

-        // Emit MTask dependency edges
-        *logp << "\n  // MTask dependencies\n";
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                for (const V3GraphEdge& edge : mtaskp->outEdges()) {
-                    const V3GraphVertex* const top = edge.top();
-                    *logp << "  " << vtx.name() << " -> " << top->name() << "\n";
+            for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
+                if (thread.empty()) break;  // No more threads
+
+                // Show that schedule ends when all tasks are finished
+                *logp << "  " << thread.back()->name() << " -> fork_" << offset << "\n";
+
+                // Show that tasks from the same thread are executed in a sequence
+                for (size_t i = 1; i < thread.size(); ++i)
+                    *logp << "  " << thread[i - 1]->name() << " -> " << thread[i]->name() << "\n";
+
+                // Emit cross-task dependencies
+                for (const ExecMTask* const mtaskp : thread) {
+                    for (const V3GraphEdge& edge : mtaskp->outEdges()) {
+                        const ExecMTask* const topMTaskp = edge.top()->cast<const ExecMTask>();
+                        if (topMTaskp && schedule.contains(topMTaskp)
+                            && threadId(topMTaskp) != threadId(mtaskp))
+                            *logp << "  " << mtaskp->name() << " -> " << topMTaskp->name() << "\n";
+                    }
                 }
             }
         }
@@ -165,8 +236,10 @@ class ThreadSchedule final {
     }

     // Variant of dumpDotFilePrefixed without --dump option check
-    void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const {
-        dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot");
+    static void dumpDotFilePrefixedAlways(const std::vector<ThreadSchedule>& schedules,
+                                          const V3Graph& graph, const string& nameComment,
+                                          uint32_t nThreads) {
+        dumpDotFile(schedules, graph, v3Global.debugFilename(nameComment) + ".dot", nThreads);
     }

 public:
@@ -424,7 +497,8 @@ class PackThreads final {
         }

         // All schedules are combined on a single graph
-        if (dumpGraphLevel() >= 4) result.back().dumpDotFilePrefixedAlways(mtaskGraph, "schedule");
+        if (dumpGraphLevel() >= 4)
+            ThreadSchedule::dumpDotFilePrefixedAlways(result, mtaskGraph, "schedule", m_nThreads);

         return result;
     }
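
The core of the new dumpDotFile is its layout scheme: within each schedule, mtasks are sorted by start time, greedily packed into "columns" of mutually overlapping execution intervals, and each column is given one horizontal slot, with every node pinned at an explicit pos="x,y!" so a graphviz layout engine such as neato keeps the grid. The following standalone sketch mirrors that packing and node emission outside of Verilator; MiniTask, packColumns, and the sample task values are hypothetical stand-ins for ExecMTask and ThreadSchedule, not code from this commit.

// Minimal sketch (not Verilator code) of the column-packing idea used by the new dumpDotFile.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct MiniTask {  // Hypothetical stand-in for ExecMTask
    std::string name;
    uint32_t start, end;  // Execution interval
    uint32_t thread;      // Thread the task was scheduled on
};

// Group tasks into columns of pairwise-overlapping intervals, mirroring the diff's loop
static std::vector<std::vector<MiniTask>> packColumns(std::vector<MiniTask> tasks) {
    std::sort(tasks.begin(), tasks.end(), [](const MiniTask& a, const MiniTask& b) {
        if (a.start == b.start) return a.thread < b.thread;
        return a.start < b.start;
    });
    std::vector<std::vector<MiniTask>> columns{{}};
    for (const MiniTask& t : tasks) {
        std::vector<MiniTask>& column = columns.back();
        bool intersects = true;
        for (const MiniTask& earlier : column) {
            if (t.end <= earlier.start || t.start >= earlier.end) {
                intersects = false;
                break;
            }
        }
        if (intersects) {
            column.push_back(t);  // Overlaps everything already in the current column
        } else {
            columns.push_back({t});  // Start a new column further to the right
        }
    }
    return columns;
}

int main() {
    const std::vector<MiniTask> tasks{
        {"mtask10", 0, 30, 0}, {"mtask12", 5, 25, 1}, {"mtask14", 40, 60, 0}};
    const double boxW = 2.0, boxH = 1.5, gap = boxW / 2;
    int index = 0;
    for (const auto& column : packColumns(tasks)) {
        for (const MiniTask& t : column) {
            const double x = (boxW + gap) * index + gap;
            const double y = -boxH * t.thread;
            // Same pinned-position node style the diff writes into the .dot file
            std::cout << "  " << t.name << " [label=\"" << t.name << " (" << t.start << ':'
                      << t.end << ")\" width=" << boxW << " height=" << boxH << " pos=\"" << x
                      << ',' << y << "!\" style=\"filled\" fillcolor=\"white\"]\n";
        }
        ++index;
    }
}

The real code additionally emits one box per worker thread a hierarchical mtask occupies (the extra copies filled lightgreen) and draws a thin black fork_<n> bar after each schedule's columns, which this sketch omits.
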

src/V3OrderParallel.cpp

Lines changed: 1 addition & 4 deletions
@@ -1749,10 +1749,7 @@ class DpiThreadsVisitor final : public VNVisitorConst {
         m_threads = std::max(m_threads, V3Config::getHierWorkers(nodep->cname()));
         iterateChildrenConst(nodep);
     }
-    void visit(AstNodeCCall* nodep) override {
-        iterateChildrenConst(nodep);
-        iterateConst(nodep->funcp());
-    }
+    void visit(AstNodeCCall* nodep) override { iterateConst(nodep->funcp()); }
     void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }

 public:
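
The V3OrderParallel.cpp hunk narrows DpiThreadsVisitor's handling of AstNodeCCall: instead of iterating the call node's children and then the called function, it descends only into the callee's body via funcp(). Judging from the surrounding context, the visitor appears to collect the maximum hierarchical worker count along call edges; a rough standalone sketch of that traversal pattern (Func, Call, hierWorkers, and maxWorkers are illustrative names, not Verilator's AST or visitor API) might look like:

// Hypothetical sketch: accumulate the maximum worker count over a function and everything it
// transitively calls, by following call edges into callee bodies (analogous to funcp()).
#include <algorithm>
#include <string>
#include <vector>

struct Func;

struct Call {
    const Func* callee;  // Analogue of AstNodeCCall::funcp()
};

struct Func {
    std::string name;
    int hierWorkers;          // Analogue of V3Config::getHierWorkers(cname)
    std::vector<Call> calls;  // Calls appearing in this function's body
};

// Visit a function, then recurse through each call's callee only
static int maxWorkers(const Func& func) {
    int threads = func.hierWorkers;
    for (const Call& call : func.calls) {
        threads = std::max(threads, maxWorkers(*call.callee));
    }
    return threads;
}

int main() {
    Func leaf{"leaf", 4, {}};
    Func mid{"mid", 2, {{&leaf}}};
    Func top{"top", 1, {{&mid}}};
    return maxWorkers(top) == 4 ? 0 : 1;  // Expect the maximum along the call chain
}

A real visitor would also have to guard against call cycles, which this toy acyclic example ignores.
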
