@@ -97,7 +97,8 @@ class ThreadSchedule final {
97
97
VL_UNCOPYABLE (ThreadSchedule);
98
98
99
99
// Debugging
100
- void dumpDotFile (const V3Graph& graph, const string& filename) const {
100
+ static void dumpDotFile (const std::vector<ThreadSchedule>& schedules, const V3Graph& graph,
101
+ const string& filename, uint32_t nThreads) {
101
102
// This generates a file used by graphviz, https://www.graphviz.org
102
103
const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream (filename)};
103
104
if (logp->fail ()) v3fatal (" Can't write file: " << filename);
@@ -109,52 +110,122 @@ class ThreadSchedule final {
109
110
110
111
// Thread labels
111
112
*logp << " \n // Threads\n " ;
112
- const int threadBoxWidth = 2 ;
113
- for (int i = 0 ; i < v3Global.opt .threads (); i++) {
114
- *logp << " t" << i << " [label=\" Thread " << i << " \" width=" << threadBoxWidth
115
- << " pos=\" " << (-threadBoxWidth / 2 ) << " ," << -i
116
- << " !\" style=\" filled\" fillcolor=\" grey\" ] \n " ;
113
+
114
+ const auto emitBlock
115
+ = [&](const string& name, const string& label, double width, double height,
116
+ double xPos, double yPos, const string& fillColor) {
117
+ *logp << " " << name << " [label=\" " << label << " \" width=" << width
118
+ << " height=" << height << " pos=\" " << xPos << " ," << yPos
119
+ << " !\" style=\" filled\" fillcolor=\" " << fillColor << " \" ]\n " ;
120
+ };
121
+
122
+ constexpr double threadBoxWidth = 2.0 ;
123
+ constexpr double threadBoxHeight = 1.5 ;
124
+ constexpr double horizontalGap = threadBoxWidth / 2 ;
125
+ for (uint32_t i = 0 ; i < nThreads; ++i) {
126
+ const string name = " t" + std::to_string (i);
127
+ const string label = " Thread " + std::to_string (i);
128
+ constexpr double posX = -horizontalGap;
129
+ const double posY = -i * threadBoxHeight;
130
+ emitBlock (name, label, threadBoxWidth, threadBoxHeight, posX, posY, " grey" );
117
131
}
118
132
119
133
// MTask nodes
120
134
*logp << " \n // MTasks\n " ;
121
135
122
- // Find minimum cost MTask for scaling MTask node widths
123
- uint32_t minCost = UINT32_MAX;
124
- for (const V3GraphVertex& vtx : graph.vertices ()) {
125
- if (const ExecMTask* const mtaskp = vtx.cast <const ExecMTask>()) {
126
- minCost = minCost > mtaskp->cost () ? mtaskp->cost () : minCost;
136
+ const auto emitMTask = [&](const ExecMTask* const mtaskp, int index ,
137
+ const ThreadSchedule& schedule) {
138
+ for (int i = 0 ; i < mtaskp->threads (); ++i) {
139
+ // Keep original name for the original thread of a hierarchical task to keep
140
+ // dependency tracking, add '_' for the rest to differentiate them.
141
+ const string name
142
+ = i == 0 ? mtaskp->name () : mtaskp->name () + ' _' + std::to_string (i);
143
+ const string label = mtaskp->name () + " (" + std::to_string (startTime (mtaskp))
144
+ + ' :' + std::to_string (endTime (mtaskp)) + ' )'
145
+ + " \\ ncost=" + std::to_string (mtaskp->cost ())
146
+ + " \\ npriority=" + std::to_string (mtaskp->priority ());
147
+ const double xPos = (threadBoxWidth + horizontalGap) * index + horizontalGap;
148
+ const double yPos
149
+ = -threadBoxHeight
150
+ * static_cast <double >(threadId (mtaskp) + i * schedule.threads .size ());
151
+ const string fillColor = i == 0 ? " white" : " lightgreen" ;
152
+ emitBlock (name, label, threadBoxWidth, threadBoxHeight, xPos, yPos, fillColor);
127
153
}
128
- }
129
- const double minWidth = 2.0 ;
130
- const auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
131
- const double startPosX = (minWidth * startTime (mtaskp)) / minCost;
132
- return nodeWidth / minWidth + startPosX;
133
154
};
134
155
135
- const auto emitMTask = [&](const ExecMTask* mtaskp) {
136
- const int thread = threadId (mtaskp);
137
- const double nodeWidth = minWidth * (static_cast <double >(mtaskp->cost ()) / minCost);
138
- const double x = mtaskXPos (mtaskp, nodeWidth);
139
- const int y = -thread;
140
- const string label = " label=\" " + mtaskp->name () + " (" + cvtToStr (startTime (mtaskp))
141
- + " :" + std::to_string (endTime (mtaskp)) + " )" + " \" " ;
142
- *logp << " " << mtaskp->name () << " [" << label << " width=" << nodeWidth << " pos=\" "
143
- << x << " ," << y << " !\" ]\n " ;
156
+ const auto emitFork = [&](int index ) {
157
+ const string& name = " fork_" + std::to_string (index );
158
+ constexpr double width = threadBoxWidth / 8 ;
159
+ const double height = threadBoxHeight * nThreads;
160
+ const double xPos = index * (threadBoxWidth + horizontalGap) - horizontalGap / 2 ;
161
+ const double yPos
162
+ = -static_cast <double >(nThreads) / 2 * threadBoxHeight + threadBoxHeight / 2 ;
163
+ emitBlock (name, " " , width, height, xPos, yPos, " black" );
144
164
};
145
165
146
- // Emit MTasks
147
- for (const V3GraphVertex& vtx : graph.vertices ()) {
148
- if (const ExecMTask* const mtaskp = vtx.cast <const ExecMTask>()) emitMTask (mtaskp);
149
- }
166
+ // Create columns of tasks whose execution intervals overlap
167
+ int offset = 0 ;
168
+ for (const ThreadSchedule& schedule : schedules) {
169
+ std::vector<std::vector<const ExecMTask*>> columns = {{}};
170
+
171
+ // Order tasks based on their start time
172
+ struct Cmp final {
173
+ bool operator ()(const ExecMTask* const a, const ExecMTask* const b) const {
174
+ if (startTime (a) == startTime (b)) return threadId (a) < threadId (b);
175
+ return startTime (a) < startTime (b);
176
+ }
177
+ };
178
+ const std::multiset<const ExecMTask*, Cmp> tasks (schedule.mtasks .begin (),
179
+ schedule.mtasks .end ());
180
+ UASSERT (!tasks.empty (), " Thread schedule should have tasks" );
181
+
182
+ for (const ExecMTask* const mtaskp : tasks) {
183
+ std::vector<const ExecMTask*>& column = columns.back ();
184
+ UASSERT (column.size () <= nThreads, " Invalid partitioning" );
185
+ bool intersects = true ;
186
+ for (const ExecMTask* const earlierMtask : column) {
187
+ if (endTime (mtaskp) <= startTime (earlierMtask)
188
+ || startTime (mtaskp) >= endTime (earlierMtask)) {
189
+ intersects = false ;
190
+ break ;
191
+ }
192
+ }
193
+ if (intersects) {
194
+ column.emplace_back (mtaskp);
195
+ } else {
196
+ columns.emplace_back (std::vector<const ExecMTask*>{mtaskp});
197
+ }
198
+ }
199
+
200
+ UASSERT (!columns.front ().empty (), " Should be populated by mtasks" );
201
+
202
+ for (const auto & column : columns) {
203
+ for (const ExecMTask* const mtask : column) emitMTask (mtask, offset, schedule);
204
+ ++offset;
205
+ }
206
+ emitFork (offset);
207
+
208
+ // Emit MTask dependency edges
209
+ *logp << " \n // MTask dependencies\n " ;
150
210
151
- // Emit MTask dependency edges
152
- *logp << " \n // MTask dependencies\n " ;
153
- for (const V3GraphVertex& vtx : graph.vertices ()) {
154
- if (const ExecMTask* const mtaskp = vtx.cast <const ExecMTask>()) {
155
- for (const V3GraphEdge& edge : mtaskp->outEdges ()) {
156
- const V3GraphVertex* const top = edge.top ();
157
- *logp << " " << vtx.name () << " -> " << top->name () << " \n " ;
211
+ for (const std::vector<const ExecMTask*>& thread : schedule.threads ) {
212
+ if (thread.empty ()) break ; // No more threads
213
+
214
+ // Show that schedule ends when all tasks are finished
215
+ *logp << " " << thread.back ()->name () << " -> fork_" << offset << " \n " ;
216
+
217
+ // Show that tasks from the same thread are executed in a sequence
218
+ for (size_t i = 1 ; i < thread.size (); ++i)
219
+ *logp << " " << thread[i - 1 ]->name () << " -> " << thread[i]->name () << " \n " ;
220
+
221
+ // Emit cross-task dependencies
222
+ for (const ExecMTask* const mtaskp : thread) {
223
+ for (const V3GraphEdge& edge : mtaskp->outEdges ()) {
224
+ const ExecMTask* const topMTaskp = edge.top ()->cast <const ExecMTask>();
225
+ if (topMTaskp && schedule.contains (topMTaskp)
226
+ && threadId (topMTaskp) != threadId (mtaskp))
227
+ *logp << " " << mtaskp->name () << " -> " << topMTaskp->name () << " \n " ;
228
+ }
158
229
}
159
230
}
160
231
}
@@ -165,8 +236,10 @@ class ThreadSchedule final {
165
236
}
166
237
167
238
// Variant of dumpDotFilePrefixed without --dump option check
168
- void dumpDotFilePrefixedAlways (const V3Graph& graph, const string& nameComment) const {
169
- dumpDotFile (graph, v3Global.debugFilename (nameComment) + " .dot" );
239
+ static void dumpDotFilePrefixedAlways (const std::vector<ThreadSchedule>& schedules,
240
+ const V3Graph& graph, const string& nameComment,
241
+ uint32_t nThreads) {
242
+ dumpDotFile (schedules, graph, v3Global.debugFilename (nameComment) + " .dot" , nThreads);
170
243
}
171
244
172
245
public:
@@ -424,7 +497,8 @@ class PackThreads final {
424
497
}
425
498
426
499
// All schedules are combined on a single graph
427
- if (dumpGraphLevel () >= 4 ) result.back ().dumpDotFilePrefixedAlways (mtaskGraph, " schedule" );
500
+ if (dumpGraphLevel () >= 4 )
501
+ ThreadSchedule::dumpDotFilePrefixedAlways (result, mtaskGraph, " schedule" , m_nThreads);
428
502
429
503
return result;
430
504
}
0 commit comments