Produce split FST files when using multiple threads · verilator/verilator@34eecc9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 34eecc9

Browse files
committed
Produce split FST files when using multiple threads
1 parent 51a97cc commit 34eecc9

15 files changed

+426
-90
lines changed

include/verilated_fst_c.cpp

Lines changed: 165 additions & 41 deletions
Large diffs are not rendered by default.

include/verilated_fst_c.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626

2727
#include <list>
2828
#include <map>
29+
#include <memory>
2930
#include <string>
3031
#include <vector>
3132

3233
typedef uint32_t vlFstHandle;
3334
typedef uint32_t vlFstEnumHandle;
3435

36+
class VerilatedFstWriter;
3537
class VerilatedFstBuffer;
3638

3739
//=============================================================================
@@ -49,11 +51,11 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe
4951
//=========================================================================
5052
// FST-specific internals
5153

52-
void* m_fst = nullptr;
54+
std::vector<VerilatedFstWriter*> m_writerps;
5355
std::map<uint32_t, vlFstHandle> m_code2symbol;
5456
std::map<int, vlFstEnumHandle> m_local2fstdtype;
5557
vlFstHandle* m_symbolp = nullptr; // same as m_code2symbol, but as an array
56-
char* m_strbufp = nullptr; // String buffer long enough to hold maxBits() chars
58+
std::vector<std::unique_ptr<char[]>> m_strbufps; // Buffers able to hold maxBits() chars
5759

5860
bool m_useFstWriterThread = false; // Whether to use the separate FST writer thread
5961

@@ -63,9 +65,9 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe
6365

6466
// CONSTRUCTORS
6567
VL_UNCOPYABLE(VerilatedFst);
66-
void declare(uint32_t code, const char* name, int dtypenum, VerilatedTraceSigDirection,
67-
VerilatedTraceSigKind, VerilatedTraceSigType, bool array, int arraynum,
68-
bool bussed, int msb, int lsb);
68+
void declare(uint32_t code, uint32_t fidx, const char* name, int dtypenum,
69+
VerilatedTraceSigDirection, VerilatedTraceSigKind, VerilatedTraceSigType,
70+
bool array, int arraynum, bool bussed, int msb, int lsb);
6971

7072
protected:
7173
//=========================================================================
@@ -101,7 +103,7 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe
101103
// Flush any remaining data to this file
102104
void flush() VL_MT_SAFE_EXCLUDES(m_mutex);
103105
// Return if file is open
104-
bool isOpen() const VL_MT_SAFE { return m_fst != nullptr; }
106+
bool isOpen() const VL_MT_SAFE { return !m_writerps.empty(); }
105107

106108
//=========================================================================
107109
// Internal interface to Verilator generated code
@@ -161,15 +163,17 @@ class VerilatedFstBuffer VL_NOT_FINAL {
161163
VerilatedFst& m_owner; // Trace file owning this buffer. Required by subclasses.
162164

163165
// The FST file handle
164-
void* const m_fst = m_owner.m_fst;
166+
VerilatedFstWriter& m_writer;
165167
// code to fstHandle map, as an array
166168
const vlFstHandle* const m_symbolp = m_owner.m_symbolp;
167169
// String buffer long enough to hold maxBits() chars
168-
char* const m_strbufp = m_owner.m_strbufp;
170+
char* const m_strbufp;
169171

170172
// CONSTRUCTOR
171-
explicit VerilatedFstBuffer(VerilatedFst& owner)
172-
: m_owner{owner} {}
173+
explicit VerilatedFstBuffer(VerilatedFst& owner, uint32_t fidx)
174+
: m_owner{owner}
175+
, m_writer{*m_owner.m_writerps.at(fidx)}
176+
, m_strbufp{m_owner.m_strbufps.at(fidx).get()} {}
173177
virtual ~VerilatedFstBuffer() = default;
174178

175179
//=========================================================================

include/verilated_saif_c.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -509,11 +509,10 @@ void VerilatedSaif::popPrefix() {
509509
m_prefixStack.pop_back();
510510
}
511511

512-
void VerilatedSaif::declare(const uint32_t code, uint32_t fidx, const char* name,
512+
void VerilatedSaif::declare(const uint32_t code, uint32_t, const char* name,
513513
const char* wirep, const bool array, const int arraynum,
514514
const bool bussed, const int msb, const int lsb) {
515-
assert(m_activityAccumulators.size() > fidx);
516-
VerilatedSaifActivityAccumulator& accumulator = *m_activityAccumulators.at(fidx);
515+
VerilatedSaifActivityAccumulator& accumulator = *m_activityAccumulators.at(0);
517516

518517
const int bits = ((msb > lsb) ? (msb - lsb) : (lsb - msb)) + 1;
519518

@@ -569,7 +568,7 @@ void VerilatedSaif::declDouble(const uint32_t code, const uint32_t fidx, const c
569568
//=============================================================================
570569
// Get/commit trace buffer
571570

572-
VerilatedSaif::Buffer* VerilatedSaif::getTraceBuffer(uint32_t fidx) { return new Buffer{*this}; }
571+
VerilatedSaif::Buffer* VerilatedSaif::getTraceBuffer(uint32_t fidx) { return new Buffer{*this, 0}; }
573572

574573
void VerilatedSaif::commitTraceBuffer(VerilatedSaif::Buffer* bufp) { delete bufp; }
575574

include/verilated_saif_c.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,6 @@ class VerilatedSaifBuffer VL_NOT_FINAL {
193193
uint32_t m_fidx; // Index of target activity accumulator
194194

195195
// CONSTRUCTORS
196-
explicit VerilatedSaifBuffer(VerilatedSaif& owner)
197-
: m_owner{owner}
198-
, m_fidx{0} {}
199196
explicit VerilatedSaifBuffer(VerilatedSaif& owner, uint32_t fidx)
200197
: m_owner{owner}
201198
, m_fidx{fidx} {}

include/verilated_trace.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,14 @@ class VerilatedTraceConfig final {
175175
const bool m_useParallel; // Use parallel tracing
176176
const bool m_useOffloading; // Offloading trace rendering
177177
const bool m_useFstWriterThread; // Use the separate FST writer thread
178+
const uint32_t m_nSplits; // Number of split trace files to use
178179

179-
VerilatedTraceConfig(bool useParallel, bool useOffloading, bool useFstWriterThread)
180+
VerilatedTraceConfig(bool useParallel, bool useOffloading, bool useFstWriterThread,
181+
uint32_t nSplits)
180182
: m_useParallel{useParallel}
181183
, m_useOffloading{useOffloading}
182-
, m_useFstWriterThread{useFstWriterThread} {}
184+
, m_useFstWriterThread{useFstWriterThread}
185+
, m_nSplits{nSplits} {}
183186
};
184187

185188
//=============================================================================
@@ -251,10 +254,11 @@ class VerilatedTrace VL_NOT_FINAL {
251254

252255
bool m_offload = false; // Use the offload thread
253256
bool m_parallel = false; // Use parallel tracing
257+
uint32_t m_nSplits = 1; // Number of split tracefiles to use
254258

255259
struct ParallelWorkerData final {
256260
const dumpCb_t m_cb; // The callback
257-
void* const m_userp; // The use pointer to pass to the callback
261+
void* const m_userp; // The user pointer to pass to the callback
258262
Buffer* const m_bufp; // The buffer pointer to pass to the callback
259263
std::atomic<bool> m_ready{false}; // The ready flag
260264
mutable VerilatedMutex m_mutex; // Mutex for suspension until ready
@@ -374,6 +378,10 @@ class VerilatedTrace VL_NOT_FINAL {
374378

375379
bool offload() const { return m_offload; }
376380
bool parallel() const { return m_parallel; }
381+
bool split() const { return m_nSplits > 1; }
382+
uint32_t nSplits() const { return m_nSplits; }
383+
384+
VerilatedContext* contextp() const { return m_contextp; }
377385

378386
// Return last ' ' separated word. Assumes string does not end in ' '.
379387
static std::string lastWord(const std::string& str) {
@@ -458,7 +466,7 @@ class VerilatedTraceBuffer VL_NOT_FINAL : public T_Buffer {
458466
uint32_t* const m_sigs_oldvalp; // Previous value store
459467
EData* const m_sigs_enabledp; // Bit vector of enabled codes (nullptr = all on)
460468

461-
explicit VerilatedTraceBuffer(Trace& owner);
469+
explicit VerilatedTraceBuffer(Trace& owner, uint32_t fidx);
462470
~VerilatedTraceBuffer() override = default;
463471

464472
public:

include/verilated_trace_imp.h

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "verilated_intrinsics.h"
2828
#include "verilated_trace.h"
2929
#include "verilated_threads.h"
30+
#include <algorithm>
3031
#include <list>
3132

3233
#if 0
@@ -468,26 +469,31 @@ VL_ATTR_NOINLINE void VerilatedTrace<VL_SUB_T, VL_BUF_T>::ParallelWorkerData::wa
468469

469470
template <>
470471
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runCallbacks(const std::vector<CallbackRecord>& cbVec) {
471-
if (parallel()) {
472+
if (parallel() || split()) {
472473
// If tracing in parallel, dispatch to the thread pool
473474
VlThreadPool* threadPoolp = static_cast<VlThreadPool*>(m_contextp->threadPoolp());
474475
// List of work items for thread (std::list, as ParallelWorkerData is not movable)
475476
std::list<ParallelWorkerData> workerData;
476477
// We use the whole pool + the main thread
477-
const unsigned threads = threadPoolp->numThreads() + 1;
478+
const unsigned threads = [&]() {
479+
const unsigned maxThreads = threadPoolp->numThreads() + 1;
480+
if (split()) return std::min(maxThreads, nSplits());
481+
return maxThreads;
482+
}();
478483
// Main thread executes all jobs with index % threads == 0
479484
std::vector<ParallelWorkerData*> mainThreadWorkerData;
480485
// Enqueue all the jobs
481486
for (const CallbackRecord& cbr : cbVec) {
487+
const unsigned idx = cbr.m_fidx % threads;
482488
// Always get the trace buffer on the main thread
483-
Buffer* const bufp = getTraceBuffer(cbr.m_fidx);
489+
Buffer* const bufp = getTraceBuffer(idx);
484490
// Create new work item
485491
workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp);
486492
// Grab the new work item
487493
ParallelWorkerData* const itemp = &workerData.back();
488494
// Enqueue task to thread pool, or main thread
489-
if (unsigned rem = cbr.m_fidx % threads) {
490-
threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp);
495+
if (idx) {
496+
threadPoolp->workerp(idx - 1)->addTask(parallelWorkerTask, itemp);
491497
} else {
492498
mainThreadWorkerData.push_back(itemp);
493499
}
@@ -507,6 +513,7 @@ void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runCallbacks(const std::vector<Callback
507513
// Done
508514
return;
509515
}
516+
510517
// Fall back on sequential execution
511518
for (const CallbackRecord& cbr : cbVec) {
512519
Buffer* const traceBufferp = getTraceBuffer(cbr.m_fidx);
@@ -662,6 +669,8 @@ void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addModel(VerilatedModel* modelp)
662669
m_offload = configp->m_useOffloading;
663670
// If at least one model requests parallel tracing, then use it
664671
m_parallel |= configp->m_useParallel;
672+
// Use as many splits as required by the largest model
673+
m_nSplits = configp->m_nSplits > m_nSplits ? configp->m_nSplits : m_nSplits;
665674

666675
if (VL_UNCOVERABLE(m_parallel && m_offload)) { // LCOV_EXCL_START
667676
VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use parallel tracing with offloading");
@@ -812,8 +821,8 @@ static inline void cvtQDataToStr(char* dstp, QData value) {
812821
// VerilatedTraceBuffer
813822

814823
template <>
815-
VerilatedTraceBuffer<VL_BUF_T>::VerilatedTraceBuffer(Trace& owner)
816-
: VL_BUF_T{owner}
824+
VerilatedTraceBuffer<VL_BUF_T>::VerilatedTraceBuffer(Trace& owner, uint32_t fidx)
825+
: VL_BUF_T{owner, fidx}
817826
, m_sigs_oldvalp{owner.m_sigs_oldvalp}
818827
, m_sigs_enabledp{owner.m_sigs_enabledp} {}
819828

@@ -898,7 +907,7 @@ void VerilatedTraceBuffer<VL_BUF_T>::fullDouble(uint32_t* oldp, double newval) {
898907

899908
template <>
900909
VerilatedTraceOffloadBuffer<VL_BUF_T>::VerilatedTraceOffloadBuffer(VL_SUB_T& owner)
901-
: VerilatedTraceBuffer<VL_BUF_T>{owner}
910+
: VerilatedTraceBuffer<VL_BUF_T>{owner, 0}
902911
, m_offloadBufferWritep{owner.m_offloadBufferWritep}
903912
, m_offloadBufferEndp{owner.m_offloadBufferEndp} {
904913
if (m_offloadBufferWritep) {

include/verilated_vcd_c.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ void VerilatedVcd::declDouble(uint32_t code, uint32_t fidx, const char* name, in
465465
// Get/commit trace buffer
466466

467467
VerilatedVcd::Buffer* VerilatedVcd::getTraceBuffer(uint32_t fidx) {
468-
VerilatedVcd::Buffer* const bufp = new Buffer{*this};
468+
VerilatedVcd::Buffer* const bufp = new Buffer{*this, fidx};
469469
if (parallel()) {
470470
// Note: This is called from VerilatedVcd::dump, which already holds the lock
471471
// If no buffer available, allocate a new one

include/verilated_vcd_c.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ class VerilatedVcdBuffer VL_NOT_FINAL {
206206
void finishLine(uint32_t code, char* writep);
207207

208208
// CONSTRUCTOR
209-
explicit VerilatedVcdBuffer(VerilatedVcd& owner)
209+
explicit VerilatedVcdBuffer(VerilatedVcd& owner, uint32_t fidx)
210210
: m_owner{owner} {}
211211
virtual ~VerilatedVcdBuffer() = default;
212212

src/V3EmitCModel.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,9 @@ class EmitCModel final : public EmitCFunc {
521521
puts("return std::unique_ptr<VerilatedTraceConfig>{new VerilatedTraceConfig{");
522522
puts(v3Global.opt.useTraceParallel() ? "true" : "false");
523523
puts(v3Global.opt.useTraceOffload() ? ", true" : ", false");
524-
puts(v3Global.opt.useFstWriterThread() ? ", true" : ", false");
524+
puts(", false"); //v3Global.opt.useFstWriterThread() ? ", true" : ", false");
525+
const uint32_t nSplits = v3Global.opt.traceFormat().vcd() ? 1 : v3Global.opt.threads();
526+
puts(", " + std::to_string(nSplits));
525527
puts("}};\n");
526528
puts("};\n");
527529
}

src/V3Options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ class V3Options final {
612612
int traceMaxArray() const { return m_traceMaxArray; }
613613
int traceMaxWidth() const { return m_traceMaxWidth; }
614614
int traceThreads() const { return m_traceThreads; }
615-
bool useTraceOffload() const { return trace() && traceFormat().fst() && traceThreads() > 1; }
615+
bool useTraceOffload() const { return false; }
616616
bool useTraceParallel() const {
617617
return trace() && traceFormat().vcd() && (threads() > 1 || hierChild() > 1);
618618
}

0 commit comments

Comments
 (0)
0