Produce split FST files when using multiple threads by gezalore · Pull Request #5806 · verilator/verilator · GitHub
[go: up one dir, main page]

Skip to content

Produce split FST files when using multiple threads #5806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 165 additions & 41 deletions include/verilated_fst_c.cpp

Large diffs are not rendered by default.

24 changes: 14 additions & 10 deletions include/verilated_fst_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@

#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>

typedef uint32_t vlFstHandle;
typedef uint32_t vlFstEnumHandle;

class VerilatedFstWriter;
class VerilatedFstBuffer;

//=============================================================================
Expand All @@ -49,11 +51,11 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe
//=========================================================================
// FST-specific internals

void* m_fst = nullptr;
std::vector<VerilatedFstWriter*> m_writerps;
std::map<uint32_t, vlFstHandle> m_code2symbol;
std::map<int, vlFstEnumHandle> m_local2fstdtype;
vlFstHandle* m_symbolp = nullptr; // same as m_code2symbol, but as an array
char* m_strbufp = nullptr; // String buffer long enough to hold maxBits() chars
std::vector<std::unique_ptr<char[]>> m_strbufps; // Buffers able to hold maxBits() chars

bool m_useFstWriterThread = false; // Whether to use the separate FST writer thread

Expand All @@ -63,9 +65,9 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe

// CONSTRUCTORS
VL_UNCOPYABLE(VerilatedFst);
void declare(uint32_t code, const char* name, int dtypenum, VerilatedTraceSigDirection,
VerilatedTraceSigKind, VerilatedTraceSigType, bool array, int arraynum,
bool bussed, int msb, int lsb);
void declare(uint32_t code, uint32_t fidx, const char* name, int dtypenum,
VerilatedTraceSigDirection, VerilatedTraceSigKind, VerilatedTraceSigType,
bool array, int arraynum, bool bussed, int msb, int lsb);

protected:
//=========================================================================
Expand Down Expand Up @@ -101,7 +103,7 @@ class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffe
// Flush any remaining data to this file
void flush() VL_MT_SAFE_EXCLUDES(m_mutex);
// Return if file is open
bool isOpen() const VL_MT_SAFE { return m_fst != nullptr; }
bool isOpen() const VL_MT_SAFE { return !m_writerps.empty(); }

//=========================================================================
// Internal interface to Verilator generated code
Expand Down Expand Up @@ -161,15 +163,17 @@ class VerilatedFstBuffer VL_NOT_FINAL {
VerilatedFst& m_owner; // Trace file owning this buffer. Required by subclasses.

// The FST file handle
void* const m_fst = m_owner.m_fst;
VerilatedFstWriter& m_writer;
// code to fstHandle map, as an array
const vlFstHandle* const m_symbolp = m_owner.m_symbolp;
// String buffer long enough to hold maxBits() chars
char* const m_strbufp = m_owner.m_strbufp;
char* const m_strbufp;

// CONSTRUCTOR
explicit VerilatedFstBuffer(VerilatedFst& owner)
: m_owner{owner} {}
explicit VerilatedFstBuffer(VerilatedFst& owner, uint32_t fidx)
: m_owner{owner}
, m_writer{*m_owner.m_writerps.at(fidx)}
, m_strbufp{m_owner.m_strbufps.at(fidx).get()} {}
virtual ~VerilatedFstBuffer() = default;

//=========================================================================
Expand Down
13 changes: 7 additions & 6 deletions include/verilated_saif_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,11 +509,10 @@ void VerilatedSaif::popPrefix() {
m_prefixStack.pop_back();
}

void VerilatedSaif::declare(const uint32_t code, uint32_t fidx, const char* name,
const char* wirep, const bool array, const int arraynum,
const bool bussed, const int msb, const int lsb) {
assert(m_activityAccumulators.size() > fidx);
VerilatedSaifActivityAccumulator& accumulator = *m_activityAccumulators.at(fidx);
void VerilatedSaif::declare(const uint32_t code, uint32_t, const char* name, const char* wirep,
const bool array, const int arraynum, const bool bussed, const int msb,
const int lsb) {
VerilatedSaifActivityAccumulator& accumulator = *m_activityAccumulators.at(0);

const int bits = ((msb > lsb) ? (msb - lsb) : (lsb - msb)) + 1;

Expand Down Expand Up @@ -569,7 +568,9 @@ void VerilatedSaif::declDouble(const uint32_t code, const uint32_t fidx, const c
//=============================================================================
// Get/commit trace buffer

VerilatedSaif::Buffer* VerilatedSaif::getTraceBuffer(uint32_t fidx) { return new Buffer{*this}; }
VerilatedSaif::Buffer* VerilatedSaif::getTraceBuffer(uint32_t fidx) {
return new Buffer{*this, 0};
}

void VerilatedSaif::commitTraceBuffer(VerilatedSaif::Buffer* bufp) { delete bufp; }

Expand Down
3 changes: 0 additions & 3 deletions include/verilated_saif_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,6 @@ class VerilatedSaifBuffer VL_NOT_FINAL {
uint32_t m_fidx; // Index of target activity accumulator

// CONSTRUCTORS
explicit VerilatedSaifBuffer(VerilatedSaif& owner)
: m_owner{owner}
, m_fidx{0} {}
explicit VerilatedSaifBuffer(VerilatedSaif& owner, uint32_t fidx)
: m_owner{owner}
, m_fidx{fidx} {}
Expand Down
16 changes: 12 additions & 4 deletions include/verilated_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,14 @@ class VerilatedTraceConfig final {
const bool m_useParallel; // Use parallel tracing
const bool m_useOffloading; // Offloading trace rendering
const bool m_useFstWriterThread; // Use the separate FST writer thread
const uint32_t m_nSplits; // Number of split trace files to use

VerilatedTraceConfig(bool useParallel, bool useOffloading, bool useFstWriterThread)
VerilatedTraceConfig(bool useParallel, bool useOffloading, bool useFstWriterThread,
uint32_t nSplits)
: m_useParallel{useParallel}
, m_useOffloading{useOffloading}
, m_useFstWriterThread{useFstWriterThread} {}
, m_useFstWriterThread{useFstWriterThread}
, m_nSplits{nSplits} {}
};

//=============================================================================
Expand Down Expand Up @@ -251,10 +254,11 @@ class VerilatedTrace VL_NOT_FINAL {

bool m_offload = false; // Use the offload thread
bool m_parallel = false; // Use parallel tracing
uint32_t m_nSplits = 1; // Number of split tracefiles to use

struct ParallelWorkerData final {
const dumpCb_t m_cb; // The callback
void* const m_userp; // The use pointer to pass to the callback
void* const m_userp; // The user pointer to pass to the callback
Buffer* const m_bufp; // The buffer pointer to pass to the callback
std::atomic<bool> m_ready{false}; // The ready flag
mutable VerilatedMutex m_mutex; // Mutex for suspension until ready
Expand Down Expand Up @@ -374,6 +378,10 @@ class VerilatedTrace VL_NOT_FINAL {

bool offload() const { return m_offload; }
bool parallel() const { return m_parallel; }
bool split() const { return m_nSplits > 1; }
uint32_t nSplits() const { return m_nSplits; }

VerilatedContext* contextp() const { return m_contextp; }

// Return last ' ' separated word. Assumes string does not end in ' '.
static std::string lastWord(const std::string& str) {
Expand Down Expand Up @@ -458,7 +466,7 @@ class VerilatedTraceBuffer VL_NOT_FINAL : public T_Buffer {
uint32_t* const m_sigs_oldvalp; // Previous value store
EData* const m_sigs_enabledp; // Bit vector of enabled codes (nullptr = all on)

explicit VerilatedTraceBuffer(Trace& owner);
explicit VerilatedTraceBuffer(Trace& owner, uint32_t fidx);
~VerilatedTraceBuffer() override = default;

public:
Expand Down
25 changes: 17 additions & 8 deletions include/verilated_trace_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "verilated_intrinsics.h"
#include "verilated_trace.h"
#include "verilated_threads.h"
#include <algorithm>
#include <list>

#if 0
Expand Down Expand Up @@ -468,26 +469,31 @@ VL_ATTR_NOINLINE void VerilatedTrace<VL_SUB_T, VL_BUF_T>::ParallelWorkerData::wa

template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runCallbacks(const std::vector<CallbackRecord>& cbVec) {
if (parallel()) {
if (parallel() || split()) {
// If tracing in parallel, dispatch to the thread pool
VlThreadPool* threadPoolp = static_cast<VlThreadPool*>(m_contextp->threadPoolp());
// List of work items for thread (std::list, as ParallelWorkerData is not movable)
std::list<ParallelWorkerData> workerData;
// We use the whole pool + the main thread
const unsigned threads = threadPoolp->numThreads() + 1;
const unsigned threads = [&]() {
const unsigned maxThreads = threadPoolp->numThreads() + 1;
if (split()) return std::min(maxThreads, nSplits());
return maxThreads;
}();
// Main thread executes all jobs with index % threads == 0
std::vector<ParallelWorkerData*> mainThreadWorkerData;
// Enqueue all the jobs
for (const CallbackRecord& cbr : cbVec) {
const unsigned idx = cbr.m_fidx % threads;
// Always get the trace buffer on the main thread
Buffer* const bufp = getTraceBuffer(cbr.m_fidx);
Buffer* const bufp = getTraceBuffer(idx);
// Create new work item
workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp);
// Grab the new work item
ParallelWorkerData* const itemp = &workerData.back();
// Enqueue task to thread pool, or main thread
if (unsigned rem = cbr.m_fidx % threads) {
threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp);
if (idx) {
threadPoolp->workerp(idx - 1)->addTask(parallelWorkerTask, itemp);
} else {
mainThreadWorkerData.push_back(itemp);
}
Expand All @@ -507,6 +513,7 @@ void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runCallbacks(const std::vector<Callback
// Done
return;
}

// Fall back on sequential execution
for (const CallbackRecord& cbr : cbVec) {
Buffer* const traceBufferp = getTraceBuffer(cbr.m_fidx);
Expand Down Expand Up @@ -662,6 +669,8 @@ void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addModel(VerilatedModel* modelp)
m_offload = configp->m_useOffloading;
// If at least one model requests parallel tracing, then use it
m_parallel |= configp->m_useParallel;
// Use as many splits as required by the largest model
m_nSplits = configp->m_nSplits > m_nSplits ? configp->m_nSplits : m_nSplits;

if (VL_UNCOVERABLE(m_parallel && m_offload)) { // LCOV_EXCL_START
VL_FATAL_MT(__FILE__, __LINE__, "", "Cannot use parallel tracing with offloading");
Expand Down Expand Up @@ -812,8 +821,8 @@ static inline void cvtQDataToStr(char* dstp, QData value) {
// VerilatedTraceBuffer

template <>
VerilatedTraceBuffer<VL_BUF_T>::VerilatedTraceBuffer(Trace& owner)
: VL_BUF_T{owner}
VerilatedTraceBuffer<VL_BUF_T>::VerilatedTraceBuffer(Trace& owner, uint32_t fidx)
: VL_BUF_T{owner, fidx}
, m_sigs_oldvalp{owner.m_sigs_oldvalp}
, m_sigs_enabledp{owner.m_sigs_enabledp} {}

Expand Down Expand Up @@ -898,7 +907,7 @@ void VerilatedTraceBuffer<VL_BUF_T>::fullDouble(uint32_t* oldp, double newval) {

template <>
VerilatedTraceOffloadBuffer<VL_BUF_T>::VerilatedTraceOffloadBuffer(VL_SUB_T& owner)
: VerilatedTraceBuffer<VL_BUF_T>{owner}
: VerilatedTraceBuffer<VL_BUF_T>{owner, 0}
, m_offloadBufferWritep{owner.m_offloadBufferWritep}
, m_offloadBufferEndp{owner.m_offloadBufferEndp} {
if (m_offloadBufferWritep) {
Expand Down
2 changes: 1 addition & 1 deletion include/verilated_vcd_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ void VerilatedVcd::declDouble(uint32_t code, uint32_t fidx, const char* name, in
// Get/commit trace buffer

VerilatedVcd::Buffer* VerilatedVcd::getTraceBuffer(uint32_t fidx) {
VerilatedVcd::Buffer* const bufp = new Buffer{*this};
VerilatedVcd::Buffer* const bufp = new Buffer{*this, fidx};
if (parallel()) {
// Note: This is called from VerilatedVcd::dump, which already holds the lock
// If no buffer available, allocate a new one
Expand Down
2 changes: 1 addition & 1 deletion include/verilated_vcd_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ class VerilatedVcdBuffer VL_NOT_FINAL {
void finishLine(uint32_t code, char* writep);

// CONSTRUCTOR
explicit VerilatedVcdBuffer(VerilatedVcd& owner)
explicit VerilatedVcdBuffer(VerilatedVcd& owner, uint32_t fidx)
: m_owner{owner} {}
virtual ~VerilatedVcdBuffer() = default;

Expand Down
4 changes: 3 additions & 1 deletion src/V3EmitCModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,9 @@ class EmitCModel final : public EmitCFunc {
puts("return std::unique_ptr<VerilatedTraceConfig>{new VerilatedTraceConfig{");
puts(v3Global.opt.useTraceParallel() ? "true" : "false");
puts(v3Global.opt.useTraceOffload() ? ", true" : ", false");
puts(v3Global.opt.useFstWriterThread() ? ", true" : ", false");
puts(", false"); //v3Global.opt.useFstWriterThread() ? ", true" : ", false");
const uint32_t nSplits = v3Global.opt.traceFormat().vcd() ? 1 : v3Global.opt.threads();
puts(", " + std::to_string(nSplits));
puts("}};\n");
puts("};\n");
}
Expand Down
2 changes: 1 addition & 1 deletion src/V3Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ class V3Options final {
int traceMaxArray() const { return m_traceMaxArray; }
int traceMaxWidth() const { return m_traceMaxWidth; }
int traceThreads() const { return m_traceThreads; }
bool useTraceOffload() const { return trace() && traceFormat().fst() && traceThreads() > 1; }
bool useTraceOffload() const { return false; }
bool useTraceParallel() const {
return trace() && traceFormat().vcd() && (threads() > 1 || hierChild() > 1);
}
Expand Down
3 changes: 1 addition & 2 deletions src/V3Trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,7 @@ class TraceVisitor final : public VNVisitor {
bool m_finding = false; // Pass one of algorithm?

// Trace parallelism. Only VCD tracing can be parallelized at this time.
const uint32_t m_parallelism
= v3Global.opt.useTraceParallel() ? static_cast<uint32_t>(v3Global.opt.threads()) : 1;
const uint32_t m_parallelism = v3Global.opt.threads();

VDouble0 m_statSetters; // Statistic tracking
VDouble0 m_statSettersSlow; // Statistic tracking
Expand Down
Loading
0