8000 src: consolidate C++ ReadFileSync/WriteFileSync utilities · nodejs/node@28905b9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 28905b9

Browse files
joyeecheung authored and aduh95 committed
src: consolidate C++ ReadFileSync/WriteFileSync utilities
This patch moves `ReadFileSync` and `WriteFileSync` from `src/util.cc` to `src/node_file_utils.cc`, consolidates the implementation to reuse code, and adds a few more enhancements. For `ReadFileSync`: - Use fstat-based pre-allocation to minimize buffer resizing and repeated reads for bigger files. - Handle various potential overflows in size conversions. - Handle fallback for 0-byte special files. For `WriteFileSync`: - Handle potential partial writes for big enough files and support non-seekable files (with -1 as offset). - Handle 0-byte writes correctly. In both cases, this now avoids hard aborts and returns error codes for the caller to handle as much as possible, except for `std::vector<char> ReadFileSync(FILE* fp)`, which is part of the embedder API. This patch uses the new `ReadFileSync` to address a TODO in node_sea_bin.cc. PR-URL: #61662 Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Stephen Belanger <admin@stephenbelanger.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 41ec451 commit 28905b9

File tree

7 files changed

+291
-106
lines changed

7 files changed

+291
-106
lines changed

node.gyp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
'src/node_errors.cc',
126126
'src/node_external_reference.cc',
127127
'src/node_file.cc',
128+
'src/node_file_utils.cc',
128129
'src/node_http_parser.cc',
129130
'src/node_http2.cc',
130131
'src/node_i18n.cc',

src/node_file_utils.cc

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
#include "node_file_utils.h"
2+
3+
#include <climits>
4+
#include <cstdio>
5+
#include <cstring>
6+
#include <functional>
7+
#include <string>
8+
#include <vector>
9+
10+
#include "util-inl.h"
11+
12+
#ifdef _WIN32
13+
#include <io.h> // _S_IREAD _S_IWRITE
14+
#ifndef S_IRUSR
15+
#define S_IRUSR _S_IREAD
16+
#endif // S_IRUSR
17+
#ifndef S_IWUSR
18+
#define S_IWUSR _S_IWRITE
19+
#endif // S_IWUSR
20+
#endif
21+
22+
namespace node {
23+
24+
int WriteFileSync(const char* path, uv_buf_t buf) {
25+
return WriteFileSync(path, &buf, 1);
26+
}
27+
28+
constexpr int64_t kCurrentFileOffset = -1;
29+
int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count) {
30+
uv_fs_t req;
31+
int fd = uv_fs_open(nullptr,
32+
&req,
33+
path,
34+
O_WRONLY | O_CREAT | O_TRUNC,
35+
S_IWUSR | S_IRUSR,
36+
nullptr);
37+
uv_fs_req_cleanup(&req);
38+
if (fd < 0) {
39+
return fd;
40+
}
41+
42+
// Handle potential partial writes by looping until all data is written.
43+
std::vector<uv_buf_t> iovs(bufs, bufs + buf_count);
44+
size_t idx = 0;
45+
46+
while (idx < iovs.size()) {
47+
// Skip empty buffers.
48+
if (iovs[idx].len == 0) {
49+
idx++;
50+
continue;
51+
}
52+
53+
uv_fs_write(nullptr,
54+
&req,
55+
fd,
56+
iovs.data() + idx,
57+
iovs.size() - idx,
58+
kCurrentFileOffset,
59+
nullptr);
60+
if (req.result <= 0) { // Error during write.
61+
// UV_EIO should not happen unless the file system is full.
62+
int err = req.result < 0 ? req.result : UV_EIO;
63+
uv_fs_req_cleanup(&req);
64+
uv_fs_close(nullptr, &req, fd, nullptr);
65+
uv_fs_req_cleanup(&req);
66+
return err;
67+
}
68+
size_t written = req.result;
69+
uv_fs_req_cleanup(&req);
70+
71+
// Consume written bytes from buffers.
72+
while (written > 0 && idx < iovs.size()) {
73+
if (written >= iovs[idx].len) {
74+
written -= iovs[idx].len;
75+
idx++;
76+
} else {
77+
iovs[idx].base += written;
78+
iovs[idx].len -= written;
79+
written = 0;
80+
}
81+
}
82+
}
83+
84+
int err = uv_fs_close(nullptr, &req, fd, nullptr);
85+
uv_fs_req_cleanup(&req);
86+
return err;
87+
}
88+
89+
// Writes the UTF-8 encoding of a V8 string to the file at `path`.
// The string is converted through node::Utf8Value and then handed to the
// single-buffer WriteFileSync() overload, whose result is returned as is.
int WriteFileSync(v8::Isolate* isolate,
                  const char* path,
                  v8::Local<v8::String> string) {
  node::Utf8Value encoded(isolate, string);
  return WriteFileSync(path, uv_buf_init(encoded.out(), encoded.length()));
}
96+
97+
// Default size used if fstat reports a file size of 0 for special files.
98+
static constexpr size_t kDefaultReadSize = 8192;
99+
100+
// The resize_buffer callback is called with the file size after fstat, and must
101+
// return a pointer to a buffer of at least that size. If the file grows during
102+
// reading, resize_buffer may be called again with a larger size; the callback
103+
// must preserve existing content and release old storage if needed.
104+
// After reading completes, resize_buffer may be called with the actual bytes
105+
// read.
106+
template <typename ResizeBuffer>
107+
int ReadFileSyncImpl(const char* path, ResizeBuffer resize_buffer) {
108+
uv_fs_t req;
109+
110+
uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr);
111+
if (req.result < 0) {
112+
int err = req.result;
113+
uv_fs_req_cleanup(&req);
114+
return err;
115+
}
116+
uv_fs_req_cleanup(&req);
117+
118+
// Get the file size first, which should be cheap enough on an already opened
119+
// files, and saves us from repeated reallocations/reads.
120+
int err = uv_fs_fstat(nullptr, &req, file, nullptr);
121+
if (err < 0) {
122+
uv_fs_req_cleanup(&req);
123+
uv_fs_close(nullptr, &req, file, nullptr);
124+
uv_fs_req_cleanup(&req);
125+
return err;
126+
}
127+
// SIZE_MAX is ~18 exabytes on 64-bit and ~4 GB on 32-bit systems.
128+
// In both cases, the process is unlikely to have that much memory
129+
// to hold the file content, so we just error with UV_EFBIG.
130+
if (req.statbuf.st_size > static_cast<uint64_t>(SIZE_MAX)) {
131+
uv_fs_req_cleanup(&req);
132+
uv_fs_close(nullptr, &req, file, nullptr);
133+
uv_fs_req_cleanup(&req);
134+
return UV_EFBIG;
135+
}
136+
size_t size = static_cast<size_t>(req.statbuf.st_size);
137+
uv_fs_req_cleanup(&req);
138+
139+
// If the file is reported as 0 bytes for special files, use a default
140+
// size to start reading.
141+
if (size == 0) {
142+
size = kDefaultReadSize;
143+
}
144+
145+
char* buffer = resize_buffer(size);
146+
if (buffer == nullptr) {
147+
uv_fs_close(nullptr, &req, file, nullptr);
148+
uv_fs_req_cleanup(&req);
149+
return UV_ENOMEM;
150+
}
151+
size_t total_read = 0;
152+
while (true) {
153+
size_t remaining = size - total_read;
154+
// On Windows, uv_buf_t uses ULONG which may truncate the
155+
// length for large buffers. Limit the individual read request size to
156+
// INT_MAX to be safe. The loop will issue multiple reads for larger files.
157+
if (remaining > INT_MAX) {
158+
remaining = INT_MAX;
159+
}
160+
uv_buf_t buf = uv_buf_init(buffer + total_read, remaining);
161+
uv_fs_read(
162+
nullptr, &req, file, &buf, 1 /* nbufs */, kCurrentFileOffset, nullptr);
163+
ssize_t bytes_read = req.result;
164+
uv_fs_req_cleanup(&req);
165+
if (bytes_read < 0) {
166+
uv_fs_close(nullptr, &req, file, nullptr);
167+
uv_fs_req_cleanup(&req);
168+
return bytes_read;
169+
}
170+
if (bytes_read == 0) {
171+
// EOF, stop reading.
172+
break;
173+
}
174+
total_read += bytes_read;
175+
if (total_read == size) {
176+
// Buffer is full, the file may have grown during reading.
177+
// Try increasing the buffer size and reading more.
178+
if (size == SIZE_MAX) {
179+
uv_fs_close(nullptr, &req, file, nullptr);
180+
uv_fs_req_cleanup(&req);
181+
return UV_EFBIG;
182+
}
183+
if (size > SIZE_MAX / 2) {
184+
size = SIZE_MAX;
185+
} else {
186+
size *= 2;
187+
}
188+
buffer = resize_buffer(size);
189+
if (buffer == nullptr) {
190+
uv_fs_close(nullptr, &req, file, nullptr);
191+
uv_fs_req_cleanup(&req);
192+
return UV_ENOMEM;
193+
}
194+
}
195+
}
196+
197+
int close_err = uv_fs_close(nullptr, &req, file, nullptr);
198+
uv_fs_req_cleanup(&req);
199+
if (close_err < 0) {
200+
return close_err;
201+
}
202+
203+
// Truncate the actual size read if necessary.
204+
if (total_read != size) {
205+
buffer = resize_buffer(total_read);
206+
if (buffer == nullptr && total_read != 0) {
207+
return UV_ENOMEM;
208+
}
209+
}
210+
return 0;
211+
}
212+
213+
int ReadFileSync(const char* path, std::string* result) {
214+
return ReadFileSyncImpl(path, [result](size_t size) {
215+
result->resize(size);
216+
return result->data();
217+
});
218+
}
219+
220+
// Legacy interface. TODO(joyeecheung): update the callers to pass path first,
221+
// output parameters second.
222+
int ReadFileSync(std::string* result, const char* path) {
223+
return ReadFileSync(path, result);
224+
}
225+
226+
int ReadFileSync(const char* path, std::vector<uint8_t>* result) {
227+
return ReadFileSyncImpl(path, [result](size_t size) {
228+
result->resize(size);
229+
return reinterpret_cast<char*>(result->data());
230+
});
231+
}
232+
233+
// Reads the entire contents of an already opened stream into a vector.
//
// The stream must be seekable and positioned at offset 0. Unlike the
// path-based overloads, which return an error code, every failure here trips
// a CHECK. An empty file yields an empty vector.
std::vector<char> ReadFileSync(FILE* fp) {
  CHECK_EQ(ftell(fp), 0);
  int err = fseek(fp, 0, SEEK_END);
  CHECK_EQ(err, 0);
  // With the position at the end, ftell() reports the file size.
  size_t size = ftell(fp);
  CHECK_NE(size, static_cast<size_t>(-1L));
  err = fseek(fp, 0, SEEK_SET);
  CHECK_EQ(err, 0);

  std::vector<char> contents(size);
  if (size == 0) {
    // fread() returns 0 when the element size is 0, which would fail the
    // check below even though nothing went wrong. There is nothing to read.
    return contents;
  }
  size_t num_read = fread(contents.data(), size, 1, fp);
  CHECK_EQ(num_read, 1);
  return contents;
}
247+
248+
} // namespace node

src/node_file_utils.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#ifndef SRC_NODE_FILE_UTILS_H_
2+
#define SRC_NODE_FILE_UTILS_H_
3+
4+
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
5+
6+
#include <cstdio>
7+
#include <functional>
8+
#include <string>
9+
#include <vector>
10+
11+
#include "uv.h"
12+
#include "v8.h"
13+
14+
namespace node {
15+
16+
// Synchronously writes to a file. If the file exists, it is replaced
17+
// (truncated).
// Each overload returns 0 on success or a negative libuv error code on
// failure. The v8::String overload writes the string's UTF-8 encoding.
18+
int WriteFileSync(const char* path, uv_buf_t buf);
19+
int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count);
20+
int WriteFileSync(v8::Isolate* isolate,
21+
const char* path,
22+
v8::Local<v8::String> string);
23+
24+
// Synchronously reads the entire contents of a file.
// Returns 0 on success, in which case `result` is sized to the bytes read,
// or a negative libuv error code on failure.
25+
int ReadFileSync(const char* path, std::string* result);
26+
int ReadFileSync(const char* path, std::vector<uint8_t>* result);
27+
28+
// Legacy interface. TODO(joyeecheung): update the callers to pass path first,
29+
// output parameters second.
30+
int ReadFileSync(std::string* result, const char* path);
31+
32+
// This is currently only used by embedder APIs that take a FILE*.
// The stream is expected to be positioned at offset 0; failures are
// enforced with CHECKs rather than reported through a return value.
33+
std::vector<char> ReadFileSync(FILE* fp);
34+
35+
} // namespace node
36+
37+
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
38+
39+
#endif // SRC_NODE_FILE_UTILS_H_

src/node_internals.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "env.h"
2828
#include "node.h"
2929
#include "node_binding.h"
30+
#include "node_file_utils.h"
3031
#include "node_mutex.h"
3132
#include "tracing/trace_event.h"
3233
#include "util.h"
@@ -411,11 +412,6 @@ typedef struct tm TIME_TYPE;
411412
#endif
412413

413414
double GetCurrentTimeInMicroseconds();
414-
int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count);
415-
int WriteFileSync(const char* path, uv_buf_t buf);
416-
int WriteFileSync(v8::Isolate* isolate,
417-
const char* path,
418-
v8::Local<v8::String> string);
419415

420416
class DiagnosticFilename {
421417
public:

src/node_sea_bin.cc

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,8 @@ ExitCode BuildSingleExecutable(const std::string& sea_config_path,
405405
int src_mode = static_cast<int>(req.statbuf.st_mode);
406406
uv_fs_req_cleanup(&req);
407407

408-
std::string exe;
409-
r = ReadFileSync(&exe, config.executable_path.c_str());
408+
std::vector<uint8_t> exe_data;
409+
r = ReadFileSync(config.executable_path.c_str(), &exe_data);
410410
if (r != 0) {
411411
FPrintF(stderr,
412412
"Error: Couldn't read executable %s: %s\n",
@@ -415,9 +415,6 @@ ExitCode BuildSingleExecutable(const std::string& sea_config_path,
415415
return ExitCode::kGenericUserError;
416416
}
417417

418-
// TODO(joyeecheung): add a variant of ReadFileSync that reads into
419-
// vector<uint8_t> directly and avoid this copy.
420-
std::vector<uint8_t> exe_data(exe.begin(), exe.end());
421418
std::vector<char> sea_blob;
422419
ExitCode code =
423420
GenerateSingleExecutableBlob(&sea_blob, config, args, exec_args);

0 commit comments

Comments
 (0)
0