Improve startup file copying of JavaScript files (#14512) · arangodb/arangodb@ceaec2a · GitHub
[go: up one dir, main page]

Skip to content

Commit ceaec2a

Browse files
authored
Improve startup file copying of JavaScript files (#14512)
1 parent 14f612f commit ceaec2a

File tree

7 files changed

+281
-187
lines changed

7 files changed

+281
-187
lines changed

CHANGELOG

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
devel
22
-----
33

4+
* Make `--javascript.copy-installation` also copy the `node_modules` sub
5+
directory. This is required so we have a full copy of the JavaScript
6+
dependencies and not one that excludes some infrequently changed modules.
7+
In addition, file copying now intentionally excludes .map files as they
8+
are not needed.
9+
410
* Fixed BTS-408: treat positive or negative signed numbers as constants
511
immediately during AQL query parsing.
612
Previously, a value of `-1` was parsed initially as `unary minus(value(1))`,

arangod/V8Server/V8DealerFeature.cpp

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,9 @@ void V8DealerFeature::copyInstallationFiles() {
557557

558558
_nodeModulesDirectory = _startupDirectory;
559559

560-
const std::string checksumFile =
560+
std::string const checksumFile =
561561
FileUtils::buildFilename(_startupDirectory, StaticStrings::checksumFileJs);
562-
const std::string copyChecksumFile =
562+
std::string const copyChecksumFile =
563563
FileUtils::buildFilename(copyJSPath, StaticStrings::checksumFileJs);
564564

565565
bool overwriteCopy = false;
@@ -569,7 +569,7 @@ void V8DealerFeature::copyInstallationFiles() {
569569
} else {
570570
try {
571571
overwriteCopy =
572-
(FileUtils::slurp(copyChecksumFile) != FileUtils::slurp(checksumFile));
572+
(StringUtils::trim(FileUtils::slurp(copyChecksumFile)) != StringUtils::trim(FileUtils::slurp(checksumFile)));
573573
} catch (basics::Exception const& e) {
574574
LOG_TOPIC("efa47", ERR, Logger::V8) << "Error reading '" << StaticStrings::checksumFileJs
575575
<< "' from disk: " << e.what();
@@ -578,7 +578,7 @@ void V8DealerFeature::copyInstallationFiles() {
578578
}
579579

580580
if (overwriteCopy) {
581-
// basics security checks before removing an existing directory:
581+
// basic security checks before removing an existing directory:
582582
// check if for some reason we will be trying to remove the entire database
583583
// directory...
584584
if (FileUtils::exists(FileUtils::buildFilename(copyJSPath, "ENGINE"))) {
@@ -587,7 +587,7 @@ void V8DealerFeature::copyInstallationFiles() {
587587
FATAL_ERROR_EXIT();
588588
}
589589

590-
LOG_TOPIC("dd1c0", DEBUG, Logger::V8)
590+
LOG_TOPIC("dd1c0", INFO, Logger::V8)
591591
<< "Copying JS installation files from '" << _startupDirectory
592592
<< "' to '" << copyJSPath << "'";
593593
auto res = TRI_ERROR_NO_ERROR;
@@ -606,40 +606,46 @@ void V8DealerFeature::copyInstallationFiles() {
606606
FATAL_ERROR_EXIT();
607607
}
608608

609-
// intentionally do not copy js/node/node_modules...
609+
// intentionally do not copy js/node/node_modules/eslint!
610610
// we avoid copying this directory because it contains 5000+ files at the
611-
// moment, and copying them one by one is darn slow at least on Windows...
611+
// moment, and copying them one by one is slow. In addition, eslint is not
612+
// needed in release builds
612613
std::string const versionAppendix =
613614
std::regex_replace(rest::Version::getServerVersion(),
614615
std::regex("-.*$"), "");
615-
std::string const nodeModulesPath =
616-
FileUtils::buildFilename("js", "node", "node_modules");
617-
std::string const nodeModulesPathVersioned =
618-
basics::FileUtils::buildFilename("js", versionAppendix, "node",
619-
"node_modules");
616+
std::string const eslintPath =
617+
FileUtils::buildFilename("js", "node", "node_modules", "eslint");
620618

621-
std::regex const binRegex("[/\\\\]\\.bin[/\\\\]", std::regex::ECMAScript);
619+
// .bin directories could be harmful, and .map files are large and unnecessary
620+
std::string const binDirectory = std::string(TRI_DIR_SEPARATOR_STR) + ".bin" + TRI_DIR_SEPARATOR_STR;
622621

623-
auto filter = [&nodeModulesPath, &nodeModulesPathVersioned, &binRegex](std::string const& filename) -> bool {
624-
if (std::regex_search(filename, binRegex)) {
622+
size_t copied = 0;
623+
624+
auto filter = [&eslintPath, &binDirectory, &copied](std::string const& filename) -> bool {
625+
if (filename.size() >= 4 && filename.compare(filename.size() - 4, 4, ".map") == 0) {
626+
// filename ends with ".map". filter it out!
627+
return true;
628+
}
629+
if (filename.find(binDirectory) != std::string::npos) {
625630
// don't copy files in .bin
626631
return true;
627632
}
633+
628634
std::string normalized = filename;
629635
FileUtils::normalizePath(normalized);
630-
if ((!nodeModulesPath.empty() &&
631-
normalized.size() >= nodeModulesPath.size() &&
632-
normalized.substr(normalized.size() - nodeModulesPath.size(), nodeModulesPath.size()) == nodeModulesPath) ||
633-
(!nodeModulesPathVersioned.empty() &&
634-
normalized.size() >= nodeModulesPathVersioned.size() &&
635-
normalized.substr(normalized.size() - nodeModulesPathVersioned.size(), nodeModulesPathVersioned.size()) == nodeModulesPathVersioned)) {
636+
if ((normalized.size() >= eslintPath.size() &&
637+
normalized.compare(normalized.size() - eslintPath.size(), eslintPath.size(), eslintPath) == 0)) {
636638
// filter it out!
637639
return true;
638640
}
641+
639642
// let the file/directory pass through
643+
++copied;
640644
return false;
641645
};
642646

647+
double start = TRI_microtime();
648+
643649
std::string error;
644650
if (!FileUtils::copyRecursive(_startupDirectory, copyJSPath, filter, error)) {
645651
LOG_TOPIC("45261", FATAL, Logger::V8) << "Error copying JS installation files to '"
@@ -662,6 +668,9 @@ void V8DealerFeature::copyInstallationFiles() {
662668
<< copyJSPath << "': " << error;
663669
}
664670
}
671+
672+
LOG_TOPIC("38e1e", INFO, Logger::V8)
673+
<< "copying " << copied << " JS installation file(s) took " << Logger::FIXED(TRI_microtime() - start, 6) << "s";
665674
}
666675
_startupDirectory = copyJSPath;
667676
}

lib/Basics/FileUtils.cpp

Lines changed: 115 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,50 @@
6464
namespace {
6565
std::function<bool(std::string const&)> const passAllFilter =
6666
[](std::string const&) { return false; };
67+
68+
enum class StatResultType {
69+
Error, // in case it cannot be determined
70+
Directory,
71+
SymLink,
72+
File,
73+
Other // potentially file
74+
};
75+
76+
StatResultType statResultType(TRI_stat_t const& stbuf) {
77+
#ifdef _WIN32
78+
if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
79+
return StatResultType::Directory;
80+
}
81+
#else
82+
if (S_ISDIR(stbuf.st_mode)) {
83+
return StatResultType::Directory;
84+
}
85+
#endif
86+
87+
#ifndef TRI_HAVE_WIN32_SYMBOLIC_LINK
88+
if (S_ISLNK(stbuf.st_mode)) {
89+
return StatResultType::SymLink;
90+
}
91+
#endif
92+
93+
if ((stbuf.st_mode & S_IFMT) == S_IFREG) {
94+
return StatResultType::File;
95+
}
96+
97+
return StatResultType::Other;
6798
}
6899

100+
StatResultType statResultType(std::string const& path) {
101+
TRI_stat_t stbuf;
102+
int res = TRI_STAT(path.c_str(), &stbuf);
103+
if (res != 0) {
104+
return StatResultType::Error;
105+
}
106+
return statResultType(stbuf);
107+
}
108+
109+
} // namespace
110+
69111
namespace arangodb {
70112
namespace basics {
71113
namespace FileUtils {
@@ -401,12 +443,15 @@ bool copyRecursive(std::string const& source, std::string const& target,
401443
bool copyDirectoryRecursive(std::string const& source, std::string const& target,
402444
std::function<TRI_copy_recursive_e(std::string const&)> const& filter,
403445
std::string& error) {
404-
char* fn = nullptr;
405446
bool rc_bool = true;
447+
448+
// these strings will be recycled over and over
449+
std::string dst = target + TRI_DIR_SEPARATOR_STR;
450+
size_t const dstPrefixLength = dst.size();
451+
std::string src = source + TRI_DIR_SEPARATOR_STR;
452+
size_t const srcPrefixLength = src.size();
453+
406454

407-
auto isSubDirectory = [](std::string const& name) -> bool {
408-
return isDirectory(name);
409-
};
410455
#ifdef TRI_HAVE_WIN32_LIST_FILES
411456
struct _wfinddata_t oneItem;
412457
intptr_t handle;
@@ -427,7 +472,7 @@ bool copyDirectoryRecursive(std::string const& source, std::string const& target
427472
rcs.clear();
428473
icu::UnicodeString d((wchar_t*)oneItem.name, static_cast<int32_t>(wcslen(oneItem.name)));
429474
d.toUTF8String<std::string>(rcs);
430-
fn = (char*)rcs.c_str();
475+
char const* fn = (char*)rcs.c_str();
431476
#else
432477
DIR* filedir = opendir(source.c_str());
433478

@@ -445,57 +490,79 @@ bool copyDirectoryRecursive(std::string const& source, std::string const& target
445490
// to be thread-safe in reality, and newer versions of POSIX may require its
446491
// thread-safety formally, and in addition obsolete readdir_r() altogether
447492
while ((oneItem = (readdir(filedir))) != nullptr && rc_bool) {
448-
fn = oneItem->d_name;
493+
char const* fn = oneItem->d_name;
449494
#endif
450495

451496
// Now iterate over the items.
452497
// check it's not the pointer to the upper directory:
453498
if (!strcmp(fn, ".") || !strcmp(fn, "..")) {
454499
continue;
455500
}
456-
std::string dst = target + TRI_DIR_SEPARATOR_STR + fn;
457-
std::string src = source + TRI_DIR_SEPARATOR_STR + fn;
458501

459-
switch (filter(src)) {
460-
case TRI_COPY_IGNORE:
461-
break;
502+
// add current filename to prefix
503+
src.resize(srcPrefixLength);
504+
TRI_ASSERT(src.back() == TRI_DIR_SEPARATOR_CHAR);
505+
src.append(fn);
506+
507+
auto filterResult = filter(src);
508+
509+
if (filterResult != TRI_COPY_IGNORE) {
510+
// prepare dst filename
511+
dst.resize(dstPrefixLength);
512+
TRI_ASSERT(dst.back() == TRI_DIR_SEPARATOR_CHAR);
513+
dst.append(fn);
514+
515+
// figure out the type of the directory entry.
516+
StatResultType type = StatResultType::Error;
517+
TRI_stat_t stbuf;
518+
int res = TRI_STAT(src.c_str(), &stbuf);
519+
if (res == 0) {
520+
type = ::statResultType(stbuf);
521+
}
462522

463-
case TRI_COPY_COPY:
464-
// Handle subdirectories:
465-
if (isSubDirectory(src)) {
466-
long systemError;
467-
auto rc = TRI_CreateDirectory(dst.c_str(), systemError, error);
468-
if (rc != TRI_ERROR_NO_ERROR && rc != TRI_ERROR_FILE_EXISTS) {
469-
rc_bool = false;
470-
break;
471-
}
472-
if (!copyDirectoryRecursive(src, dst, filter, error)) {
473-
rc_bool = false;
474-
break;
475-
}
476-
if (!TRI_CopyAttributes(src, dst, error)) {
477-
rc_bool = false;
478-
break;
479-
}
480-
#ifndef _WIN32
481-
} else if (isSymbolicLink(src)) {
482-
if (!TRI_CopySymlink(src, dst, error)) {
483-
rc_bool = false;
484-
}
523+
switch (filterResult) {
524+
case TRI_COPY_IGNORE:
525+
TRI_ASSERT(false);
526+
break;
527+
528+
case TRI_COPY_COPY:
529+
// Handle subdirectories:
530+
if (type == StatResultType::Directory) {
531+
long systemError;
532+
auto rc = TRI_CreateDirectory(dst.c_str(), systemError, error);
533+
if (rc != TRI_ERROR_NO_ERROR && rc != TRI_ERROR_FILE_EXISTS) {
534+
rc_bool = false;
535+
break;
536+
}
537+
if (!copyDirectoryRecursive(src, dst, filter, error)) {
538+
rc_bool = false;
539+
break;
540+
}
541+
if (!TRI_CopyAttributes(src, dst, error)) {
542+
rc_bool = false;
543+
break;
544+
}
545+
} else if (type == StatResultType::SymLink) {
546+
if (!TRI_CopySymlink(src, dst, error)) {
547+
rc_bool = false;
548+
}
549+
} else {
550+
#ifdef _WIN32
551+
rc_bool = TRI_CopyFile(src, dst, error);
552+
#else
553+
// optimized version that reuses the already retrieved stat data
554+
rc_bool = TRI_CopyFile(src, dst, error, &stbuf);
485555
#endif
486-
} else {
487-
if (!TRI_CopyFile(src, dst, error)) {
488-
rc_bool = false;
489556
}
490-
}
491-
break;
557+
break;
492558

493-
case TRI_COPY_LINK:
494-
if (!TRI_CreateHardlink(src, dst, error)) {
495-
rc_bool = false;
496-
} // if
497-
break;
498-
} // switch
559+
case TRI_COPY_LINK:
560+
if (!TRI_CreateHardlink(src, dst, error)) {
561+
rc_bool = false;
562+
} // if
563+
break;
564+
} // switch
565+
}
499566
#ifdef TRI_HAVE_WIN32_LIST_FILES
500567
} while (_wfindnext(handle, &oneItem) != -1 && rc_bool);
501568

@@ -578,48 +645,19 @@ std::vector<std::string> listFiles(std::string const& directory) {
578645
}
579646

580647
bool isDirectory(std::string const& path) {
581-
TRI_stat_t stbuf;
582-
int res = TRI_STAT(path.c_str(), &stbuf);
583-
584-
#ifdef _WIN32
585-
return (res == 0) && ((stbuf.st_mode & S_IFMT) == S_IFDIR);
586-
#else
587-
return (res == 0) && S_ISDIR(stbuf.st_mode);
588-
#endif
648+
return ::statResultType(path) == ::StatResultType::Directory;
589649
}
590650

591651
bool isSymbolicLink(std::string const& path) {
592-
#ifdef TRI_HAVE_WIN32_SYMBOLIC_LINK
593-
594-
// .....................................................................
595-
// TODO: On the NTFS file system, there are the following file links:
596-
// hard links -
597-
// junctions -
598-
// symbolic links -
599-
// .....................................................................
600-
return false;
601-
602-
#else
603-
604-
struct stat stbuf;
605-
int res = TRI_STAT(path.c_str(), &stbuf);
606-
607-
return (res == 0) && S_ISLNK(stbuf.st_mode);
608-
609-
#endif
652+
return ::statResultType(path) == ::StatResultType::SymLink;
610653
}
611654

612655
bool isRegularFile(std::string const& path) {
613-
TRI_stat_t stbuf;
614-
int res = TRI_STAT(path.c_str(), &stbuf);
615-
return (res == 0) && ((stbuf.st_mode & S_IFMT) == S_IFREG);
656+
return ::statResultType(path) == ::StatResultType::File;
616657
}
617658

618659
bool exists(std::string const& path) {
619-
TRI_stat_t stbuf;
620-
int res = TRI_STAT(path.c_str(), &stbuf);
621-
622-
return res == 0;
660+
return ::statResultType(path) != ::StatResultType::Error;
623661
}
624662

625663
off_t size(std::string const& path) {

0 commit comments

Comments
 (0)
0