8000 Roll forward of https://github.com/bazelbuild/bazel/commit/a58fe3fa6b… · coderabbit-test/bazel@1744d45 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1744d45

Browse files
fmeumcopybara-github
authored andcommitted
Roll forward of bazelbuild@a58fe3f: Fix most Unicode encoding bugs.
NEW: Relative to the original CL, the only changes are in latin1_jni_path.cc and NativePosixFiles.java. In latin1_jni_path.cc, the former implementation of GetStringLatin1Chars with a fallback for strings with a UTF-16 coder is restored, with a BugReport sent whenever any such string is detected. In NativePosixFiles.java, a private method is added to be called from JNI to send the BugReport. *** Original change description *** Automated rollback of commit a58fe3f. *** Reason for rollback *** Causing crashes for internal software that uses Bazel's VFS stuff. *** Original change description *** Fix most Unicode encoding bugs Bazel aims to support arbitrary file system path encodings (even raw byte sequences) by attempting to force the JVM to use a Latin-1 locale for OS interactions. As a result, Bazel internally encodes Strings as raw byte arrays with a Latin-1 coder and no encoding information. Whenever it interacts with encoding-aware APIs, this may require a reencoding of the String contents, depending on the OS and availability of a Latin-1 locale. *** PiperOrigin-RevId: 696524066 Change-Id: Ifdddacc08c1a81ad719b1aeac2a93882cbafbcd2
1 parent 93b6265 commit 1744d45

File tree

98 files changed

+1415
-801
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+1415
-801
lines changed

src/main/cpp/BUILD

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,12 @@ cc_binary(
143143

144144
cc_library(
145145
name = "option_processor",
146-
srcs = ["option_processor.cc"],
146+
srcs = [
147+
"option_processor.cc",
148+
] + select({
149+
"//src/conditions:windows": ["option_processor_windows.cc"],
150+
"//conditions:default": ["option_processor_unix.cc"],
151+
}),
147152
hdrs = [
148153
"option_processor.h",
149154
"option_processor-internal.h",
@@ -179,6 +184,7 @@ cc_library(
179184
"//src/main/cpp/util",
180185
"//src/main/cpp/util:blaze_exit_code",
181186
"//src/main/cpp/util:logging",
187+
"@abseil-cpp//absl/strings",
182188
],
183189
)
184190

src/main/cpp/blaze.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,9 @@ static vector<string> GetServerExeArgs(const blaze_util::Path &jvm_path,
372372
}
373373
result.push_back(java_library_path.str());
374374

375-
// Force use of latin1 for file names.
375+
// TODO: Investigate whether this still has any effect. File name encoding is
376+
// governed by sun.jnu.encoding in JDKs with JEP 400, which can't be
377+
// influenced by setting a property.
376378
result.push_back("-Dfile.encoding=ISO-8859-1");
377379
// Force into the root locale to ensure consistent behavior of string
378380
// operations across machines (e.g. in the tr_TR locale, capital ASCII 'I'

src/main/cpp/blaze_util_windows.cc

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#ifndef WIN32_LEAN_AND_MEAN
1615
#define WIN32_LEAN_AND_MEAN
17-
#endif
1816
#include <windows.h>
1917

2018
#include <fcntl.h>
@@ -937,14 +935,15 @@ void CreateSecureOutputRoot(const blaze_util::Path& path) {
937935
}
938936

939937
string GetEnv(const string& name) {
940-
DWORD size = ::GetEnvironmentVariableA(name.c_str(), nullptr, 0);
938+
std::wstring wname = blaze_util::CstringToWstring(name);
939+
DWORD size = ::GetEnvironmentVariableW(wname.c_str(), nullptr, 0);
941940
if (size == 0) {
942941
return string(); // unset or empty envvar
943942
}
944943

945-
unique_ptr<char[]> value(new char[size]);
946-
::GetEnvironmentVariableA(name.c_str(), value.get(), size);
947-
return string(value.get());
944+
unique_ptr<WCHAR[]> value(new WCHAR[size]);
945+
::GetEnvironmentVariableW(wname.c_str(), value.get(), size);
946+
return blaze_util::WstringToCstring(value.get());
948947
}
949948

950949
string GetPathEnv(const string& name) {
@@ -970,11 +969,14 @@ bool ExistsEnv(const string& name) {
970969
}
971970

972971
void SetEnv(const string& name, const string& value) {
973-
// _putenv_s both calls ::SetEnvionmentVariableA and updates environ(5).
974-
_putenv_s(name.c_str(), value.c_str());
972+
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
973+
blaze_util::CstringToWstring(value).c_str());
975974
}
976975

977-
void UnsetEnv(const string& name) { SetEnv(name, ""); }
976+
void UnsetEnv(const string& name) {
977+
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
978+
nullptr);
979+
}
978980

979981
bool WarnIfStartedFromDesktop() {
980982
// GetConsoleProcessList returns:

src/main/cpp/main.cc

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
#include "src/main/cpp/blaze_util_platform.h"
2020
#include "src/main/cpp/option_processor.h"
2121
#include "src/main/cpp/startup_options.h"
22+
#include "src/main/cpp/util/strings.h"
2223
#include "src/main/cpp/workspace_layout.h"
2324

24-
int main(int argc, char **argv) {
25+
int main_impl(int argc, char **argv) {
2526
uint64_t start_time = blaze::GetMillisecondsMonotonic();
2627
std::unique_ptr<blaze::WorkspaceLayout> workspace_layout(
2728
new blaze::WorkspaceLayout());
@@ -32,3 +33,23 @@ int main(int argc, char **argv) {
3233
std::move(startup_options)),
3334
start_time);
3435
}
36+
37+
#ifdef _WIN32
38+
// Define wmain to support Unicode command line arguments on Windows
39+
// regardless of the current code page.
40+
int wmain(int argc, wchar_t **argv) {
41+
std::vector<std::string> args;
42+
for (int i = 0; i < argc; ++i) {
43+
args.push_back(blaze_util::WstringToCstring(argv[i]));
44+
}
45+
std::vector<char *> c_args;
46+
for (const std::string &arg : args) {
47+
c_args.push_back(const_cast<char *>(arg.c_str()));
48+
}
49+
c_args.push_back(nullptr);
50+
// Account for the null terminator.
51+
return main_impl(c_args.size() - 1, c_args.data());
52+
}
53+
#else
54+
int main(int argc, char **argv) { return main_impl(argc, argv); }
55+
#endif

src/main/cpp/option_processor-internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ std::string FindRcAlongsideBinary(const std::string& cwd,
5858

5959
blaze_exit_code::ExitCode ParseErrorToExitCode(RcFile::ParseError parse_error);
6060

61+
std::vector<std::string> GetProcessedEnv();
62+
6163
} // namespace internal
6264
} // namespace blaze
6365

src/main/cpp/option_processor.cc

Lines changed: 7 additions & 67 deletions
< D7AE /tr>
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@
3939
extern char **environ;
4040
#endif
4141

42+
#ifdef _WIN32
43+
#define WIN32_LEAN_AND_MEAN
44+
#include <windows.h>
45+
#endif
46+
4247
namespace blaze {
4348

4449
using std::map;
@@ -48,7 +53,6 @@ using std::vector;
4853

4954
constexpr char WorkspaceLayout::WorkspacePrefix[];
5055
static constexpr const char* kRcBasename = ".bazelrc";
51-
static std::vector<std::string> GetProcessedEnv();
5256

5357
OptionProcessor::OptionProcessor(
5458
const WorkspaceLayout* workspace_layout,
@@ -512,8 +516,8 @@ blaze_exit_code::ExitCode OptionProcessor::ParseOptions(
512516
return parse_startup_options_exit_code;
513517
}
514518

515-
blazerc_and_env_command_args_ =
516-
GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, GetProcessedEnv());
519+
blazerc_and_env_command_args_ = GetBlazercAndEnvCommandArgs(
520+
cwd, rc_file_ptrs, blaze::internal::GetProcessedEnv());
517521
return blaze_exit_code::SUCCESS;
518522
}
519523

@@ -583,70 +587,6 @@ blaze_exit_code::ExitCode OptionProcessor::ParseStartupOptions(
583587
return startup_options_->ProcessArgs(rcstartup_flags, error);
584588
}
585589

586-
static bool IsValidEnvName(const char* p) {
587-
#if defined(_WIN32) || defined(__CYGWIN__)
588-
for (; *p && *p != '='; ++p) {
589-
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
590-
(*p >= '0' && *p <= '9') || *p == '_' || *p == '(' || *p == ')')) {
591-
return false;
592-
}
593-
}
594-
#endif
595-
return true;
596-
}
597-
598-
#if defined(_WIN32)
599-
static void PreprocessEnvString(string* env_str) {
600-
static constexpr const char* vars_to_uppercase[] = {"PATH", "SYSTEMROOT",
601-
"SYSTEMDRIVE",
602-
"TEMP", "TEMPDIR", "TMP"};
603-
604-
int pos = env_str->find_first_of('=');
605-
if (pos == string::npos) return;
606-
607-
string name = env_str->substr(0, pos);
608-
// We do not care about locale. All variable names are ASCII.
609-
std::transform(name.begin(), name.end(), name.begin(), ::toupper);
610-
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
611-
name) != std::end(vars_to_uppercase)) {
612-
env_str->assign(name + "=" + env_str->substr(pos + 1));
613-
}
614-
}
615-
616-
#elif defined(__CYGWIN__) // not defined(_WIN32)
617-
618-
static void PreprocessEnvString(string* env_str) {
619-
int pos = env_str->find_first_of('=');
620-
if (pos == string::npos) return;
621-
string name = env_str->substr(0, pos);
622-
if (name == "PATH") {
623-
env_str->assign("PATH=" + env_str->substr(pos + 1));
624-
} else if (name == "TMP") {
625-
// A valid Windows path "c:/foo" is also a valid Unix path list of
626-
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
627-
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
628-
}
629-
}
630-
631-
#else // Non-Windows platforms.
632-
633-
static void PreprocessEnvString(const string* env_str) {
634-
// do nothing.
635-
}
636-
#endif // defined(_WIN32)
637-
638-
static std::vector<std::string> GetProcessedEnv() {
639-
std::vector<std::string> processed_env;
640-
for (char** env = environ; *env != nullptr; env++) {
641-
string env_str(*env);
642-
if (IsValidEnvName(*env)) {
643-
PreprocessEnvString(&env_str);
644-
processed_env.push_back(std::move(env_str));
645-
}
646-
}
647-
return processed_env;
648-
}
649-
650590
// IMPORTANT: The options added here do not come from the user. In order for
651591
// their source to be correctly tracked, the options must either be passed
652592
// as --default_override=0, 0 being "client", or must be listed in

src/main/cpp/option_processor_unix.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Copyright 2024 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <string>
16+
#include <vector>
17+
18+
#include "src/main/cpp/option_processor-internal.h"
19+
20+
// On OSX, there apparently is no header that defines this.
21+
#ifndef environ
22+
extern char** environ;
23+
#endif
24+
25+
namespace blaze::internal {
26+
27+
std::vector<std::string> GetProcessedEnv() {
28+
std::vector<std::string> processed_env;
29+
for (char** env = environ; *env != nullptr; env++) {
30+
processed_env.emplace_back(*env);
31+
}
32+
return processed_env;
33+
}
34+
35+
} // namespace blaze::internal
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright 2024 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#define WIN32_LEAN_AND_MEAN
16+
#include <windows.h>
17+
18+
#include <algorithm>
19+
#include <string>
20+
#include <string_view>
21+
22+
#include "absl/strings/ascii.h"
23+
#include "src/main/cpp/option_processor-internal.h"
24+
#include "src/main/cpp/util/strings.h"
25+
26+
namespace blaze::internal {
27+
28+
#if defined(__CYGWIN__)
29+
30+
static void PreprocessEnvString(std::string* env_str) {
31+
int pos = env_str->find_first_of('=');
32+
if (pos == string::npos) {
33+
return;
34+
}
35+
std::string name = env_str->substr(0, pos);
36+
if (name == "PATH") {
37+
env_str->assign("PATH=" + env_str->substr(pos + 1));
38+
} else if (name == "TMP") {
39+
// A valid Windows path "c:/foo" is also a valid Unix path list of
40+
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
41+
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
42+
}
43+
}
44+
45+
#else // not defined(__CYGWIN__)
46+
47+
static void PreprocessEnvString(std::string* env_str) {
48+
static constexpr const char* vars_to_uppercase[] = {
49+
"PATH", "SYSTEMROOT", "SYSTEMDRIVE", "TEMP", "TEMPDIR", "TMP"};
50+
51+
std::size_t pos = env_str->find_first_of('=');
52+
if (pos == std::string::npos) {
53+
return;
54+
}
55+
56+
std::string name = absl::AsciiStrToUpper(env_str->substr(0, pos));
57+
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
58+
name) != std::end(vars_to_uppercase)) {
59+
env_str->assign(name + "=" + env_str->substr(pos + 1));
60+
}
61+
}
62+
63+
#endif // defined(__CYGWIN__)
64+
65+
static bool IsValidEnvName(std::string_view s) {
66+
std::string_view name = s.substr(0, s.find('='));
67+
return std::all_of(name.begin(), name.end(), [](char c) {
68+
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
69+
(c >= '0' && c <= '9') || c == '_' || c == '(' || c == ')';
70+
});
71+
}
72+
73+
// Use GetEnvironmentStringsW to get the environment variables to support
74+
// Unicode regardless of the current code page.
75+
std::vector<std::string> GetProcessedEnv() {
76+
std::vector<std::string> processed_env;
77+
wchar_t* env = GetEnvironmentStringsW();
78+
if (env == nullptr) {
79+
return processed_env;
80+
}
81+
82+
for (wchar_t* p = env; *p != L'\0'; p += wcslen(p) + 1) {
83+
std::string env_str = blaze_util::WstringToCstring(p);
84+
if (IsValidEnvName(env_str)) {
85+
PreprocessEnvString(&env_str);
86+
processed_env.push_back(std::move(env_str));
87+
}
88+
}
89+
90+
FreeEnvironmentStringsW(env);
91+
return processed_env;
92+
}
93+
94+
} // namespace blaze::internal

src/main/java/com/google/devtools/build/lib/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ java_library(
477477
"//src/main/java/com/google/devtools/build/lib/util:process",
478478
"//src/main/java/com/google/devtools/build/lib/util:shallow_object_size_computer",
479479
"//src/main/java/com/google/devtools/build/lib/util:string",
480+
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
480481
"//src/main/java/com/google/devtools/build/lib/util/io",
481482
"//src/main/java/com/google/devtools/build/lib/util/io:io-proto",
482483
"//src/main/java/com/google/devtools/build/lib/util/io:out-err",

src/main/java/com/google/devtools/build/lib/actions/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ java_library(
190190
"//src/main/java/com/google/devtools/build/lib/util:filetype",
191191
"//src/main/java/com/google/devtools/build/lib/util:os",
192192
"//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
193-
"//src/main/java/com/google/devtools/build/lib/util:string",
194193
"//src/main/java/com/google/devtools/build/lib/util:var_int",
195194
"//src/main/java/com/google/devtools/build/lib/util/io",
196195
"//src/main/java/com/google/devtools/build/lib/vfs",

0 commit comments

Comments
 (0)
0