Table format has been broken when output column contains east asian fullwidth characters · Issue #6290 · PowerShell/PowerShell · GitHub
[go: up one dir, main page]

Skip to content
Table format has been broken when output column contains east asian fullwidth characters #6290
@stknohg

Description

@stknohg

Steps to reproduce

# Create directories whose names contain East Asian fullwidth characters.
# This example uses Japanese.
mkdir .\Test
cd .\Test
mkdir Hello
mkdir HelloWorld
mkdir Helloワールド
mkdir ハロー
mkdir ハローワールド

# Run Get-ChildItem (its output uses the table format by default).
Get-ChildItem

Expected behavior

Table layout is not broken.

e.g. on Windows PowerShell 5.1

error02

Actual behavior

Table layout has been broken.

error01

In this case, the last column (Name) is broken.
However, columns other than the last one will also be broken.

error03

Environment data

> $PSVersionTable
Name                           Value
----                           -----
PSVersion                      6.0.1
PSEdition                      Core
GitCommitId                    v6.0.1
OS                             Microsoft Windows 10.0.16299
Platform                       Win32NT
PSCompatibleVersions           {1.0, 2.0, 3.0, 4.0...}
PSRemotingProtocolVersion      2.3
SerializationVersion           1.1.0.1
WSManStackVersion              3.0

This problem occurs on all platforms.

Cause

The root cause is that the ConsoleControl.LengthInBufferCells() method returns an incorrect value when the str parameter contains East Asian fullwidth characters.

/// <summary>
/// Computes a length for the part of <paramref name="str"/> starting at <paramref name="offset"/>,
/// as <c>str.Length - offset</c> minus the total reported by ControlSequenceLength when
/// <paramref name="checkEscapeSequences"/> is true.
/// </summary>
/// <remarks>
/// Every remaining UTF-16 char is counted as exactly one unit; the display width of the character
/// is not consulted anywhere in this method.
/// NOTE(review): the second assertion tolerates a null or empty <paramref name="str"/>, but
/// <c>str.Length</c> is dereferenced unconditionally below - confirm callers never pass null.
/// </remarks>
/// <param name="str">The string to measure.</param>
/// <param name="offset">Index in <paramref name="str"/> at which measuring starts; asserted to be non-negative and less than <c>str.Length</c>.</param>
/// <param name="checkEscapeSequences">When true, the values returned by ControlSequenceLength are subtracted from the result.</param>
/// <returns><c>str.Length - offset - escapeSequenceAdjustment</c>.</returns>
internal static int LengthInBufferCells(string str, int offset, bool checkEscapeSequences)
{
Dbg.Assert(offset >= 0, "offset >= 0");
Dbg.Assert(string.IsNullOrEmpty(str) || (offset < str.Length), "offset < str.Length");
var escapeSequenceAdjustment = 0;
if (checkEscapeSequences)
{
int i = 0;
// Walk up to offset first; the return values are discarded here, so sequences before
// offset do not contribute to the adjustment.
// (ControlSequenceLength presumably advances i past the char or sequence at i - TODO confirm.)
while (i < offset)
{
ControlSequenceLength(str, ref i);
}
// If offset != i, we're in the middle of a sequence, which the caller should avoid,
// but we'll tolerate.
// From here to the end of the string, accumulate whatever ControlSequenceLength reports.
while (i < str.Length)
{
escapeSequenceAdjustment += ControlSequenceLength(str, ref i);
}
}
// One unit per remaining UTF-16 char, less the accumulated adjustment.
return str.Length - offset - escapeSequenceAdjustment;
}

When TableWriter creates a table row line, the ConsoleControl.LengthInBufferCells() method is called to get the row's character width.

The ConsoleControl.LengthInBufferCells() method is expected to return a value that reflects the display width of each character.
Generally, East Asian fullwidth characters (e.g. Japanese) are expected to count as 2 or more cells per character, but the current version of ConsoleControl.LengthInBufferCells() simply returns the character length.

// Expected length is 6 (3 fullwidth characters), but the actual return value is 3.
ConsoleControl.LengthInBufferCells("ハロー", 0, false); 

So the padding in the last column (Name) is greater than expected, and the table layout is broken as a result.

# expected 120 characters width
"d-----       2018/03/02     15:52                Hello                                                                 "
# actual 123 characters width
"d-----       2018/03/02     15:52                ハロー                                                                   "            

Reference 1

A past version of the ConsoleControl.LengthInBufferCells() method called the LengthInBufferCellsFE() method internally. (Does FE mean Far East?)

/// <summary>
/// Returns 1 or 2 for the character <paramref name="c"/>: a fixed table of code-point ranges is
/// checked first, and characters outside the table are measured with GDI in non-CORECLR builds.
/// </summary>
/// <remarks>
/// The window handle and device context are obtained only when <paramref name="hDC"/> is zero and
/// are handed back through the ref parameters; this method never releases them.
/// Every Win32 failure is traced and answered with 1 instead of throwing.
/// In CORECLR builds the GDI section is compiled out, so any character outside the table yields 1.
/// </remarks>
/// <param name="c">The character to measure.</param>
/// <param name="hwnd">Console window handle; assigned from GetConsoleWindow when <paramref name="hDC"/> is zero.</param>
/// <param name="hDC">Device context of the console window; assigned from GetDC when it is zero on entry.</param>
/// <param name="istmInitialized">Whether <paramref name="tm"/> already holds text metrics; set to true after a successful GetTextMetrics call.</param>
/// <param name="tm">Text metrics of the device context; its tmMaxCharWidth is the threshold for returning 2.</param>
/// <returns>2 for the ranges marked as fullwidth below or when the measured width reaches tmMaxCharWidth; otherwise 1.</returns>
private static int LengthInBufferCellsFE(char c, ref HWND hwnd, ref HDC hDC, ref bool istmInitialized, ref TEXTMETRIC tm)
{
if (0x20 <= c && c <= 0x7e)
{
/* ASCII */
return 1;
}
else if (0x3041 <= c && c <= 0x3094)
{
/* Hiragana */
return 2;
}
else if (0x30a1 <= c && c <= 0x30f6)
{
/* Katakana */
return 2;
}
else if (0x3105 <= c && c <= 0x312c)
{
/* Bopomofo */
return 2;
}
else if (0x3131 <= c && c <= 0x318e)
{
/* Hangul Elements */
return 2;
}
else if (0xac00 <= c && c <= 0xd7a3)
{
/* Korean Hangul Syllables */
return 2;
}
else if (0xff01 <= c && c <= 0xff5e)
{
/* Fullwidth ASCII variants */
return 2;
}
else if (0xff61 <= c && c <= 0xff9f)
{
/* Halfwidth Katakana variants */
return 1;
}
else if ((0xffa0 <= c && c <= 0xffbe) ||
(0xffc2 <= c && c <= 0xffc7) ||
(0xffca <= c && c <= 0xffcf) ||
(0xffd2 <= c && c <= 0xffd7) ||
(0xffda <= c && c <= 0xffdc))
{
/* Halfwidth Hangul variants */
return 1;
}
else if (0xffe0 <= c && c <= 0xffe6)
{
/* Fullwidth symbol variants */
return 2;
}
else if (0x4e00 <= c && c <= 0x9fa5)
{
/* Han Ideographic */
return 2;
}
else if (0xf900 <= c && c <= 0xfa2d)
{
/* Han Compatibility Ideographs */
return 2;
}
else
{
// GetTextMetrics / GetCharWidth32 exist in an extension API set 'ext-ms-win-gdi-font-l1-1-1.dll', which is not available in NanoServer.
#if !CORECLR
/* Unknown character: need to use GDI*/
// Acquire the console window and its device context only if no device context was passed in.
if (hDC == (IntPtr)0)
{
hwnd = NativeMethods.GetConsoleWindow();
if ((IntPtr)0 == hwnd)
{
int err = Marshal.GetLastWin32Error();
//Don't throw exception so that output can continue
tracer.TraceError("Win32 Error 0x{0:X} occurred when getting the window handle to the console.",
err);
return 1;
}
hDC = NativeMethods.GetDC(hwnd);
if ((IntPtr)0 == hDC)
{
int err = Marshal.GetLastWin32Error();
//Don't throw exception so that output can continue
tracer.TraceError("Win32 Error 0x{0:X} occurred when getting the Device Context of the console window.",
err);
return 1;
}
}
bool result = true;
// Fetch the text metrics once; istmInitialized records that tm is already populated.
if (!istmInitialized)
{
result = NativeMethods.GetTextMetrics(hDC, out tm);
if (!result)
{
int err = Marshal.GetLastWin32Error();
//Don't throw exception so that output can continue
tracer.TraceError("Win32 Error 0x{0:X} occurred when getting the Text Metric of the console window's Device Context.",
err);
return 1;
}
istmInitialized = true;
}
int width;
// Measure this single character (first and last code point of the range are both c).
result = NativeMethods.GetCharWidth32(hDC, (uint)c, (uint)c, out width);
if (!result)
{
int err = Marshal.GetLastWin32Error();
//Don't throw exception so that output can continue
tracer.TraceError("Win32 Error 0x{0:X} occurred when getting the width of a char.",
err);
return 1;
}
// A glyph at least as wide as the maximum character width is reported as 2.
if (width >= tm.tmMaxCharWidth)
{
return 2;
}
#endif
}
// Reached only for characters outside the table that were not reported as 2 above.
tracer.WriteLine("failed to locate char {0}, return 1", (int)c);
return 1;
}
/// <summary>
/// Returns the result of the code-page-aware overload for <paramref name="c"/>, using the code
/// page reported by NativeMethods.GetConsoleOutputCP().
/// </summary>
/// <param name="c">The character to measure.</param>
/// <returns>The value returned by <c>LengthInBufferCells(c, codePage)</c>.</returns>
internal static int LengthInBufferCells(char c)
{
// Look up the console output code page, then defer to the overload that takes it explicitly.
uint codePage = NativeMethods.GetConsoleOutputCP();
return LengthInBufferCells(c, codePage);
}
/// <summary>
/// From IsConsoleFullWidth in \windows\core\ntcon\server\dbcs.c
/// Returns 1 when <paramref name="codePage"/> is not accepted by IsAvailableFarEastCodePage;
/// otherwise returns the result of LengthInBufferCellsFE for <paramref name="c"/>.
/// </summary>
/// <param name="c">The character to measure.</param>
/// <param name="codePage">The code page that is passed to IsAvailableFarEastCodePage.</param>
/// <returns>1 for code pages rejected by IsAvailableFarEastCodePage; otherwise the value returned by LengthInBufferCellsFE.</returns>
[SuppressMessage("Microsoft.Usage", "CA1806:DoNotIgnoreMethodResults",
MessageId = "Microsoft.PowerShell.ConsoleControl+NativeMethods.ReleaseDC(System.IntPtr,System.IntPtr)")]
private static int LengthInBufferCells(char c, uint codePage)
{
if (!IsAvailableFarEastCodePage(codePage))
{
return 1;
}
// Start with zero handles and uninitialized metrics; LengthInBufferCellsFE fills them in through the ref parameters.
HWND hwnd = (HWND)0;
HDC hDC = (HDC)0;
bool istmInitialized = false;
TEXTMETRIC tm = new TEXTMETRIC(); ;
try
{
return LengthInBufferCellsFE(c, ref hwnd, ref hDC, ref istmInitialized, ref tm);
}
finally
{
// Release the device context only when both handles are non-zero; the ReleaseDC result is ignored (see the CA1806 suppression above).
if (hwnd != (IntPtr)0 && hDC != (IntPtr)0)
{
NativeMethods.ReleaseDC(hwnd, hDC);
}
}
}

This LengthInBufferCellsFE() method handled the length of East Asian fullwidth characters correctly, but it was removed in commit f604b0e.

Reference 2

Technical report about East Asian Width.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      0