8000 Add explicit ContentType detection to Invoke-RestMethod by dantraMSFT · Pull Request #4692 · PowerShell/PowerShell · GitHub
[go: up one dir, main page]

Skip to content

Add explicit ContentType detection to Invoke-RestMethod #4692

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,28 @@ internal override void ProcessResponse(HttpResponseMessage response)
{
// determine the response type
RestReturnType returnType = CheckReturnType(response);
// get the response encoding
Encoding encoding = ContentHelper.GetEncoding(response);

// Try to get the response encoding from the ContentType header.
Encoding encoding = null;
string charSet = response.Content.Headers.ContentType?.CharSet;
if (!string.IsNullOrEmpty(charSet))
{
// NOTE: Don't use ContentHelper.GetEncoding; it returns a
// default which bypasses checking for a meta charset value.
StreamHelper.TryGetEncoding(charSet, out encoding);
}

object obj = null;
Exception ex = null;

string str = StreamHelper.DecodeStream(responseStream, ref encoding);
// NOTE: Tests use this verbose output to verify the encoding.
WriteVerbose(string.Format
(
System.Globalization.CultureInfo.InvariantCulture,
"Content encoding: {0}",
string.IsNullOrEmpty(encoding.HeaderName) ? encoding.EncodingName : encoding.HeaderName)
);
bool convertSuccess = false;

// On CoreCLR, we need to explicitly load Json.NET
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ internal static string DecodeStream(Stream stream, string characterSet, out Enco
return DecodeStream(stream, ref encoding);
}

static bool TryGetEncoding(string characterSet, out Encoding encoding)
internal static bool TryGetEncoding(string characterSet, out Encoding encoding)
{
bool result = false;
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,69 @@ function ExecuteWebRequest
return $result
}

[string] $verboseEncodingPrefix = 'Content encoding: '
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move $verboseEncodingPrefix into the function body so it does not become orphaned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I want a locally scoped variable that has to be initialized each call.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough

# This function calls Invoke-WebRequest with the given uri and
# parses the verbose output to determine the encoding used for the content.
function ExecuteRestMethod
{
param
(
[Parameter(Mandatory)]
[string]
$Uri,

[switch] $UseBasicParsing
)
$result = @{Output = $null; Error = $null; Encoding = $null; Content = $null}
$verbosePreferenceSave = $VerbosePreference
$VerbosePreference = 'Continue'
try
{

$verboseFile = Join-Path $TestDrive -ChildPath ExecuteRestMethod.verbose.txt
$result.Output = Invoke-RestMethod -Uri $Uri -TimeoutSec 5 -UseBasicParsing:$UseBasicParsing.IsPresent -Verbose 4>$verboseFile
$result.Content = $result.Output

if (Test-Path -Path $verboseFile)
{
$result.Verbose = Get-Content -Path $verboseFile
foreach ($item in $result.Verbose)
{
$line = $item.Trim()
if ($line.StartsWith($verboseEncodingPrefix))
{
$encodingName = $item.SubString($verboseEncodingPrefix.Length).Trim()
$result.Encoding = [System.Text.Encoding]::GetEncoding($encodingName)
break
}
}
if ($result.Encoding -eq $null)
{
throw "Encoding not found in verbose output. Lines: $($result.Verbose.Count) Content:$($result.Verbose)"
}
}

if ($result.Verbose -eq $null)
{
throw "No verbose output was found"
}
}
catch
{
$result.Error = $_ | select-object * | Out-String
}
finally
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since the $TESTDRIVE is removed by pester, you only need set the verbosePreference back.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test drive isn't cleared until the end of the context block. line 322 uses a test for the file's existence. With the current configuration, subsequent calls to ExecuteRestMethod could result in false positives for the test on line 322 if the file is not removed at the end at the end of the function. This cleanup action would need to remain or the filename generated on each call to ExecuteRestMethod would need to be unique.

Copy link
Contributor Author
@dantraMSFT dantraMSFT Aug 29, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As @markekraus indicates, this is by design and simpler than generating file names.
it also occurs to me that I shouldn't be relying on test/context lifetime for this function; I don't know if it will be called once or multiple times.

I'll be leaving the code as is.

{
$VerbosePreference = $verbosePreferenceSave
if (Test-Path -Path $verboseFile)
{
Remove-Item -Path $verboseFile -ErrorAction SilentlyContinue
}
}

return $result
}

<#
Defines the list of redirect codes to test as well as the
expected Method when the redirection is handled.
Expand Down Expand Up @@ -1678,6 +1741,224 @@ Describe "Invoke-RestMethod tests" -Tags "Feature" {

#endregion Client Certificate Authentication

#region charset encoding tests

Context "Invoke-RestMethod Encoding tests with BasicHtmlWebResponseObject response" {
It "Verifies Invoke-RestMethod detects charset meta value when the ContentType header does not define it." {
$output = '<html><head><meta charset="Unicode"></head></html>'
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-WebRequest detects charset meta value when newlines are encountered in the element." {
$output = @'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just a question, would:

"<html>`n<head>`n<meta`n   charset=""Unicode""  `n>`n</head>`n</html>`n"

be the same?

Copy link
Contributor Author
@dantraMSFT dantraMSFT Aug 29, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I find the HERE string much easier to read and it avoids me making silly escaping errors.
I'll be leaving it as is.

<html>
<head>
<meta
charset="Unicode"
>
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects charset meta value when the attribute value is unquoted." {
$output = '<html><head><meta charset = Unicode></head></html>'
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects http-equiv charset meta value when the ContentType header does not define it." {
$output = @'
<html><head>
<meta http-equiv="content-type" content="text/html; charset=Unicode">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects http-equiv charset meta value newlines are encountered in the element." {
$output = @'
<html><head>
<meta
http-equiv="content-type"
content="text/html; charset=Unicode">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod ignores meta charset value when Content-Type header defines it." {
$output = '<html><head><meta charset="utf-32"></head></html>'
# NOTE: meta charset should be ignored
$expectedEncoding = [System.Text.Encoding]::UTF8
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html; charset=utf-8&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod honors non-utf8 charsets in the Content-Type header" {
$output = '<html><head><meta charset="utf-32"></head></html>'
# NOTE: meta charset should be ignored
$expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod defaults to iso-8859-1 when an unsupported/invalid charset is declared" {
$output = '<html><head><meta charset="invalid"></head></html>'
$expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" {
$output = @'
<html><head>
<meta http-equiv="content-type" content="text/html; charset=Invalid">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html&output=$output" -UseBasicParsing

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}
}

Context "Invoke-RestMethod Encoding tests with HtmlWebResponseObject response" {
# these tests are dependent on https://github.com/PowerShell/PowerShell/issues/2867
# Currently, all paths return BasicHtmlWebResponseObject
It "Verifies Invoke-RestMethod detects charset meta value when the ContentType header does not define it." -Pending {
$output = '<html><head><meta charset="Unicode"></head></html>'
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects charset meta value when newlines are encountered in the element." -Pending {
$output = @'
<html>
<head>
<meta
charset="Unicode"
>
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod ignores meta charset value when Content-Type header defines it." -Pending {
$output = '<html><head><meta charset="utf-16"></head></html>'
# NOTE: meta charset should be ignored
$expectedEncoding = [System.Text.Encoding]::UTF8
# Update to test for HtmlWebResponseObject when mshtl dependency has been resolved.
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html; charset=utf-8&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects http-equiv charset meta value when the ContentType header does not define it." -Pending {
$output = @'
<html><head>
<meta http-equiv="content-type" content="text/html; charset=Unicode">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod detects http-equiv charset meta value newlines are encountered in the element." -Pending {
$output = @'
<html><head>
<meta
http-equiv="content-type"
content="text/html; charset=Unicode">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('Unicode')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod honors non-utf8 charsets in the Content-Type header" -Pending {
$output = '<html><head><meta charset="utf-32"></head></html>'
# NOTE: meta charset should be ignored
$expectedEncoding = [System.Text.Encoding]::GetEncoding('utf-16')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html; charset=utf-16&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod defaults to iso-8859-1 when an unsupported/invalid charset is declared" -Pending {
$output = '<html><head><meta charset="invalid"></head></html>'
$expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}

It "Verifies Invoke-RestMethod defaults to iso-8859-1 when an unsupported/invalid charset is declared using http-equiv" -Pending {
$output = @'
<html><head>
<meta http-equiv="content-type" content="text/html; charset=Invalid">
</head>
</html>
'@
$expectedEncoding = [System.Text.Encoding]::GetEncoding('iso-8859-1')
$response = ExecuteRestMethod -Uri "http://localhost:8081/PowerShell?test=response&contenttype=text/html&output=$output"

$response.Error | Should BeNullOrEmpty
$response.Encoding.EncodingName | Should Be $expectedEncoding.EncodingName
}
}

#endregion charset encoding tests

BeforeEach {
if ($env:http_proxy) {
$savedHttpProxy = $env:http_proxy
Expand Down
0