8000 Merge Main into Feature/4.0 by JakeRadMSFT · Pull Request #6747 · dotnet/machinelearning · GitHub
[go: up one dir, main page]

Skip to content

Merge Main into Feature/4.0 #6747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
4c799ab
Update build templates to handle feature branches (#6744)
JakeRadMSFT Jun 28, 2023
9aea1ce
Update version to 4.0 for feature branch (#6743)
JakeRadMSFT Jun 27, 2023
443ceb9
Add missing implementation for datetime relevant arrow type into data…
asmirnov82 Jul 6, 2023
53c0f26
Fix the behavior or column SetName method (#6676)
asmirnov82 Jul 6, 2023
26c2446
Fix DataFrame to allow to store columns with size more than 2 Gb (#6710)
asmirnov82 Jul 6, 2023
36f87d1
avoid empty dataset (#6756)
LittleLittleCloud Jul 6, 2023
69eca56
Fix dataframe arithmetics for columns having several value buffers (c…
asmirnov82 Jul 6, 2023
d9e1ee1
Run tests that requires more than 2 Gb of Memory only on 64-bit env (…
asmirnov82 Jul 7, 2023
caee3c2
Reduce coupling of Data.Analysis.Tests project (#6759)
asmirnov82 Jul 7, 2023
578d7bc
Provide ability to filter dataframe column by null via ElementWise Me…
asmirnov82 Jul 7, 2023
69cc4bc
Fix incorrect DataFrame min max computation with NULL (#6734)
asmirnov82 Jul 7, 2023
321158d
Clean DataFrame meaningless code (#6761)
asmirnov82 Jul 11, 2023
65c7ca9
Add NameEntityRecognition and Q&A deep learning tasks. (#6760)
michaelgsharp Jul 24, 2023
7b6af06
fix issue (#6768)
LittleLittleCloud Jul 24, 2023
8952994
fixed mac build and minor torch sharp changes (#6776)
michaelgsharp Jul 28, 2023
a823199
Improve DataFrame Arithmetics implementation (#6763)
asmirnov82 Jul 28, 2023
ea84d42
Add QA sweepable estimator in AutoML (#6781)
zewditu Aug 3, 2023
077a6b8
Modernized some argument checks that still used string literals for p…
Lehonti Aug 7, 2023
c28d5af
removed deprecated yosemite brew (#6805)
michaelgsharp Aug 24, 2023
179f7dc
Add TargetType to Type_convert (#6785)
zewditu Aug 25, 2023
92eccad
File-scoped namespaces in files under `Environment` (`Microsoft.ML.Co…
Lehonti Aug 25, 2023
43a6a81
File-scoped namespaces in files under `EntryPoints` (`Microsoft.ML.Co…
Lehonti Aug 25, 2023
39235a7
Fix issue with addIndexColumn in DataFrame.LoadCsv (#6769)
asmirnov82 Aug 25, 2023
e3f53a4
Fix DataFrame.LoadCsv can not load CSV with duplicate column names (#…
asmirnov82 Aug 31, 2023
34389b6
File-scoped namespaces in files under `ComponentModel` (`Microsoft.ML…
Lehonti Aug 31, 2023
aaf226c
File-scoped namespaces in files under `Data` (`Microsoft.ML.Core`) (#…
Lehonti Aug 31, 2023
e6a88c4
Fix inconsistent null handling in DataFrame Arithmetics (#6770)
asmirnov82 Aug 31, 2023
ccf34e3
File-scoped namespaces in files under `Prediction` (`Microsoft.ML.Cor…
Lehonti Sep 1, 2023
d9dbf99
Allow to define CultureInfo for parsing values on reading DataFrame f…
asmirnov82 Sep 1, 2023
d692751
Append dataframe rows based on column names (#6808)
asmirnov82 Sep 2, 2023
09b80f8
removed codecov token (#6811)
michaelgsharp Sep 8, 2023
49824f3
Fix wrong type conversion on PrimitiveDataFrameColumn (#6834)
novelhawk Sep 25, 2023
a052146
update interactive kernel version (#6836)
LittleLittleCloud Sep 26, 2023
15e6a55
Add performance benchmarks for dataframe arithmetic operations (#6827)
asmirnov82 Sep 26, 2023
5648c89
Improve performance of column cloning inside DataFrame arithmetics (#…
asmirnov82 Sep 27, 2023
85ee6e5
Simplify tt files for PrimitiveDataFrameColumnAritmetics (#6830)
asmirnov82 Sep 27, 2023
66eed89
Addresses #6533 (#6838)
rgesteve Sep 27, 2023
97926a8
Update dependencies (#6837)
ericstj Sep 27, 2023
7fe293d
PrimitiveDataFrameColumn.Clone method crashes when is used with IEnum…
asmirnov82 Sep 27, 2023
3c625bf
6847 incorrectly sets column value (#6849)
asmirnov82 Oct 2, 2023
5cf6051
Increase performance of arithmetic operations by enhancing calculatio…
asmirnov82 Oct 3, 2023
64d7ebd
Fixes incorrect work of DataFrame with VBufferColumn when number of e…
asmirnov82 Oct 4, 2023
e72e985
Merge branch 'main' into feature/4.0
JakeRadMSFT Oct 4, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .vsts-dotnet-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
# ML.NET's PR validation build
################################################################################

pr:
branches:
include:
- main
- feature/*
- release/*

trigger:
branches:
include:
- main
- feature/*
- release/*

resources:
containers:
- container: CentosContainer
Expand Down
11 changes: 11 additions & 0 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Tokenizers.Tes
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Microsoft.ML.FSharp.Tests", "test\Microsoft.ML.FSharp.Tests\Microsoft.ML.FSharp.Tests.fsproj", "{041CB5CD-5832-413E-A894-D9DBED210B16}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Data.Analysis.PerformanceTests", "test\Microsoft.Data.Analysis.PerformanceTests\Microsoft.Data.Analysis.PerformanceTests.csproj", "{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -788,6 +790,14 @@ Global
{041CB5CD-5832-413E-A894-D9DBED210B16}.Release|Any CPU.Build.0 = Release|Any CPU
{041CB5CD-5832-413E-A894-D9DBED210B16}.Release|x64.ActiveCfg = Release|Any CPU
{041CB5CD-5832-413E-A894-D9DBED210B16}.Release|x64.Build.0 = Release|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Debug|x64.ActiveCfg = Debug|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Debug|x64.Build.0 = Debug|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Release|Any CPU.Build.0 = Release|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Release|x64.ActiveCfg = Release|Any CPU
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -870,6 +880,7 @@ Global
{BBC3A950-BD68-45AC-9DBD-A8F4D8847745} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{C3D82402-F207-4F19-8C57-5AF0FBAF9682} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{041CB5CD-5832-413E-A894-D9DBED210B16} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{FB8A8823-CC6C-4C2F-8539-05FBFB7C91CD} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
21 changes: 21 additions & 0 deletions THIRD-PARTY-NOTICES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,24 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


License notice for BitUtility
------------------------------------------

https://github.com/apache/arrow/blob/main/csharp/src/Apache.Arrow/BitUtility.cs

Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
5 changes: 2 additions & 3 deletions build/.night-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ schedules:
branches:
include:
- main
- releases/1.6.0
- features/automl
- features/integrationPackage
- feature/*
- release/*
always: true

resources:
Expand Down
5 changes: 2 additions & 3 deletions build/.outer-loop-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ schedules:
branches:
include:
- main
- releases/1.6.0
- features/automl
- features/integrationPackage
- feature/*
- release/*
always: true


Expand Down
3 changes: 1 addition & 2 deletions build/ci/job-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ jobs:
${{ if eq(parameters.codeCoverage, 'false') }}:
hardLink: '/p:CreateHardLinksForCopyLocalIfPossible=True'
testTargetFramework: '/p:TestTargetFramework=$(_targetFramework)'
CODECOV_TOKEN: 03031e35-fe75-4e4c-87ee-e919ae601748
strategy:
matrix:
${{ if eq(parameters.customMatrixes, '') }}:
Expand All @@ -68,7 +67,7 @@ jobs:
steps:
# Extra MacOS step required to install OS-specific dependencies
- ${{ if and(contains(parameters.pool.vmImage, 'macOS'), not(contains(parameters.name, 'cross'))) }}:
- script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=TRUE && brew update && brew unlink libomp && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula
- script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=TRUE && brew unlink libomp && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula
displayName: Install MacOS build dependencies
# Extra Apple MacOS step required to install OS-specific dependencies
- ${{ if and(contains(parameters.pool.vmImage, 'macOS'), contains(parameters.name, 'cross')) }}:
Expand Down
14 changes: 14 additions & 0 deletions build/codecoverage-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
# ML.NET's Code Coverage validation build
################################################################################

pr:
branches:
include:
- main
- feature/*
- release/*

trigger:
branches:
include:
- main
- feature/*
- release/*

jobs:
- template: /build/ci/job-template.yml
parameters:
Expand Down
1 change: 0 additions & 1 deletion build/libomp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class Libomp < Formula
end

depends_on "cmake" => :build
depends_on macos: :yosemite

def install
system "cmake", ".", *std_cmake_args
Expand Down
2 changes: 1 addition & 1 deletion build/vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ stages:
pool:
vmImage: macOS-12
steps:
- script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && brew update && rm '/usr/local/bin/2to3-3.11' && brew unlink libomp && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula
- script: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && rm '/usr/local/bin/2to3-3.11' && brew unlink libomp && brew install $(Build.SourcesDirectory)/build/libomp.rb --build-from-source --formula
displayName: Install build dependencies
# Only build native assets to avoid conflicts.
- script: ./build.sh -projects $(Build.SourcesDirectory)/src/Native/Native.proj -configuration $(BuildConfig) /p:TargetArchitecture=x64 /p:CopyPackageAssets=true
Expand Down
18 changes: 10 additions & 8 deletions eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@
https://github.com/dotnet/arcade/blob/c788ffa83b088cafe9dbffc1cbc8155ba88b2553/Documentation/CorePackages/Versioning.md#output
-->
<DotNetUseShippingVersions>true</DotNetUseShippingVersions>
<VersionPrefix>3.0.0</VersionPrefix>
<VersionPrefix>4.0.0</VersionPrefix>
<PreReleaseVersionLabel>preview</PreReleaseVersionLabel>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
<!-- .NET Runtime product dependencies -->
<MicrosoftBclAsyncInterfacesVersion>6.0.0</MicrosoftBclAsyncInterfacesVersion>
<MicrosoftExtensionsVersion>2.1.0</MicrosoftExtensionsVersion>
<MicrosoftExtensionsDependencyInjectionVersion>6.0.0</MicrosoftExtensionsDependencyInjectionVersion>
<SkiaSharpVersion>2.88.3</SkiaSharpVersion>
<NuGetVersion>6.7.0</NuGetVersion>
<SkiaSharpVersion>2.88.6</SkiaSharpVersion>
<SystemBuffersVersion>4.5.1</SystemBuffersVersion>
<SystemCodeDomVersion>4.5.0</SystemCodeDomVersion>
<SystemCollectionsImmutableVersion>1.5.0</SystemCollectionsImmutableVersion>
<SystemConfigurationConfigurationManagerVersion>6.0.1</SystemConfigurationConfigurationManagerVersion>
<SystemIOFileSystemAccessControl>4.5.0</SystemIOFileSystemAccessControl>
<SystemMemoryVersion>4.5.5</SystemMemoryVersion>
<SystemReflectionEmitLightweightVersion>4.3.0</SystemReflectionEmitLightweightVersion>
Expand All @@ -30,13 +32,13 @@
<SystemTextJsonVersion>6.0.1</SystemTextJsonVersion>
<SystemThreadingChannelsVersion>4.7.1</SystemThreadingChannelsVersion>
<!-- Other product dependencies -->
<ApacheArrowVersion>2.0.0</ApacheArrowVersion>
<ApacheArrowVersion>11.0.0</ApacheArrowVersion>
<GoogleProtobufVersion>3.19.6</GoogleProtobufVersion>
<LightGBMVersion>2.3.1</LightGBMVersion>
<MicrosoftCodeAnalysisAnalyzersVersion>3.3.0</MicrosoftCodeAnalysisAnalyzersVersion>
<MicrosoftCodeAnalysisCSharpVersion>3.9.0</MicrosoftCodeAnalysisCSharpVersion>
<MicrosoftDotNetInteractiveFormattingVersion>1.0.0-beta.22504.6</MicrosoftDotNetInteractiveFormattingVersion>
<MicrosoftDotNetInteractiveVersion>1.0.0-beta.22504.6</MicrosoftDotNetInteractiveVersion>
<MicrosoftDotNetInteractiveFormattingVersion>1.0.0-beta.23461.4</MicrosoftDotNetInteractiveFormattingVersion>
<MicrosoftDotNetInteractiveVersion>1.0.0-beta.23461.4</MicrosoftDotNetInteractiveVersion>
<MicrosoftMLOnnxRuntimeVersion>1.14.0</MicrosoftMLOnnxRuntimeVersion>
<MlNetMklDepsVersion>0.0.0.12</MlNetMklDepsVersion>
<!--
Expand Down Expand Up @@ -74,11 +76,11 @@
<SystemCompositionVersion>1.2.0</SystemCompositionVersion>
<!-- Test-only Dependencies -->
<ApprovalTestsVersion>5.4.7</ApprovalTestsVersion>
<BenchmarkDotNetVersion>0.12.0</BenchmarkDotNetVersion>
<BenchmarkDotNetVersion>0.13.1</BenchmarkDotNetVersion>
<DotNetRuntime60Version>6.0.9</DotNetRuntime60Version>
<DotNetRuntime80Version>8.0.0-preview.3.23174.8</DotNetRuntime80Version>
<FluentAssertionVersion>5.10.2</FluentAssertionVersion>
<MicrosoftCodeAnalysisTestingVersion>1.1.2-beta1.22512.1</MicrosoftCodeAnalysisTestingVersion>
<MicrosoftCodeAnalysisTestingVersion>1.1.2-beta1.23431.1</MicrosoftCodeAnalysisTestingVersion>
<MicrosoftDotNetXUnitExtensionsVersion>8.0.0-beta.23265.1</MicrosoftDotNetXUnitExtensionsVersion>
<MicrosoftExtensionsDependencyModelVersion>2.1.0</MicrosoftExtensionsDependencyModelVersion>
<MicrosoftExtensionsTestVersion>3.0.1</MicrosoftExtensionsTestVersion>
Expand All @@ -87,7 +89,7 @@
<MicrosoftMLTestDatabasesVersion>0.0.6-test</MicrosoftMLTestDatabasesVersion>
<MicrosoftMLTestModelsVersion>0.0.7-test</MicrosoftMLTestModelsVersion>
<SystemDataSqlClientVersion>4.6.1</SystemDataSqlClientVersion>
<SystemDataSQLiteCoreVersion>1.0.113</SystemDataSQLiteCoreVersion>
<SystemDataSQLiteCoreVersion>1.0.118</SystemDataSQLiteCoreVersion>
<XunitCombinatorialVersion>1.2.7</XunitCombinatorialVersion>
<XUnitVersion>2.4.2</XUnitVersion>
<!-- Opt-out repo features -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Data.Analysis;

Expand All @@ -22,19 +23,32 @@ public static DataFrame ToDataFrame(this TabularDataResource tabularDataResource

foreach (var fieldDescriptor in tabularDataResource.Schema.Fields)
{
var fieldName = fieldDescriptor.Name;
var column = tabularDataResource.Data.Select(row =>
{
if (row is IDictionary<string, object> dictionary)
{
return dictionary[fieldName];
}
else
{
return row.FirstOrDefault(kvp => kvp.Key == fieldName).Value;
}
});

switch (fieldDescriptor.Type)
{
case TableSchemaFieldType.Number:
dataFrame.Columns.Add(new DoubleDataFrameColumn(fieldDescriptor.Name, tabularDataResource.Data.Select(d => Convert.ToDouble(d[fieldDescriptor.Name]))));
dataFrame.Columns.Add(new DoubleDataFrameColumn(fieldDescriptor.Name, column.Select(Convert.ToDouble)));
break;
case TableSchemaFieldType.Integer:
dataFrame.Columns.Add(new Int64DataFrameColumn(fieldDescriptor.Name, tabularDataResource.Data.Select(d => Convert.ToInt64(d[fieldDescriptor.Name]))));
dataFrame.Columns.Add(new Int64DataFrameColumn(fieldDescriptor.Name, column.Select(Convert.ToInt64)));
break;
case TableSchemaFieldType.Boolean:
dataFrame.Columns.Add(new BooleanDataFrameColumn(fieldDescriptor.Name, tabularDataResource.Data.Select(d => Convert.ToBoolean(d[fieldDescriptor.Name]))));
dataFrame.Columns.Add(new BooleanDataFrameColumn(fieldDescriptor.Name, column.Select(Convert.ToBoolean)));
break;
case TableSchemaFieldType.String:
dataFrame.Columns.Add(new StringDataFrameColumn(fieldDescriptor.Name, tabularDataResource.Data.Select(d => Convert.ToString(d[fieldDescriptor.Name]))));
dataFrame.Columns.Add(new StringDataFrameColumn(fieldDescriptor.Name, column.Select(Convert.ToString)));
break;
default:
throw new ArgumentOutOfRangeException();
Expand Down
18 changes: 18 additions & 0 deletions src/Microsoft.Data.Analysis/ArrayUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.Data.Analysis
{
    /// <summary>
    /// Array-related constants shared inside the assembly.
    /// </summary>
    internal static class ArrayUtility
    {
        /// <summary>
        /// Maximum size of a one-dimensional array (0x7FEFFFFF elements).
        /// Polyfill for the Array.MaxLength API when targeting netstandard2.0.
        /// </summary>
        /// <remarks>
        /// See: https://msdn.microsoft.com/en-us/library/hh285054(v=vs.110).aspx
        /// </remarks>
        public const int ArrayMaxSize = 0x7FEFFFFF;
    }
}
6 changes: 3 additions & 3 deletions src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ private void Append(ReadOnlySpan<byte> value)
_offsetsBuffers.Add(mutableOffsetsBuffer);
mutableOffsetsBuffer.Append(0);
}
mutableDataBuffer.EnsureCapacity(value.Length);
value.CopyTo(mutableDataBuffer.RawSpan.Slice(mutableDataBuffer.Length));
mutableDataBuffer.Length += value.Length;
var startIndex = mutableDataBuffer.Length;
mutableDataBuffer.IncreaseSize(value.Length);
value.CopyTo(mutableDataBuffer.RawSpan.Slice(startIndex));
mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + value.Length);
}
SetValidityBit(Length - 1, value != default);
Expand Down
Loading
0