19
19
20
20
import './flags_webgpu' ;
21
21
22
- import { DataMover , DataType , ENV , KernelBackend , Rank , ShapeMap , Tensor , tensor1d , Tensor3D , util } from '@tensorflow/tfjs-core' ;
22
+ import { DataMover , DataType , ENV , KernelBackend , Rank , ShapeMap , Tensor , Tensor3D , util } from '@tensorflow/tfjs-core' ;
23
23
import * as shaderc from '@webgpu/shaderc' ;
24
24
25
25
import * as binary_op from './kernels/binary_op_webgpu' ;
@@ -32,8 +32,7 @@ import * as webgpu_program from './kernels/webgpu_program';
32
32
import { WebGPUBinary } from './kernels/webgpu_program' ;
33
33
34
34
type TensorInfo = {
35
- shape : number [ ] ,
36
- dtype : DataType ,
35
+ byteSize : number ,
37
36
values : Float32Array | Int32Array | Uint8Array ,
38
37
id : number ,
39
38
buffer : GPUBuffer
@@ -75,28 +74,31 @@ export class WebGPUBackend extends KernelBackend {
75
74
private tensorMap = new WeakMap < DataId , TensorInfo > ( ) ;
76
75
77
76
/**
 * Releases the GPU resources backing the tensor identified by `dataId`.
 *
 * @param dataId Key previously passed to `register`.
 * @throws Error if `dataId` was never registered on this backend.
 */
disposeData(dataId: DataId): void {
  if (!this.tensorMap.has(dataId)) {
    throw new Error(`Tensor ${dataId} was not registered!`);
  }

  const info = this.tensorMap.get(dataId);
  this.destroyBuffer(info.byteSize, info.buffer);
  // Remove the entry so a stale TensorInfo holding a destroyed GPUBuffer
  // cannot be read back later or double-destroyed by a repeated dispose.
  this.tensorMap.delete(dataId);
}
80
84
81
- private createBuffer ( size : number ) {
82
- return this . device . createBuffer ( {
83
- size,
84
- usage : GPUBufferUsage . TRANSFER_SRC | GPUBufferUsage . TRANSFER_DST |
85
- GPUBufferUsage . STORAGE ,
86
- } ) ;
85
+ private createBuffer (
86
+ size : number ,
87
+ usage : GPUBufferUsage = GPUBufferUsage . STORAGE |
88
+ GPUBufferUsage . TRANSFER_SRC | GPUBufferUsage . TRANSFER_DST ) {
89
+ return this . device . createBuffer ( { size, usage} ) ;
87
90
}
88
91
89
- private setBufferData (
90
- buffer : GPUBuffer , data : Float32Array | Int32Array | Uint8Array ) {
91
- buffer . setSubData ( 0 , data ) ;
92
+ private destroyBuffer ( byteSize : number , buffer : GPUBuffer ) {
93
+ // TODO: recycle deleted buffers
94
+ buffer . destroy ( ) ;
92
95
}
93
96
94
97
/**
 * Creates backing GPU storage for a tensor's data. Registering an id that
 * is already present is a no-op and keeps the existing buffer.
 *
 * @param dataId Unique key for the tensor's data.
 * @param shape Tensor shape, used to size the allocation.
 * @param dtype Element dtype, used to size the allocation.
 */
register(dataId: object, shape: number[], dtype: DataType): void {
  if (this.tensorMap.has(dataId)) {
    return;
  }
  const byteSize = util.sizeFromShape(shape) * util.bytesPerElement(dtype);
  this.tensorMap.set(dataId, {
    byteSize,
    values: null,
    id: -1,
    buffer: this.createBuffer(byteSize)
  });
}
102
104
@@ -107,7 +109,7 @@ export class WebGPUBackend extends KernelBackend {
107
109
108
110
const info = this . tensorMap . get ( dataId ) ;
109
111
info . values = values ;
110
- this . setBufferData ( info . buffer , values ) ;
112
+ info . buffer . setSubData ( 0 , values ) ;
111
113
this . tensorMap . set ( dataId , info ) ;
112
114
}
113
115
@@ -118,15 +120,11 @@ export class WebGPUBackend extends KernelBackend {
118
120
}
119
121
120
122
private async getBufferData ( info : TensorInfo ) : Promise < ArrayBuffer > {
121
- const size =
122
- util . sizeFromShape ( info . shape ) * util . bytesPerElement ( info . dtype ) ;
123
- const staging = this . device . createBuffer ( {
124
- size,
125
- usage : GPUBufferUsage . TRANSFER_DST | GPUBufferUsage . MAP_READ ,
126
- } ) ;
123
+ const staging = this . createBuffer (
124
+ info . byteSize , GPUBufferUsage . TRANSFER_DST | GPUBufferUsage . MAP_READ ) ;
127
125
{
128
126
const encoder = this . device . createCommandEncoder ( { } ) ;
129
- encoder . copyBufferToBuffer ( info . buffer , 0 , staging , 0 , size ) ;
127
+ encoder . copyBufferToBuffer ( info . buffer , 0 , staging , 0 , info . byteSize ) ;
130
128
this . commandQueue . push ( encoder ) ;
131
129
this . submitQueue ( ) ;
132
130
}
@@ -158,36 +156,40 @@ export class WebGPUBackend extends KernelBackend {
158
156
return Tensor . make ( shape , { } , dtype , this ) as T ;
159
157
}
160
158
159
+ private tensorToBinding ( tensor ?: Tensor ) : webgpu_program . BindingInfo {
160
+ if ( ! tensor ) {
161
+ return null ;
162
+ }
163
+
164
+ const tensorData = this . tensorMap . get ( tensor . dataId ) ;
165
+
166
+ return {
167
+ resource : {
168
+ offset : 0 ,
169
+ size : tensor . size * util . bytesPerElement ( tensor . dtype ) ,
170
+ buffer : tensorData . buffer
171
+ }
172
+ } ;
173
+ }
174
+
161
175
private compileAndRun <
162
176
K extends { dtype : DataType , size : number , dataId : { } , shape : number [ ] } > (
163
- program : webgpu_program . WebGPUProgram , inputs : Tensor [ ] ,
164
- output ?: Tensor ) : K {
177
+ program : webgpu_program . WebGPUProgram , inputs : Tensor [ ] , output ?: Tensor ,
178
+ uniforms ?: webgpu_program . BindingInfo ) : K {
165
179
if ( output == null ) {
166
180
output = this . makeOutputArray ( program . outputShape , inputs [ 0 ] . dtype ) ;
167
181
}
168
182
const key = webgpu_program . makeShaderKey ( program ) ;
169
183
const { bindGroupLayout, pipeline} = this . getAndSavePipeline ( key , ( ) => {
170
184
return webgpu_program . compileProgram (
171
185
this . compiler , this . shaderc . shader_kind . compute , this . compileOpts ,
172
- this . device , program , inputs , output ) ;
186
+ this . device , program , inputs , output , uniforms ) ;
173
187
} ) ;
174
188
175
189
// Creating bind groups on the fly should never be a bottleneck.
176
- const bg = this . device . createBindGroup ( {
177
- layout : bindGroupLayout ,
178
- bindings : inputs . concat ( output ) . map ( ( tensor , i : number ) => {
179
- const tensorData = this . tensorMap . get ( tensor . dataId ) ;
180
-
181
- return {
182
- binding : i ,
183
- resource : {
184
- offset : 0 ,
185
- size : tensor . size * util . bytesPerElement ( tensor . dtype ) ,
186
- buffer : tensorData . buffer
187
- }
188
- } ;
189
- } )
190
- } ) ;
190
+ const bg = webgpu_program . makeBindGroup (
191
+ this . device , bindGroupLayout , inputs . map ( t => this . tensorToBinding ( t ) ) ,
192
+ this . tensorToBinding ( output ) , uniforms ) ;
191
193
192
194
const encoder = this . device . createCommandEncoder ( { } ) ;
193
195
const pass = encoder . beginComputePass ( ) ;
@@ -204,6 +206,17 @@ export class WebGPUBackend extends KernelBackend {
204
206
return output as { } as K ;
205
207
}
206
208
209
+ private makeUniforms ( data : Uint32Array ) : webgpu_program . BindingInfo {
210
+ const dimensionsBuffer = this . createBuffer (
211
+ data . byteLength ,
212
+ GPUBufferUsage . TRANSFER_DST | GPUBufferUsage . UNIFORM ) ;
213
+ dimensionsBuffer . setSubData ( 0 , data ) ;
214
+
215
+ return {
216
+ resource : { offset : 0 , size : data . byteLength , buffer : dimensionsBuffer }
217
+ } ;
218
+ }
219
+
207
220
pad < T extends Tensor > (
208
221
x : T , paddings : Array < [ number , number ] > , constantValue : number ) : T {
209
222
const program = new PadProgram ( x . shape , paddings , constantValue ) ;
@@ -244,12 +257,17 @@ export class WebGPUBackend extends KernelBackend {
244
257
const output =
245
258
Tensor . make ( [ batch , outerShapeA , outerShapeB ] , { } , a . dtype , this ) as
246
259
Tensor3D ;
247
-
248
260
const program = new MatMulProgram ( output . shape ) ;
249
- const dimensions =
250
- tensor1d ( [ outerShapeA , sharedDim , outerShapeB , batch ] , 'int32' ) ;
251
- // TODO: dispose mnkb
252
261
253
- return this . compileAndRun ( program , [ a , b , dimensions ] , output ) as Tensor3D ;
262
+ const dimensionsData =
263
+ new Uint32Array ( [ outerShapeA , sharedDim , outerShapeB , batch ] ) ;
264
+ const dimensions = this . makeUniforms ( dimensionsData ) ;
265
+
266
+ const result =
267
+ this . compileAndRun ( program , [ a , b ] , output , dimensions ) as Tensor3D ;
268
+
269
+ this . destroyBuffer ( dimensionsData . byteLength , dimensions . resource . buffer ) ;
270
+
271
+ return result ;
254
272
}
255
273
}
0 commit comments