[MPS] Chunk fillBuffer into 4Gb slices · pytorch/pytorch@65cb028 · GitHub

Commit 65cb028

[MPS] Chunk fillBuffer into 4Gb slices
To avoid a regression on macOS 26. Fixes #164093 [ghstack-poisoned]
1 parent 6ba83e0 commit 65cb028

File tree

1 file changed: +12 −1 lines changed


aten/src/ATen/mps/MPSStream.mm

Lines changed: 12 additions & 1 deletion
@@ -158,7 +158,18 @@ @interface MPSGraphExecutionDescriptor ()
  endKernelCoalescing();
  id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer() blitCommandEncoder];

- [blitEncoder fillBuffer:buffer range:NSMakeRange(offset, length) value:value];
+ // For some reason fillBuffer stopped working for length > 4Gb on macOS 26
+ // See https://github.com/pytorch/pytorch/issues/163962
+ // Workaround by batching copy commands into 4Gb chunks
+ constexpr size_t max_copy_size = 0x100000000; // 4GB
+ size_t bytes_filled = 0;
+ size_t bytes_remains = length;
+ while (bytes_remains > 0) {
+   NSUInteger bytes_to_copy = std::min(max_copy_size, bytes_remains);
+   [blitEncoder fillBuffer:buffer range:NSMakeRange(offset + bytes_filled, bytes_to_copy) value:value];
+   bytes_filled += bytes_to_copy;
+   bytes_remains -= bytes_to_copy;
+ }
  [blitEncoder endEncoding];
  synchronize(syncType);
}
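
For context, the chunking logic in the hunk above can be exercised outside of Metal. Below is a minimal standalone sketch in plain C++ (the fill_in_chunks helper and its callback are hypothetical illustrations, not part of this commit) that splits a fill of arbitrary length into slices of at most 4GB, the same way the workaround batches the fillBuffer calls:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <functional>

    // Hypothetical illustration of the chunking loop from the diff above:
    // split a fill of `length` bytes starting at `offset` into slices of at
    // most 4GB, invoking `fill` once per slice (the commit instead calls
    // -[MTLBlitCommandEncoder fillBuffer:range:value:] for each slice).
    void fill_in_chunks(size_t offset,
                        size_t length,
                        const std::function<void(size_t, size_t)>& fill) {
      constexpr size_t max_copy_size = 0x100000000; // 4GB per slice
      size_t bytes_filled = 0;
      size_t bytes_remains = length;
      while (bytes_remains > 0) {
        const size_t bytes_to_copy = std::min(max_copy_size, bytes_remains);
        fill(offset + bytes_filled, bytes_to_copy);
        bytes_filled += bytes_to_copy;
        bytes_remains -= bytes_to_copy;
      }
    }

    int main() {
      // A 9GB fill is issued as 4GB + 4GB + 1GB slices.
      fill_in_chunks(/*offset=*/0, /*length=*/9ull << 30,
                     [](size_t off, size_t size) {
                       std::printf("fill at offset %zu, size %zu\n", off, size);
                     });
      return 0;
    }

Running the sketch with a 9GB length prints three slices (4GB, 4GB, 1GB), mirroring how the loop in the diff breaks one oversized fillBuffer call into multiple encoder commands.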

0 commit comments
