// the host has a (big) malloc'ed hostArray, on which OpenCL should work.
- (void)makeBufferOfSize: (size_t)arraySize from: (void *)hostArray { _clArray = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, arraySize, hostArray, NULL ); _hostArray = hostArray; } for(;;) { // the host tells OpenCL to run a kernel which modifies _clArray // then the host needs some of the data which OpenCL has modified: - (void)theHostWantsDataFrom: (NSUInteger)sta to: (NSUInteger)end { clEnqueueReadBuffer( commands, _clArray, CL_TRUE, sta, end - sta, _hostArray + sta, 0, NULL, NULL ); } } This works fine. clEnqueueReadBuffer is a no-op for DEVICE_HOST_UNIFIED_MEMORY and is reasonably fast otherwise (i.e. for an external GPU). But now for an exercise in gcl: - (void)makeBufferOfSize: (size_t)arraySize from: (void *)hostArray { _clArray = gcl_malloc( arraySize, hostArray, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR ); _hostArray = hostArray; } - (void)theHostWantsDataFrom: (NSUInteger)sta to: (NSUInteger)end { if ( DEVICE_HOST_UNIFIED_MEMORY ) { // nothing needs to be done here; just wait till all work has been done dispatch_group_wait( group, DISPATCH_TIME_FOREVER ); } else // very bad code here { NSUInteger bytesToCopy = end - sta; uint8 *tempBuff = malloc( bytesToCopy ); dispatch_sync( queue, ^void{ gcl_memcpy( tempBuff, _clArray + sta, bytesToCopy ); } ); memcpy( _hostArray + sta, tempBuff, bytesToCopy ); // (BAD) free( tempBuff ); }; } This works, but the double copy at (BAD) does not look very efficient. There clearly must be a better way. But how? Gerriet. _______________________________________________ Cocoa-dev mailing list (Cocoa-dev@lists.apple.com) Please do not post admin requests or moderator comments to the list. Contact the moderators at cocoa-dev-admins(at)lists.apple.com Help/Unsubscribe/Update your Subscription: https://lists.apple.com/mailman/options/cocoa-dev/archive%40mail-archive.com This email sent to arch...@mail-archive.com