@@ -240,15 +240,8 @@ Error replayKernel() {
240240 return createErr (" failed to read the globals file" );
241241 auto GlobalsBuffer = std::move (GlobalsBufferOrErr.get ());
242242
243- // On AMD for currently unknown reasons we cannot copy memory mapped data to
244- // device. This is a work-around.
245- uint8_t *RecordedGlobals = new uint8_t [GlobalsBuffer->getBufferSize ()];
246- std::memcpy (RecordedGlobals,
247- const_cast <char *>(GlobalsBuffer->getBuffer ().data ()),
248- GlobalsBuffer->getBufferSize ());
249-
250- void *BufferPtr = (void *)RecordedGlobals;
251- uint32_t NumGlobals = *((uint32_t *)(BufferPtr));
243+ const void *BufferPtr = const_cast <char *>(GlobalsBuffer->getBufferStart ());
244+ uint32_t NumGlobals = *((const uint32_t *)(BufferPtr));
252245 BufferPtr = utils::advancePtr (BufferPtr, sizeof (uint32_t ));
253246
254247 SmallVector<llvm::offloading::EntryTy> OffloadEntries (
@@ -268,14 +261,15 @@ Error replayKernel() {
268261 Global.Address = static_cast <char *>(OffloadEntries[0 ].Address ) + I + 1 ;
269262
270263 // Setup the offload entry using the information from the file.
271- uint32_t NameSize = *((uint32_t *)(BufferPtr));
264+ uint32_t NameSize = *((const uint32_t *)(BufferPtr));
272265 BufferPtr = utils::advancePtr (BufferPtr, sizeof (uint32_t ));
273- uint64_t Size = *((uint64_t *)(BufferPtr));
266+ uint64_t Size = *((const uint64_t *)(BufferPtr));
274267 BufferPtr = utils::advancePtr (BufferPtr, sizeof (uint64_t ));
275268 Global.Size = Size;
276- Global.SymbolName = (char *)BufferPtr;
269+ Global.SymbolName =
270+ const_cast <char *>(static_cast <const char *>(BufferPtr));
277271 BufferPtr = utils::advancePtr (BufferPtr, NameSize);
278- Global.AuxAddr = BufferPtr;
272+ Global.AuxAddr = const_cast < void *>( BufferPtr) ;
279273 BufferPtr = utils::advancePtr (BufferPtr, Size);
280274 }
281275
@@ -320,25 +314,17 @@ Error replayKernel() {
320314 return createErr (" failed to read the kernel record input file" );
321315 auto RecordInputBuffer = std::move (RecordInputBufferOrErr.get ());
322316
323- // On AMD for currently unknown reasons we cannot copy memory mapped data to
324- // device. This is a work-around.
325- uint8_t *RecordedData = new uint8_t [RecordInputBuffer->getBufferSize ()];
326- std::memcpy (RecordedData,
327- const_cast <char *>(RecordInputBuffer->getBuffer ().data ()),
328- RecordInputBuffer->getBufferSize ());
329-
330317 KernelReplayOutcomeTy Outcome;
331318 Rc = __tgt_target_kernel_replay (
332319 /* Loc=*/ nullptr , DeviceId, OffloadEntries[0 ].Address ,
333- (char *)RecordedData, RecordInputBuffer->getBufferSize (),
320+ const_cast <char *>(RecordInputBuffer->getBufferStart ()),
321+ RecordInputBuffer->getBufferSize (),
334322 NumGlobals ? &OffloadEntries[1 ] : nullptr , NumGlobals, TgtArgs.data (),
335323 TgtArgOffsets.data (), NumArgs, NumTeams, NumThreads, SharedMemorySize,
336324 LoopTripCount, &Outcome);
337325 if (Rc != OMP_TGT_SUCCESS)
338326 return createErr (" failed to replay kernel" );
339327
340- delete[] RecordedData;
341-
342328 // Verify the replay output if requested.
343329 if (VerifyOpt) {
344330 if (Outcome.OutputFilepath .empty ())
0 commit comments