Skip to content

Commit 388a7c4

Browse files
authored
Cache debugger patches to speed up x64 stackwalk epilogue/prologue scanning (#125459)
Second partial fix for #122459 Caches the list of debugger breakpoint patches in the DAC so that x64 stack unwinding doesn't re-scan the patch hash table on every frame. During mini dump collection, each stack frame triggers DacReplacePatchesInHostMemory to restore original opcodes before reading memory — even though there are typically zero active patches during a dump. The patch hash table has 1,000 fixed buckets, so each call walked all of them regardless. The cache is populated once on first access and invalidated only on Flush(). Measured minidump collection against the same repro app with 10,000 iterations across 10 threads. The baseline was 55s, this change alone brings it to ~7s
1 parent 9f85e5d commit 388a7c4

3 files changed

Lines changed: 95 additions & 23 deletions

File tree

src/coreclr/debug/daccess/daccess.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3286,6 +3286,9 @@ ClrDataAccess::Flush(void)
32863286
//
32873287
m_mdImports.Flush();
32883288

3289+
// Free cached patch entries for thread unwinding
3290+
m_patchCache.Flush();
3291+
32893292
// Free instance memory.
32903293
m_instances.Flush();
32913294

src/coreclr/debug/daccess/dacfn.cpp

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,43 +1352,68 @@ HRESULT DacReplacePatchesInHostMemory(MemoryRange range, PVOID pBuffer)
13521352
return S_OK;
13531353
}
13541354

1355+
if (g_dacImpl == nullptr)
1356+
{
1357+
return E_UNEXPECTED;
1358+
}
1359+
1360+
// Cache the patches as the target is not running during DAC operations, and hash
1361+
// table iteration is pretty slow.
1362+
const SArray<DacPatchCacheEntry>& entries = g_dacImpl->m_patchCache.GetEntries();
1363+
1364+
CORDB_ADDRESS address = (CORDB_ADDRESS)(dac_cast<TADDR>(range.StartAddress()));
1365+
SIZE_T cbSize = range.Size();
1366+
1367+
for (COUNT_T i = 0; i < entries.GetCount(); i++)
1368+
{
1369+
const DacPatchCacheEntry& entry = entries[i];
1370+
1371+
if (IsPatchInRequestedRange(address, cbSize, entry.address))
1372+
{
1373+
CORDbgSetInstructionEx(reinterpret_cast<PBYTE>(pBuffer), address, entry.address, entry.opcode, cbSize);
1374+
}
1375+
}
1376+
1377+
return S_OK;
1378+
}
1379+
1380+
const SArray<DacPatchCacheEntry>& DacPatchCache::GetEntries()
1381+
{
1382+
Populate();
1383+
return m_entries;
1384+
}
1385+
1386+
void DacPatchCache::Populate()
1387+
{
1388+
SUPPORTS_DAC;
1389+
1390+
if (m_isPopulated)
1391+
{
1392+
return;
1393+
}
1394+
1395+
// Clear any stale entries from previous failed population attempts
1396+
m_entries.Clear();
1397+
13551398
HASHFIND info;
13561399

13571400
DebuggerPatchTable * pTable = DebuggerController::GetPatchTable();
13581401
DebuggerControllerPatch * pPatch = pTable->GetFirstPatch(&info);
13591402

1360-
// <PERF>
1361-
// The unwinder needs to read the stack very often to restore pushed registers, retrieve the
1362-
// return addres, etc. However, stack addresses should never be patched.
1363-
// One way to optimize this code is to pass the stack base and the stack limit of the thread to this
1364-
// function and use those two values to filter out stack addresses.
1365-
//
1366-
// Another thing we can do is instead of enumerating the patches, we could enumerate the address.
1367-
// This is more efficient when we have a large number of patches and a small memory range. Perhaps
1368-
// we could do a hybrid approach, i.e. use the size of the range and the number of patches to dynamically
1369-
// determine which enumeration is more efficient.
1370-
// </PERF>
13711403
while (pPatch != NULL)
13721404
{
13731405
CORDB_ADDRESS patchAddress = (CORDB_ADDRESS)dac_cast<TADDR>(pPatch->address);
13741406

13751407
if (patchAddress != (CORDB_ADDRESS)NULL)
13761408
{
1377-
PRD_TYPE opcode = pPatch->opcode;
1378-
1379-
CORDB_ADDRESS address = (CORDB_ADDRESS)(dac_cast<TADDR>(range.StartAddress()));
1380-
SIZE_T cbSize = range.Size();
1381-
1382-
// Check if the address of the patch is in the specified memory range.
1383-
if (IsPatchInRequestedRange(address, cbSize, patchAddress))
1384-
{
1385-
// Replace the patch in the buffer with the original opcode.
1386-
CORDbgSetInstructionEx(reinterpret_cast<PBYTE>(pBuffer), address, patchAddress, opcode, cbSize);
1387-
}
1409+
DacPatchCacheEntry entry;
1410+
entry.address = patchAddress;
1411+
entry.opcode = pPatch->opcode;
1412+
m_entries.Append(entry);
13881413
}
13891414

13901415
pPatch = pTable->GetNextPatch(&info);
13911416
}
13921417

1393-
return S_OK;
1418+
m_isPopulated = true;
13941419
}

src/coreclr/debug/daccess/dacimpl.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,48 @@ class DacStreamManager;
792792

793793
#include "cdac.h"
794794

795+
//----------------------------------------------------------------------------
796+
//
797+
// DacPatchCache - Caches debugger breakpoint patches from the target process.
798+
//
799+
// DacReplacePatchesInHostMemory needs to iterate the target's patch hash table
800+
// to replace breakpoint instructions with original opcodes. This iteration is
801+
// expensive because it walks all hash buckets and entries and it will hit either a
802+
// hash lookup in the instance cache or a cache miss + remote read. Since the target
803+
// is not running during DAC operations, the patches should not change while we are performing
804+
// memory enumeration, but if they do we'll miss them until the next time Flush() gets called.
805+
//
806+
//----------------------------------------------------------------------------
807+
808+
struct DacPatchCacheEntry
809+
{
810+
CORDB_ADDRESS address;
811+
PRD_TYPE opcode;
812+
};
813+
814+
class DacPatchCache
815+
{
816+
public:
817+
DacPatchCache()
818+
: m_isPopulated(false)
819+
{
820+
}
821+
822+
const SArray<DacPatchCacheEntry>& GetEntries();
823+
824+
void Flush()
825+
{
826+
m_entries.Clear();
827+
m_isPopulated = false;
828+
}
829+
830+
private:
831+
void Populate();
832+
833+
SArray<DacPatchCacheEntry> m_entries;
834+
bool m_isPopulated;
835+
};
836+
795837
//----------------------------------------------------------------------------
796838
//
797839
// ClrDataAccess.
@@ -1420,6 +1462,8 @@ class ClrDataAccess
14201462
ULONG32 m_instanceAge;
14211463
bool m_debugMode;
14221464

1465+
DacPatchCache m_patchCache;
1466+
14231467
// This currently exists on the DAC as a way of managing lifetime of loading/freeing the cdac
14241468
// TODO: [cdac] Remove when cDAC deploys with SOS - https://github.com/dotnet/runtime/issues/108720
14251469
CDAC m_cdac;

0 commit comments

Comments
 (0)