Skip to content

Commit 4e32f89

Browse files
[LLDB] Fix potential data race in Symtab initialization (#192753)
Claude pointed out to me that Symtab::FindFunctionSymbols doesn't lock the mutex before checking m_name_indexes_computed and recomputing it. On top of that all the initialization flags are bitfields, which makes any unguarded concurrent accesses UB. Changing them to bools should no longer be necessary after introducing a lock, but several of the public methods trust that their caller holds the lock so I'm opting to remove this footgun just in case. rdar://174988238
1 parent b332cb0 commit 4e32f89

2 files changed

Lines changed: 114 additions & 117 deletions

File tree

lldb/include/lldb/Symbol/Symtab.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,17 +270,20 @@ class Symtab {
270270
void InitNameIndexes();
271271
void InitAddressIndexes();
272272

273+
/// Provide thread safety for this symbol table.
274+
mutable std::recursive_mutex m_mutex;
275+
273276
ObjectFile *m_objfile;
274277
collection m_symbols;
275278
FileRangeToIndexMap m_file_addr_to_index;
276279

277280
/// Maps function names to symbol indices (grouped by FunctionNameTypes)
278281
std::map<lldb::FunctionNameType, UniqueCStringMap<uint32_t>>
279282
m_name_to_symbol_indices;
280-
mutable std::recursive_mutex
281-
m_mutex; // Provide thread safety for this symbol table
282-
bool m_file_addr_to_index_computed : 1, m_name_indexes_computed : 1,
283-
m_loaded_from_cache : 1, m_saved_to_cache : 1;
283+
bool m_file_addr_to_index_computed = false;
284+
bool m_name_indexes_computed = false;
285+
bool m_loaded_from_cache = false;
286+
bool m_saved_to_cache = false;
284287

285288
private:
286289
UniqueCStringMap<uint32_t> &

lldb/source/Symbol/Symtab.cpp

Lines changed: 107 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,7 @@ using namespace lldb;
3232
using namespace lldb_private;
3333

3434
Symtab::Symtab(ObjectFile *objfile)
35-
: m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this),
36-
m_name_to_symbol_indices(), m_mutex(),
37-
m_file_addr_to_index_computed(false), m_name_indexes_computed(false),
38-
m_loaded_from_cache(false), m_saved_to_cache(false) {
35+
: m_objfile(objfile), m_file_addr_to_index(*this) {
3936
m_name_to_symbol_indices.emplace(std::make_pair(
4037
lldb::eFunctionNameTypeNone, UniqueCStringMap<uint32_t>()));
4138
m_name_to_symbol_indices.emplace(std::make_pair(
@@ -280,116 +277,116 @@ static bool lldb_skip_name(llvm::StringRef mangled,
280277

281278
void Symtab::InitNameIndexes() {
282279
// Protected function, no need to lock mutex...
283-
if (!m_name_indexes_computed) {
284-
m_name_indexes_computed = true;
285-
ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
286-
LLDB_SCOPED_TIMER();
280+
if (m_name_indexes_computed)
281+
return;
287282

288-
// Collect all loaded language plugins.
289-
std::vector<Language *> languages;
290-
Language::ForEach([&languages](Language *l) {
291-
languages.push_back(l);
292-
return IterationAction::Continue;
293-
});
283+
m_name_indexes_computed = true;
284+
ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
285+
LLDB_SCOPED_TIMER();
294286

295-
auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
296-
auto &basename_to_index =
297-
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
298-
auto &method_to_index =
299-
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
300-
auto &selector_to_index =
301-
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
302-
// Create the name index vector to be able to quickly search by name
303-
const size_t num_symbols = m_symbols.size();
304-
name_to_index.Reserve(num_symbols);
305-
306-
// The "const char *" in "class_contexts" and backlog::value_type::second
307-
// must come from a ConstString::GetCString()
308-
std::set<const char *> class_contexts;
309-
std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
310-
backlog.reserve(num_symbols / 2);
311-
312-
// Instantiation of the demangler is expensive, so better use a single one
313-
// for all entries during batch processing.
314-
RichManglingContext rmc;
315-
for (uint32_t value = 0; value < num_symbols; ++value) {
316-
Symbol *symbol = &m_symbols[value];
317-
318-
// Don't let trampolines get into the lookup by name map If we ever need
319-
// the trampoline symbols to be searchable by name we can remove this and
320-
// then possibly add a new bool to any of the Symtab functions that
321-
// lookup symbols by name to indicate if they want trampolines. We also
322-
// don't want any synthetic symbols with auto generated names in the
323-
// name lookups.
324-
if (symbol->IsTrampoline() || symbol->IsSyntheticWithAutoGeneratedName())
325-
continue;
287+
// Collect all loaded language plugins.
288+
std::vector<Language *> languages;
289+
Language::ForEach([&languages](Language *l) {
290+
languages.push_back(l);
291+
return IterationAction::Continue;
292+
});
326293

327-
// If the symbol's name string matched a Mangled::ManglingScheme, it is
328-
// stored in the mangled field.
329-
Mangled &mangled = symbol->GetMangled();
330-
if (ConstString name = mangled.GetMangledName()) {
331-
name_to_index.Append(name, value);
294+
auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
295+
auto &basename_to_index =
296+
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
297+
auto &method_to_index =
298+
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
299+
auto &selector_to_index =
300+
GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
301+
// Create the name index vector to be able to quickly search by name
302+
const size_t num_symbols = m_symbols.size();
303+
name_to_index.Reserve(num_symbols);
304+
305+
// The "const char *" in "class_contexts" and backlog::value_type::second
306+
// must come from a ConstString::GetCString()
307+
std::set<const char *> class_contexts;
308+
std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
309+
backlog.reserve(num_symbols / 2);
310+
311+
// Instantiation of the demangler is expensive, so better use a single one
312+
// for all entries during batch processing.
313+
RichManglingContext rmc;
314+
for (size_t value = 0; value < num_symbols; ++value) {
315+
Symbol *symbol = &m_symbols[value];
316+
317+
// Don't let trampolines get into the lookup by name map If we ever need
318+
// the trampoline symbols to be searchable by name we can remove this and
319+
// then possibly add a new bool to any of the Symtab functions that
320+
// lookup symbols by name to indicate if they want trampolines. We also
321+
// don't want any synthetic symbols with auto generated names in the
322+
// name lookups.
323+
if (symbol->IsTrampoline() || symbol->IsSyntheticWithAutoGeneratedName())
324+
continue;
332325

333-
if (symbol->ContainsLinkerAnnotations()) {
334-
// If the symbol has linker annotations, also add the version without
335-
// the annotations.
336-
ConstString stripped = ConstString(
337-
m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
338-
name_to_index.Append(stripped, value);
339-
}
326+
// If the symbol's name string matched a Mangled::ManglingScheme, it is
327+
// stored in the mangled field.
328+
Mangled &mangled = symbol->GetMangled();
329+
if (ConstString name = mangled.GetMangledName()) {
330+
name_to_index.Append(name, value);
331+
332+
if (symbol->ContainsLinkerAnnotations()) {
333+
// If the symbol has linker annotations, also add the version without
334+
// the annotations.
335+
ConstString stripped = ConstString(
336+
m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
337+
name_to_index.Append(stripped, value);
338+
}
340339

341-
const SymbolType type = symbol->GetType();
342-
if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
343-
if (mangled.GetRichManglingInfo(rmc, lldb_skip_name)) {
344-
RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
345-
continue;
346-
}
340+
const SymbolType type = symbol->GetType();
341+
if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
342+
if (mangled.GetRichManglingInfo(rmc, lldb_skip_name)) {
343+
RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
344+
continue;
347345
}
348346
}
347+
}
349348

350-
// Symbol name strings that didn't match a Mangled::ManglingScheme, are
351-
// stored in the demangled field.
352-
if (ConstString name = mangled.GetDemangledName()) {
353-
name_to_index.Append(name, value);
349+
// Symbol name strings that didn't match a Mangled::ManglingScheme, are
350+
// stored in the demangled field.
351+
if (ConstString name = mangled.GetDemangledName()) {
352+
name_to_index.Append(name, value);
354353

355-
if (symbol->ContainsLinkerAnnotations()) {
356-
// If the symbol has linker annotations, also add the version without
357-
// the annotations.
358-
name = ConstString(
359-
m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
360-
name_to_index.Append(name, value);
361-
}
354+
if (symbol->ContainsLinkerAnnotations()) {
355+
// If the symbol has linker annotations, also add the version without
356+
// the annotations.
357+
name = ConstString(
358+
m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
359+
name_to_index.Append(name, value);
360+
}
362361

363-
// If the demangled name turns out to be an ObjC name, and is a category
364-
// name, add the version without categories to the index too.
365-
for (Language *lang : languages) {
366-
for (auto variant : lang->GetMethodNameVariants(name)) {
367-
if (variant.GetType() & lldb::eFunctionNameTypeSelector)
368-
selector_to_index.Append(variant.GetName(), value);
369-
else if (variant.GetType() & lldb::eFunctionNameTypeFull)
370-
name_to_index.Append(variant.GetName(), value);
371-
else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
372-
method_to_index.Append(variant.GetName(), value);
373-
else if (variant.GetType() & lldb::eFunctionNameTypeBase)
374-
basename_to_index.Append(variant.GetName(), value);
375-
}
362+
// If the demangled name turns out to be an ObjC name, and is a category
363+
// name, add the version without categories to the index too.
364+
for (Language *lang : languages) {
365+
for (auto variant : lang->GetMethodNameVariants(name)) {
366+
if (variant.GetType() & lldb::eFunctionNameTypeSelector)
367+
selector_to_index.Append(variant.GetName(), value);
368+
else if (variant.GetType() & lldb::eFunctionNameTypeFull)
369+
name_to_index.Append(variant.GetName(), value);
370+
else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
371+
method_to_index.Append(variant.GetName(), value);
372+
else if (variant.GetType() & lldb::eFunctionNameTypeBase)
373+
basename_to_index.Append(variant.GetName(), value);
376374
}
377375
}
378376
}
379-
380-
for (const auto &record : backlog) {
381-
RegisterBacklogEntry(record.first, record.second, class_contexts);
382-
}
383-
384-
name_to_index.Sort();
385-
name_to_index.SizeToFit();
386-
selector_to_index.Sort();
387-
selector_to_index.SizeToFit();
388-
basename_to_index.Sort();
389-
basename_to_index.SizeToFit();
390-
method_to_index.Sort();
391-
method_to_index.SizeToFit();
392377
}
378+
379+
for (const auto &record : backlog)
380+
RegisterBacklogEntry(record.first, record.second, class_contexts);
381+
382+
name_to_index.Sort();
383+
name_to_index.SizeToFit();
384+
selector_to_index.Sort();
385+
selector_to_index.SizeToFit();
386+
basename_to_index.Sort();
387+
basename_to_index.SizeToFit();
388+
method_to_index.Sort();
389+
method_to_index.SizeToFit();
393390
}
394391

395392
void Symtab::RegisterMangledNameEntry(
@@ -684,8 +681,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
684681
std::lock_guard<std::recursive_mutex> guard(m_mutex);
685682

686683
if (symbol_name) {
687-
if (!m_name_indexes_computed)
688-
InitNameIndexes();
684+
InitNameIndexes();
689685

690686
return GetNameIndexes(symbol_name, indexes);
691687
}
@@ -701,8 +697,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
701697
LLDB_SCOPED_TIMER();
702698
if (symbol_name) {
703699
const size_t old_size = indexes.size();
704-
if (!m_name_indexes_computed)
705-
InitNameIndexes();
700+
InitNameIndexes();
706701

707702
std::vector<uint32_t> all_name_indexes;
708703
const size_t name_match_count =
@@ -823,8 +818,7 @@ Symtab::FindAllSymbolsWithNameAndType(ConstString name,
823818

824819
// Initialize all of the lookup by name indexes before converting NAME to a
825820
// uniqued string NAME_STR below.
826-
if (!m_name_indexes_computed)
827-
InitNameIndexes();
821+
InitNameIndexes();
828822

829823
if (name) {
830824
// The string table did have a string that matched, but we need to check
@@ -841,8 +835,7 @@ void Symtab::FindAllSymbolsWithNameAndType(
841835
LLDB_SCOPED_TIMER();
842836
// Initialize all of the lookup by name indexes before converting NAME to a
843837
// uniqued string NAME_STR below.
844-
if (!m_name_indexes_computed)
845-
InitNameIndexes();
838+
InitNameIndexes();
846839

847840
if (name) {
848841
// The string table did have a string that matched, but we need to check
@@ -870,8 +863,7 @@ Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
870863
Visibility symbol_visibility) {
871864
std::lock_guard<std::recursive_mutex> guard(m_mutex);
872865
LLDB_SCOPED_TIMER();
873-
if (!m_name_indexes_computed)
874-
InitNameIndexes();
866+
InitNameIndexes();
875867

876868
if (name) {
877869
std::vector<uint32_t> matching_indexes;
@@ -1126,8 +1118,8 @@ void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
11261118
}
11271119
}
11281120

1129-
if (!m_name_indexes_computed)
1130-
InitNameIndexes();
1121+
std::lock_guard<std::recursive_mutex> guard(m_mutex);
1122+
InitNameIndexes();
11311123

11321124
for (lldb::FunctionNameType type :
11331125
{lldb::eFunctionNameTypeBase, lldb::eFunctionNameTypeMethod,
@@ -1178,7 +1170,9 @@ void Symtab::SaveToCache() {
11781170
DataFileCache *cache = Module::GetIndexCache();
11791171
if (!cache)
11801172
return; // Caching is not enabled.
1181-
InitNameIndexes(); // Init the name indexes so we can cache them as well.
1173+
1174+
// Init the name indexes so we can cache them as well.
1175+
InitNameIndexes();
11821176
const auto byte_order = endian::InlHostByteOrder();
11831177
DataEncoder file(byte_order, /*addr_size=*/8);
11841178
// Encode will return false if the symbol table's object file doesn't have

0 commit comments

Comments
 (0)