Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion source/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ envoy_cc_library(
envoy_cc_library(
name = "symbol_table_lib",
srcs = ["symbol_table.cc"],
hdrs = ["symbol_table.h"],
hdrs = [
"symbol_table.h",
"well_known_tokens.h",
],
deps = [
":recent_lookups_lib",
"//source/common/common:assert_lib",
Expand Down
145 changes: 133 additions & 12 deletions source/common/stats/symbol_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@
#include "source/common/common/assert.h"
#include "source/common/common/logger.h"
#include "source/common/common/utility.h"
#include "source/common/stats/well_known_tokens.h"

#include "absl/strings/str_cat.h"

namespace Envoy {
namespace Stats {
namespace {

// The well-known token table lives in well_known_tokens.h. WellKnownTokensNum is
// the number of reserved symbol slots (including the unused index-0 sentinel).
static constexpr Symbol WellKnownTokensNum = sizeof(kWellKnownTokens) / sizeof(kWellKnownTokens[0]);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's an ARRAYSIZE macro of some sort.


// When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
// that will decode into uint8_t array starting (and ending) with {0}. Thus we
Expand All @@ -21,9 +27,25 @@ namespace Stats {
// used for dynamically discovered stat-name tokens where you don't want to take
// a symbol table lock, and would rather pay extra memory overhead to store the
// tokens as fully elaborated strings.
static constexpr Symbol FirstValidSymbol = 1;
static constexpr Symbol FirstValidSymbol = WellKnownTokensNum;
static constexpr uint8_t LiteralStringIndicator = 0;

// Lock-free encode-side lookup: token string -> symbol ID. Built once at first
// access; safe for concurrent readers afterward.
const absl::flat_hash_map<absl::string_view, Symbol>& wellKnownEncodeMap() {
static const auto* m = []() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this pattern is needed; you can just construct this in the symbol table.

It is safe for concurrent readers but will require some sort of synchronization created by the compiler, so it's better just to make this part of the context, since we do have a context for this.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great.

auto* m = new absl::flat_hash_map<absl::string_view, Symbol>();
m->reserve(WellKnownTokensNum);
for (Symbol i = 1; i < WellKnownTokensNum; ++i) {
(*m)[kWellKnownTokens[i]] = i;
}
return m;
}();
return *m;
}

} // namespace

#ifndef ENVOY_CONFIG_COVERAGE
void StatName::debugPrint() {
// TODO(jmarantz): capture this functionality (always prints regardless of
Expand Down Expand Up @@ -166,6 +188,29 @@ class SymbolTable::Encoding::TokenIter {
return TokenType::Symbol;
}

// Quick scan of the remaining tokens to count how many there are without decoding them or
// output them.
size_t remainingTokens() const {
const uint8_t* array = array_;
size_t size = size_;

size_t count = 0;
for (size_t i = 0; i < size;) {
if (array[i] == LiteralStringIndicator) {
// Skip the literal string indicator.
++i;
std::pair<uint64_t, size_t> length_consumed = decodeNumber(array + i);
i += length_consumed.second; // Skip the varint-encoded length.
i += length_consumed.first; // Skip the literal string data.
} else {
std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array + i);
i += symbol_consumed.second; // Skip the varint-encoded symbol.
}
++count;
}
return count;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this somewhat complicated function should be exposed, perhaps as a private static member funciton, so it can be unit-tested and fuzzed.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will give it a try

}

/** @return the current string_view -- only valid to call if next()==TokenType::StringView */
absl::string_view stringView() const {
#ifndef NDEBUG
Expand Down Expand Up @@ -236,12 +281,35 @@ bool StatName::startsWith(StatName prefix) const {

std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
std::vector<absl::string_view> strings;
Thread::LockGuard lock(lock_);
Encoding::decodeTokens(
stat_name,
[this, &strings](Symbol symbol)
ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
[&strings](absl::string_view str) { strings.push_back(str); });
absl::InlinedVector<std::pair<Symbol, size_t>, 8> symbol_positions;
Encoding::TokenIter iter(stat_name);
strings.reserve(iter.remainingTokens());

for (Encoding::TokenIter::TokenType type = iter.next();
type != Encoding::TokenIter::TokenType::End; type = iter.next()) {
if (type == Encoding::TokenIter::TokenType::Symbol) {
const Symbol symbol = iter.symbol();
if (symbol < WellKnownTokensNum) {
strings.push_back(kWellKnownTokens[symbol]);
} else {
symbol_positions.emplace_back(symbol, strings.size());
strings.push_back(absl::string_view()); // placeholder to be filled in later.
}
} else {
ASSERT(type == Encoding::TokenIter::TokenType::StringView);
strings.push_back(iter.stringView());
}
}

if (!symbol_positions.empty()) {
// Only take the lock if we have dynamic symbols to decode. We have already decoded the
// well-known symbols and the literal string symbols.
Thread::LockGuard lock(lock_);
for (const auto& symbol_position : symbol_positions) {
strings[symbol_position.second] = fromSymbol(symbol_position.first);
}
}

return strings;
}

Expand Down Expand Up @@ -285,19 +353,34 @@ void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& en
// string-splitting and prepare a temp vector of Symbol first.
const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
std::vector<Symbol> symbols;
symbols.reserve(tokens.size());
symbols.resize(tokens.size());
absl::InlinedVector<std::pair<size_t, size_t>, 8> un_well_known_token_positions;
const auto& well_known_encode_map = wellKnownEncodeMap();

for (size_t i = 0; i < tokens.size(); ++i) {
const absl::string_view token = tokens[i];
const size_t hash = absl::Hash<std::string_view>{}(token);
auto well_known_search = well_known_encode_map.find(token, hash);
if (well_known_search != well_known_encode_map.end()) {
symbols[i] = well_known_search->second;
} else {
un_well_known_token_positions.emplace_back(i, hash);
}
}

// Now take the lock and populate the Symbol objects, which involves bumping
// ref-counts in this.
{
Thread::LockGuard lock(lock_);
recent_lookups_.lookup(name);
for (auto& token : tokens) {
for (const auto& token_position : un_well_known_token_positions) {
const size_t i = token_position.first;
const size_t hash = token_position.second;
// TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
// length below some threshold, say 4 bytes. It might be preferable not to
// reserve Symbols for every 3 digit number found (for example) in ipv4
// addresses.
symbols.push_back(toSymbol(token));
symbols[i] = toUnWellKnownSymbol(tokens[i], hash);
}
}

Expand All @@ -319,8 +402,24 @@ void SymbolTable::incRefCount(const StatName& stat_name) {
// Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
const SymbolVec symbols = Encoding::decodeSymbols(stat_name);

// Well-known symbols are statically allocated and have no ref count. If the
// StatName only references well-known symbols, skip the lock entirely.
bool any_dynamic = false;
for (Symbol s : symbols) {
if (s >= FirstValidSymbol) {
any_dynamic = true;
break;
}
}
if (!any_dynamic) {
return;
}

Thread::LockGuard lock(lock_);
for (Symbol symbol : symbols) {
if (symbol < FirstValidSymbol) {
continue;
}
auto decode_search = decode_map_.find(symbol);

ASSERT(decode_search != decode_map_.end(),
Expand All @@ -341,8 +440,24 @@ void SymbolTable::free(const StatName& stat_name) {
// Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
const SymbolVec symbols = Encoding::decodeSymbols(stat_name);

// Well-known symbols are statically allocated and have no ref count. If the
// StatName only references well-known symbols, skip the lock entirely.
bool any_dynamic = false;
for (Symbol s : symbols) {
if (s >= FirstValidSymbol) {
any_dynamic = true;
break;
}
}
if (!any_dynamic) {
return;
}

Thread::LockGuard lock(lock_);
for (Symbol symbol : symbols) {
if (symbol < FirstValidSymbol) {
continue;
}
auto decode_search = decode_map_.find(symbol);
ASSERT(decode_search != decode_map_.end());

Expand Down Expand Up @@ -439,9 +554,9 @@ StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
return stat_name_set;
}

Symbol SymbolTable::toSymbol(absl::string_view sv) {
Symbol SymbolTable::toUnWellKnownSymbol(absl::string_view sv, size_t hash) {
Symbol result;
auto encode_find = encode_map_.find(sv);
auto encode_find = encode_map_.find(sv, hash);
// If the string segment doesn't already exist,
if (encode_find == encode_map_.end()) {
// We create the actual string, place it in the decode_map_, and then insert
Expand All @@ -467,6 +582,12 @@ Symbol SymbolTable::toSymbol(absl::string_view sv) {

absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
if (symbol < WellKnownTokensNum) {
// Well-known symbols are resolved without consulting decode_map_. The
// caller still holds the lock per the contract, but this skips the hash
// lookup entirely.
return kWellKnownTokens[symbol];
}
auto search = decode_map_.find(symbol);
RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
return search->second->toStringView();
Expand Down
7 changes: 5 additions & 2 deletions source/common/stats/symbol_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,10 +446,13 @@ class SymbolTable final {
/**
* Convenience function for encode(), symbolizing one string segment at a time.
*
* @param sv the individual string to be encoded as a symbol.
* @param sv the individual string to be encoded as a symbol. This should not contains the
* well-known tokens.
* @param hash the precomputed hash of the string.
* @return Symbol the encoded string.
*/
Symbol toSymbol(absl::string_view sv) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
Symbol toUnWellKnownSymbol(absl::string_view sv, size_t hash)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: toNotWellKnownSymbol sounds a bit more natural to me, though I'm not a native speaker. Though, I would suggest to consider calling it toUnknownSymbol and drop the "well" part here all together.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree: toUnknownSymbol would be more idiomatic.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM.

ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);

/**
* Convenience function for decode(), decoding one symbol at a time.
Expand Down
Loading
Loading