Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,46 @@ jobs:
build-essential \
automake \
autoconf \
cmake \
libc6-dev \
gcc-multilib \
libc6-dev-i386 \
gdb \
docbook \
docbook-xsl \
docbook-xml \
xsltproc

# Callgrind cycle estimation links a static Capstone. It must be built
# x86+arm64 only (other printers reference libc symbols the -nodefaultlibs
# tool does not shim) and without stack-protector/fortify (the tool runs
# without glibc's %fs TLS). configure picks it up via $CAPSTONE_DIR.
- name: Build Capstone (cycle-estimation decoder)
run: |
CS_VERSION=5.0.9
CS_PREFIX="$GITHUB_WORKSPACE/.capstone"
git clone --depth 1 --branch "$CS_VERSION" \
https://github.com/capstone-engine/capstone.git /tmp/capstone-src
cmake -S /tmp/capstone-src -B /tmp/capstone-build \
-DCMAKE_BUILD_TYPE=Release \
-DCAPSTONE_ARCHITECTURE_DEFAULT=OFF \
-DCAPSTONE_X86_SUPPORT=ON \
-DCAPSTONE_ARM64_SUPPORT=ON \
-DCAPSTONE_BUILD_SHARED_LIBS=OFF \
-DCAPSTONE_BUILD_CSTOOL=OFF \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX="$CS_PREFIX" \
-DCMAKE_C_FLAGS="-fno-stack-protector -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -fPIC"
cmake --build /tmp/capstone-build -j"$(nproc)"
cmake --install /tmp/capstone-build
echo "CAPSTONE_DIR=$CS_PREFIX" >> "$GITHUB_ENV"

- name: Run autogen
run: ./autogen.sh

# CodSpeed only runs the 64-bit tool, and cycle estimation needs a
# 64-bit Capstone, so skip the 32-bit secondary build entirely. This
# halves build time and avoids compiling cycledecode.c without Capstone.
- name: Configure
run: ./configure
run: ./configure --enable-only64bit

- name: Build Valgrind
run: make -j$(nproc)
Expand Down
26 changes: 26 additions & 0 deletions .github/workflows/codspeed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,38 @@ jobs:
build-essential \
automake \
autoconf \
cmake \
gdb \
docbook \
docbook-xsl \
docbook-xml \
xsltproc

# The local tree requires Capstone for cycle estimation. Build it x86+arm64
# only and without stack-protector/fortify (the -nodefaultlibs tool runs
# without glibc's %fs TLS). `just build local` runs configure, which reads
# $CAPSTONE_DIR. Upstream release builds ignore $CAPSTONE_DIR, so this step
# only runs for the local tree.
- name: Build Capstone (cycle-estimation decoder)
if: steps.valgrind-cache.outputs.cache-hit != 'true' && matrix.valgrind == 'local'
run: |
CS_VERSION=5.0.9
CS_PREFIX="$GITHUB_WORKSPACE/.capstone"
git clone --depth 1 --branch "$CS_VERSION" \
https://github.com/capstone-engine/capstone.git /tmp/capstone-src
cmake -S /tmp/capstone-src -B /tmp/capstone-build \
-DCMAKE_BUILD_TYPE=Release \
-DCAPSTONE_ARCHITECTURE_DEFAULT=OFF \
-DCAPSTONE_X86_SUPPORT=ON \
-DCAPSTONE_ARM64_SUPPORT=ON \
-DCAPSTONE_BUILD_SHARED_LIBS=OFF \
-DCAPSTONE_BUILD_CSTOOL=OFF \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX="$CS_PREFIX" \
-DCMAKE_C_FLAGS="-fno-stack-protector -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -fPIC"
cmake --build /tmp/capstone-build -j"$(nproc)"
cmake --install /tmp/capstone-build
echo "CAPSTONE_DIR=$CS_PREFIX" >> "$GITHUB_ENV"

- name: Build Valgrind (${{ matrix.valgrind }})
if: steps.valgrind-cache.outputs.cache-hit != 'true'
run: just build ${{ matrix.valgrind }}
Expand Down
32 changes: 28 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@ jobs:
- architecture: amd64
platform: ubuntu-22.04
ubuntu-version: 22.04
additional-deps: gcc-multilib libc6-dev-i386
- architecture: amd64
platform: ubuntu-24.04
ubuntu-version: 24.04
additional-deps: gcc-multilib libc6-dev-i386
- architecture: arm64
platform: codspeedhq-arm64-ubuntu-22.04
ubuntu-version: 22.04
Expand All @@ -36,15 +34,41 @@ jobs:
- name: Install packaging deps
run: sudo apt-get install -y build-essential devscripts debhelper dh-make
- name: Install build deps
run: sudo apt-get install -y debhelper-compat gdb mpi-default-dev pkgconf docbook docbook-xsl docbook-xml xsltproc ${{ matrix.runner.additional-deps }}
run: sudo apt-get install -y debhelper-compat gdb mpi-default-dev pkgconf cmake docbook docbook-xsl docbook-xml xsltproc
- name: Configure GPG Key
run: echo -n "$GPG_SIGNING_KEY" | base64 --decode | gpg --import
env:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}

# Callgrind cycle estimation links a static Capstone. It must be built
# x86+arm64 only (other printers reference libc symbols the -nodefaultlibs
# tool does not shim) and without stack-protector/fortify (the tool runs
# without glibc's %fs TLS). debian/rules forwards $CAPSTONE_DIR to
# configure via --with-capstone.
- name: Build Capstone (cycle-estimation decoder)
run: |
CS_VERSION=5.0.9
CS_PREFIX="$GITHUB_WORKSPACE/.capstone"
git clone --depth 1 --branch "$CS_VERSION" \
https://github.com/capstone-engine/capstone.git /tmp/capstone-src
cmake -S /tmp/capstone-src -B /tmp/capstone-build \
-DCMAKE_BUILD_TYPE=Release \
-DCAPSTONE_ARCHITECTURE_DEFAULT=OFF \
-DCAPSTONE_X86_SUPPORT=ON \
-DCAPSTONE_ARM64_SUPPORT=ON \
-DCAPSTONE_BUILD_SHARED_LIBS=OFF \
-DCAPSTONE_BUILD_CSTOOL=OFF \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX="$CS_PREFIX" \
-DCMAKE_C_FLAGS="-fno-stack-protector -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -fPIC"
cmake --build /tmp/capstone-build -j"$(nproc)"
cmake --install /tmp/capstone-build
echo "CAPSTONE_DIR=$CS_PREFIX" >> "$GITHUB_ENV"

- name: Build the deb package
id: build_deb
run: |
debuild --no-tgz-check -nc
debuild -e CAPSTONE_DIR --no-tgz-check -nc
echo "asset-path=$(find .. -name 'valgrind_*.deb')" >> "$GITHUB_OUTPUT"
env:
DEBEMAIL: ${{ vars.MAINTAINER_EMAIL }}
Expand Down
56 changes: 56 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## What this is

CodSpeed's fork of the Valgrind source tree (`github.com/CodSpeedHQ/valgrind-codspeed`, upstream `sourceware.org/git/valgrind.git`). All CodSpeed work is in **Callgrind** (`callgrind/`). CodSpeed-specific changes are tracked in `CODSPEED-CHANGELOG.md`; READMEs/NEWS/AUTHORS are untouched upstream content.

The headline modification: CodSpeed runs benchmarks with `--instr-atstart=no` and fires `CALLGRIND_START_INSTRUMENTATION` several frames deep (inside libpython, or behind a V8/JIT trampoline). Callgrind's shadow stack is then empty (depth 0) while real frames already exist on the native stack, so returns underflow it and produce a "phantom root" that holds nearly all of the inclusive cost. The fix (`CLG_(reconstruct_call_stack_from_native)` in `callgrind/callstack.c`) seeds the shadow stack from the native stack at the OFF→ON transition. Related: the `--obj-skip=<object>` CLI option and the `CALLGRIND_ADD_OBJ_SKIP` client request, which exclude whole shared libraries. See `.agents/docs/` for the detailed correctness analysis.

## Build

Autotools, but `configure` is checked in — **`./autogen.sh` is only needed if you modify `configure.ac`**. Build order matters: VEX → coregrind → callgrind.

To develop and use `vg-in-place` (runs the uninstalled binary from the repo root), build **in the repo**. Callgrind cycle estimation requires Capstone, so build from inside `nix develop` (which sets `CAPSTONE_DIR`, picked up by `configure`); otherwise pass `--with-capstone=PATH`, where `PATH` is the install prefix of a static, x86+arm64-only Capstone built without stack-protector/fortify:

```bash
./configure --enable-only64bit # needs CAPSTONE_DIR (nix develop) or --with-capstone=PATH; only64bit skips the no-Capstone 32-bit secondary build
make include/vgversion.h
make -j$(nproc) -C VEX
make -j$(nproc) -C coregrind
make -j$(nproc) -C callgrind
```

`just build local` is a separate flow that copies the tree to `/tmp/valgrind-build/valgrind-local` and builds there (used to test a clean build/install); it does **not** make `./vg-in-place` at the repo root usable.

Run uninstalled: `./vg-in-place --tool=callgrind [options] <prog>`

`valgrind` on PATH (`~/.local/bin/valgrind`) is a symlink to this repo's `vg-in-place`, so an in-repo build is picked up automatically by `codspeed run` and friends — **no `sudo make install` needed**.

## Tests

Callgrind regression tests live in `callgrind/tests/` (`.vgtest` definitions, `.stderr.exp`/`.stdout.exp`/`.post.exp` expected outputs). Harness is the Perl script `tests/vg_regtest`.

```bash
tests/vg_regtest callgrind # whole callgrind suite
tests/vg_regtest callgrind/tests/fib # a single test (no extension)
```

Tests with a `uv`-based workload (e.g. `fib.vgtest`) have a `prereq` guard and silently skip if `uv` is not on PATH. Always use `uv` for Python.

## Conventions

- **Branches:** `cod-<issue-number>-<kebab-description>` (e.g. `cod-2714-investigate-pytest-flamegraph-regression`). Default branch is `master`.
- **Commits:** conventional commits with a callgrind scope, e.g. `fix(callgrind): ...`, `refactor(callgrind): ...`.
- **C style:** clang-format via `.clang-format` (LLVM-based, 3-space indent, Linux braces). Format C before committing.

## Gotchas

- The flamegraph validation harness (`.agents/flamegraph-validation/run_all.sh`) calls `../../vg-in-place`, so the repo must be built in place first (not via `just build local`).
- Debugging sessions leave untracked `vgcore.*` coredumps and `callgrind.out.*` files in `callgrind/tests/` — don't commit them.
- `obj_skip_checked` on `fn_node` is latched lazily on first BB entry; the shadow-stack seeder mirrors this check so decisions stay consistent.

## Notes for agents

`.agents/docs/` holds dated investigation notes (correctness matrices, bug analyses) — read the relevant one before changing shadow-stack seeding logic. Put new temporary docs/scripts under `.agents/`.
6 changes: 6 additions & 0 deletions CODSPEED-CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ This file documents changes made to Valgrind for CodSpeed integration, beyond th

## Features

### Callgrind: Cycle estimation (`--cycle-estimation`)

**Feature**: Estimates per-instruction cycle cost (`Cy`/`Cl` events) by decoding the real guest instruction with Capstone and looking it up in a generated cost table (`x86_caps_lut.inc` / `arm64_caps_lut.inc`).

**Build requirement**: Capstone is **mandatory**. `configure` takes the decoder location from `--with-capstone=PATH` or the `CAPSTONE_DIR` environment variable (`nix develop` sets it) and fails if neither is set. Capstone must be built **x86+arm64 only** and **without stack-protector/fortify**. The Callgrind tool is linked with `-nodefaultlibs` and runs without glibc's `%fs` TLS, so a hardened Capstone fails to link (`undefined reference to __stack_chk_fail`, `__*_chk`), and the instruction printers for architectures other than x86/arm64 pull in libc symbols (e.g. XCore's `strtol`) that the tool does not shim. The flake (`flake.nix`) and the CI workflows build Capstone this way; see `.github/workflows/`.

### Skip `.plt.sec` in `--skip-plt`

**Feature**: Added support for skipping `.plt.sec` sections when using the `--skip-plt` option.
Expand Down
5 changes: 4 additions & 1 deletion Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ build-in dir:
./autogen.sh
fi

./configure
# 64-bit only: CodSpeed runs the 64-bit tool, and cycle estimation needs a
# 64-bit Capstone, so the 32-bit secondary build (which has no Capstone) is
# skipped.
./configure --enable-only64bit
make include/vgversion.h
make -j$(nproc) -C VEX
make -j$(nproc) -C coregrind
Expand Down
41 changes: 35 additions & 6 deletions bench/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@
"stress-ng --cpu 4 --cpu-ops 10",
]

# Callgrind configurations: (extra args, config name). The config name is the
# last segment of the benchmark id, e.g. `test_valgrind[<version>, <cmd>, no-inline]`.
# Callgrind configurations: (extra args, config name, requires_codspeed). The
# config name is the last segment of the benchmark id, e.g.
# `test_valgrind[<version>, <cmd>, no-inline]`. `requires_codspeed` marks configs
# that rely on CodSpeed-only options (e.g. `--cycle-estimation`); they are skipped
# for upstream Valgrind builds, which would otherwise abort with "Unknown option".
CONFIGS = [
(["--read-inline-info=no"], "no-inline"),
(["--read-inline-info=yes"], "inline"),
(["--read-inline-info=no"], "no-inline", False),
(["--read-inline-info=yes"], "inline", False),
(
[
"--trace-children=yes",
Expand All @@ -45,6 +48,7 @@
"--read-inline-info=yes",
],
"full-with-inline",
False,
),
(
[
Expand All @@ -59,9 +63,31 @@
"--dump-line=no",
],
"full-no-inline",
False,
),
(
[
"--trace-children=yes",
"--cache-sim=yes",
"--I1=32768,8,64",
"--D1=32768,8,64",
"--LL=8388608,16,64",
"--collect-systime=nsec",
"--compress-strings=no",
"--combine-dumps=yes",
"--dump-line=no",
"--read-inline-info=yes",
"--cycle-estimation=yes"
],
"full-with-inline-with-cycle-estimation",
True,
),
(["--cycle-estimation=yes"], "cycle-estimation", True),
]

# Label produced by `valgrind_version` for CodSpeed's custom build.
CODSPEED_VERSION = "valgrind.codspeed"


def valgrind_version(valgrind_path: str) -> str:
"""Return the normalized version label used in benchmark ids.
Expand All @@ -80,7 +106,7 @@ def valgrind_version(valgrind_path: str) -> str:

version = result.stdout.strip()
if "codspeed" in version:
return "valgrind.codspeed"
return CODSPEED_VERSION
return version


Expand All @@ -89,8 +115,11 @@ def build_config(valgrind_paths: list) -> dict:
benchmarks = []
for valgrind_path in valgrind_paths:
version = valgrind_version(valgrind_path)
is_codspeed = version == CODSPEED_VERSION
for cmd in COMMANDS:
for args, config_name in CONFIGS:
for args, config_name, requires_codspeed in CONFIGS:
if requires_codspeed and not is_codspeed:
continue
Comment thread
greptile-apps[bot] marked this conversation as resolved.
name = f"test_valgrind[{version}, {cmd}, {config_name}]"
exec_cmd = " ".join(
[valgrind_path, "--tool=callgrind", "--log-file=/dev/null", *args, cmd]
Expand Down
12 changes: 10 additions & 2 deletions callgrind/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,15 @@ bin_SCRIPTS = \

noinst_HEADERS = \
costs.h \
cycledecode.h \
sigkey.h \
events.h \
global.h

# Generated cost tables + legacy XED table (#included by cycledecode.c under
# CLG_WITH_CAPSTONE; the arch is selected at compile time).
EXTRA_DIST += x86_caps_lut.inc arm64_caps_lut.inc x86_uops_lut.inc

#----------------------------------------------------------------------------
# callgrind-<platform>
#----------------------------------------------------------------------------
Expand All @@ -37,6 +43,7 @@ CALLGRIND_SOURCES_COMMON = \
clo.c \
context.c \
costs.c \
cycledecode.c \
debug.c \
dump.c \
events.c \
Expand All @@ -54,11 +61,12 @@ callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \
$(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \
$(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(CALLGRIND_CFLAGS_COMMON)
$(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(CALLGRIND_CFLAGS_COMMON) \
@CAPSTONE_CFLAGS@
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
$(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDADD = \
$(TOOL_LDADD_@VGCONF_PLATFORM_PRI_CAPS@)
$(TOOL_LDADD_@VGCONF_PLATFORM_PRI_CAPS@) @CAPSTONE_LIBS@
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDFLAGS = \
$(TOOL_LDFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LINK = \
Expand Down
Loading
Loading