-
Notifications
You must be signed in to change notification settings - Fork 937
Description
Describe the bug
While investigating issue #1245, it was found that it is possible to trigger a race condition in async-profiler, which can crash the profiler while it is parsing library symbols or installing hooks.
In the provided example this happens because there are two distinct flows for `dlopen`:
- Some `dlopen` calls go through the profiler's hooked methods
- Some `dlopen` calls are never redirected to the profiler's hooked methods
This in turn exposes the profiler to multiple race conditions, including the following:
- While `/proc/self/maps` is being parsed, the lib could be found in a non-ready state (not fully loaded), which could result in a crash during the `ElfParser::parseFile` call
- While `/proc/self/maps` is being parsed, the lib could be found in a fully loaded state but be unloaded before `ElfParser::parseFile`, which results in a crash
- The lib is detected & parsed correctly during the `Symbols::parseLibraries` call, but is unloaded before the `MallocTracer::installHooks` call, which results in a crash
All of the above could happen due to race conditions between hooked & unhooked dlopen calls.
Important Note: After discussing with @apangin, it was agreed that the dlopen hook shortcomings can be used to reproduce the issue but should not be considered the root cause: hooking everything is not a goal, and trying to do so is a dead end, because calls to absolute addresses of methods bypass installed hooks.
Expected vs. actual behavior
Async profiler should not crash while parsing & patching libs
Reproduction Steps
Reproducer to cause Seg Fault from race condition due to unpatched dlopen
Note:
- Make sure to replace ${PWD} with the directory where the files are created
- Create all files in the same directory
- Create all files under a directory called `unpatched`
Create the following Java file Main.java
package unpatched;
import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.IntStream;
/**
 * Stress driver for the reproducer.
 *
 * <p>It repeatedly copies {@code my_lib.so} to a fresh temporary file and hands that
 * file to the native {@link #execute(String)} method from a pool of worker threads,
 * while a second thread loads {@code my_lib.so} through {@link System#load(String)}.
 */
public class Main {
    /** Worker pool with a parallelism of 10 that runs the load/unload tasks. */
    private static final ForkJoinPool THREAD_POOL = new ForkJoinPool(10);

    /** Implemented in {@code unpatched_Main.so}. */
    public static native void loadLibUnpatched();

    /**
     * Implemented in {@code unpatched_Main.so}.
     *
     * @param libPath absolute path of the shared library the native side should open
     */
    public static native void execute(String libPath);

    /**
     * Copies {@code my_lib.so} to a new temporary file, passes it to the native
     * {@link #execute(String)} and deletes the copy again.
     *
     * @return always {@code null}; the {@code Void} return type only exists so the
     *         method can be used as a {@code Supplier<Void>}
     * @throws RuntimeException wrapping any exception raised along the way
     */
    private static Void executeFlow() {
        try {
            // Create a temp library file
            Path libraryPath = Files.createTempFile("temp", "my_lib.so");
            libraryPath.toFile().deleteOnExit();

            // Close the source stream deterministically: this method runs 100,000
            // times, so relying on GC to release the descriptors can exhaust them.
            try (InputStream in = new FileInputStream("${PWD}/my_lib.so")) {
                Files.copy(in, libraryPath, StandardCopyOption.REPLACE_EXISTING);
            }

            execute(libraryPath.toString());
            libraryPath.toFile().delete();
        } catch (Exception exception) {
            throw new RuntimeException(exception);
        }
        return null;
    }

    /**
     * Runs 1000 rounds of 100 {@link #executeFlow()} tasks on {@link #THREAD_POOL},
     * waiting for every task of a round to finish before starting the next round.
     */
    public static void execute() {
        for (int i = 0; i < 1000; i++) {
            List<CompletableFuture<Void>> futures = new ArrayList<>();
            IntStream.range(0, 100).forEach(ignore -> futures.add(CompletableFuture.supplyAsync(Main::executeFlow, THREAD_POOL)));
            futures.forEach(CompletableFuture::join);
        }
    }

    /**
     * Loads the JNI library, resolves the native dlopen wrapper, then runs the stress
     * loop while a background thread loads {@code my_lib.so} after a one second delay.
     */
    public static void main(String[] args) throws InterruptedException {
        System.load("${PWD}/unpatched_Main.so");
        loadLibUnpatched();

        Thread thread = new Thread(() -> {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException interrupted) {
                // Keep the interrupt visible to whoever owns this thread; the load
                // below is still attempted, as in the original flow.
                Thread.currentThread().interrupt();
            }
            // NOTE(review): presumably this load is the one that goes through the
            // profiler's hooked dlopen - confirm against the profiler.
            System.load("${PWD}/my_lib.so");
        });
        thread.start();

        execute();
        thread.join();
    }
}
Create the following unpatched_Main.h file
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class unpatched_Main */
#ifndef _Included_unpatched_Main
#define _Included_unpatched_Main
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: unpatched_Main
* Method: loadLibUnpatched
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_unpatched_Main_loadLibUnpatched
(JNIEnv *, jclass);
/*
* Class: unpatched_Main
* Method: execute
* Signature: (Ljava/lang/String;)V
*/
JNIEXPORT void JNICALL Java_unpatched_Main_execute
(JNIEnv *, jclass, jstring);
#ifdef __cplusplus
}
#endif
#endif
Create the following unpatched_Main.cpp file
#include "unpatched_Main.h"
#include <dlfcn.h>
#include <stdlib.h>
// Function-pointer type used for the "my_dlopen" symbol: (path, flags) -> handle.
typedef void* (*my_dlopen_t)(char*, int);
// Function-pointer type used for the "my_malloc" symbol looked up in the loaded library.
typedef void* (*my_malloc_t)(size_t);
// Function-pointer type used for the "my_free" symbol looked up in the loaded library.
typedef void (*my_free_t)(void*);
// Address of my_dlopen; NULL until Java_unpatched_Main_loadLibUnpatched assigns it.
my_dlopen_t local_dlopen = NULL;
/*
 * Loads my_dlopen_lib.so, unloads it, loads it a second time and stores the
 * address of its "my_dlopen" export in local_dlopen.
 *
 * The load / unload / load sequence is kept exactly as in the original
 * reproducer. Every step is now checked: if the library cannot be opened or
 * the symbol is missing, a java.lang.UnsatisfiedLinkError carrying the
 * dlerror() text is raised instead of crashing later through a NULL pointer,
 * so a broken setup is not mistaken for the profiler crash under investigation.
 */
JNIEXPORT void JNICALL Java_unpatched_Main_loadLibUnpatched(JNIEnv* jenv, jclass cls) {
    void* ptr = dlopen("my_dlopen_lib.so", RTLD_NOW | RTLD_GLOBAL);
    if (ptr != NULL) {
        dlclose(ptr);
        ptr = dlopen("my_dlopen_lib.so", RTLD_NOW | RTLD_GLOBAL);
    }
    if (ptr != NULL) {
        local_dlopen = (my_dlopen_t) dlsym(ptr, "my_dlopen");
    }

    if (ptr == NULL || local_dlopen == NULL) {
        // dlerror() describes the most recent dl* failure on this thread.
        const char* reason = dlerror();
        jclass errorClass = jenv->FindClass("java/lang/UnsatisfiedLinkError");
        if (errorClass != NULL) {
            jenv->ThrowNew(errorClass,
                           reason != NULL ? reason : "unable to resolve my_dlopen from my_dlopen_lib.so");
        }
    }
}
/*
 * Opens the library at `path` through local_dlopen, calls its my_malloc and
 * my_free exports once (allocating and freeing 199999999 bytes) and closes the
 * library again.
 *
 * Failures that previously dereferenced NULL (string conversion failed,
 * local_dlopen not resolved, library not loadable, symbols missing) now raise
 * java.lang.UnsatisfiedLinkError; the library handle and the UTF string are
 * released on every path.
 */
JNIEXPORT void JNICALL Java_unpatched_Main_execute(JNIEnv* jenv, jclass cls, jstring path) {
    // Convert jstring to char*
    const char* nativePath = jenv->GetStringUTFChars(path, NULL);
    if (nativePath == NULL) {
        // The JVM has already raised an exception for the failed conversion.
        return;
    }

    const char* failure = NULL;
    void* lib = NULL;

    if (local_dlopen == NULL) {
        failure = "my_dlopen is not resolved; call loadLibUnpatched() first";
    } else {
        lib = local_dlopen((char*)nativePath, RTLD_NOW | RTLD_GLOBAL);
        if (lib == NULL) {
            failure = dlerror();
            if (failure == NULL) {
                failure = "my_dlopen returned NULL";
            }
        }
    }

    if (failure == NULL) {
        my_malloc_t local_malloc = (my_malloc_t) dlsym(lib, "my_malloc");
        my_free_t local_free = (my_free_t) dlsym(lib, "my_free");
        if (local_malloc != NULL && local_free != NULL) {
            local_free(local_malloc(199999999));
        } else {
            failure = "my_malloc/my_free not found in loaded library";
        }
    }

    if (failure != NULL) {
        // Throw before dlclose(): a dlerror() message may be overwritten by later dl* calls.
        jclass errorClass = jenv->FindClass("java/lang/UnsatisfiedLinkError");
        if (errorClass != NULL) {
            jenv->ThrowNew(errorClass, failure);
        }
    }

    if (lib != NULL) {
        dlclose(lib);
    }

    // IMPORTANT: Release the string when done to prevent memory leaks
    jenv->ReleaseStringUTFChars(path, nativePath);
}
create the following lib.h file
/* Public interface of my_lib.so: two exported allocation wrappers. */
#ifndef MYLIB_H
#define MYLIB_H
/* Marks a symbol with default ELF visibility. */
#define EXPORT __attribute__((visibility("default")))
#include "stdlib.h"
/* Allocates `size` bytes; see lib.c for the implementation. */
EXPORT void* my_malloc(size_t size);
/* Releases a pointer; see lib.c for the implementation. */
EXPORT void my_free(void* ptr);
#endif
create the following lib.c file
#include "lib.h"
/* Exported wrapper: forwards the request to malloc and returns its result unchanged. */
EXPORT void* my_malloc(size_t size)
{
    void* block = malloc(size);
    return block;
}
/* Exported wrapper: hands ptr straight to free. */
EXPORT void my_free(void* ptr)
{
    free(ptr);
}
create the following lib_dlopen.h file
/* Public interface of my_dlopen_lib.so: a single exported dlopen wrapper. */
#ifndef MY_DLOPEN_LIB_H
#define MY_DLOPEN_LIB_H
/* Marks a symbol with default ELF visibility. */
#define EXPORT __attribute__((visibility("default")))
/* Opens the library at `path` with the given dlopen flags; see lib_dlopen.c. */
EXPORT void* my_dlopen(char* path, int options);
#endif
create the following lib_dlopen.c file
#include "lib_dlopen.h"
#include <dlfcn.h>
#include <stdio.h>
/*
 * Exported dlopen wrapper: prints the requested path, then opens it with the
 * given flags and returns whatever dlopen returned.
 */
EXPORT void* my_dlopen(char* path, int options)
{
    void* handle;

    printf("my_dlopen: %s\n", path);
    handle = dlopen(path, options);
    return handle;
}
execute the following commands
# Build the JNI library that implements the native methods of unpatched.Main.
g++ -fPIC -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux unpatched_Main.cpp -shared -o unpatched_Main.so
# Build the library exporting my_malloc / my_free.
gcc -fPIC lib.c -shared -o my_lib.so
# Build the library exporting the my_dlopen wrapper.
gcc -fPIC lib_dlopen.c -shared -o my_dlopen_lib.so
# Run under gdb with the async-profiler agent attached (nativemem mode, JFR output).
# LD_LIBRARY_PATH lets dlopen("my_dlopen_lib.so") find the library in the current directory.
# gdb is told to pass SIGSEGV to the program without stopping or printing.
LD_LIBRARY_PATH=${PWD} gdb --ex "set pagination off" --ex "handle SIGSEGV pass nostop noprint" --ex "set print thread-events off" --args java -agentpath:${PATH_TO_ASYNC_PROFILER_LIB}/libasyncProfiler.so=start,nativemem=1,total,cstack=dwarf,file=output.jfr Main.java unpatched.Main
Additional Information/Context
No response
Async-profiler version
latest
Environment details
This was observed when using
- Java 11
- gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-17)
- Amazon 2 Linux