menios icon indicating copy to clipboard operation
menios copied to clipboard

Implement thread debugging and profiling support

Open pbalduino opened this issue 4 months ago • 1 comment

Goal

Implement comprehensive debugging and profiling support for multithreaded applications to enable development and optimization of threaded programs on meniOS.

Context

With threading infrastructure in place (Issues #108-#111), developers need tools to debug, profile, and optimize multithreaded applications. This includes integration with debuggers like GDB, thread state inspection, deadlock detection, and performance profiling capabilities.

Definition of Done

  • GDB threading support: Integration with GDB for thread debugging
  • Thread state inspection: Runtime thread state and stack inspection
  • Deadlock detection: Automatic deadlock detection and reporting
  • Thread profiling: Performance profiling per thread
  • Lock contention analysis: Analysis of synchronization bottlenecks
  • Thread-aware stack traces: Stack unwinding for all threads
  • Thread naming: Named threads for easier debugging
  • Debug symbols: Thread-aware debug symbol support
  • Performance counters: Per-thread performance monitoring

GDB Integration Support

// GDB-specific thread information structure.
// gdb_get_thread_list() hands records of this type back to the caller.
// NOTE(review): `context` is a pointer; who owns the pointed-to register
// block and how long it stays valid is not specified here — confirm.
struct gdb_thread_info {
    tid_t tid;                 // Thread ID
    pid_t pid;                 // Process ID
    void *stack_base;          // Stack base address
    void *stack_top;           // Stack top address
    struct cpu_context *context;  // CPU register context
    enum thread_state state;   // Current thread state
    char name[64];             // Thread name (fixed 64-byte buffer)
    int priority;              // Thread priority
    void *tls_base;            // Thread-local storage base
};

// GDB interface functions.
// NOTE(review): the return-value convention and the ownership of the array
// stored through `threads` are not specified here — presumably 0 means
// success and the caller releases the array; confirm before implementing.
int gdb_get_thread_list(struct gdb_thread_info **threads, int *count);
// The setter takes `context` as const, so it only reads it; the getter's
// non-const pointer is presumably an out-parameter filled by the callee.
int gdb_get_thread_context(tid_t tid, struct cpu_context *context);
int gdb_set_thread_context(tid_t tid, const struct cpu_context *context);
// Per-thread execution control, addressed by thread ID.
int gdb_thread_step(tid_t tid);
int gdb_thread_continue(tid_t tid);
Thread State Inspection

// Thread inspection API.
// Snapshot record describing a single thread; filled in by
// get_thread_info() / get_all_threads().
// NOTE(review): units of cpu_time and creation_time (ticks, ns, ...) are not
// specified here — confirm and document them.
typedef struct {
    tid_t tid;                 // Thread identifier
    pid_t pid;                 // Parent process
    enum thread_state state;   // Current state
    char name[64];             // Thread name (fixed 64-byte buffer)
    int priority;              // Thread priority
    void *stack_base;          // Stack base
    size_t stack_size;         // Stack size (same unit as stack_used)
    size_t stack_used;         // Used stack space
    uint64_t cpu_time;         // CPU time consumed
    uint64_t creation_time;    // Thread creation timestamp
    uint64_t context_switches; // Number of context switches
    void *waiting_on;          // Object thread is waiting on
} thread_info_t;

// Thread inspection functions.
// Fills the caller-provided `info` record for thread `tid`.
int get_thread_info(tid_t tid, thread_info_t *info);
// The usage example in this document treats a return of 0 as success and
// releases the array stored through `threads` with free().
int get_all_threads(pid_t pid, thread_info_t **threads, int *count);
// `stack_frames` is a caller-provided array with room for `max_frames`
// entries. NOTE(review): presumably returns the number of frames written or
// a negative error — not specified here, confirm.
int get_thread_stack_trace(tid_t tid, void **stack_frames, int max_frames);

Deadlock Detection System

// Deadlock detection infrastructure.
// One record linking a waiting thread to the thread that currently holds the
// lock it wants, together with where the dependency was recorded.
// NOTE(review): lock_type and file are bare `const char *`; the lifetime of
// the strings they point at is not specified here — confirm.
typedef struct lock_dependency {
    tid_t holder_tid;          // Thread holding the lock
    tid_t waiter_tid;          // Thread waiting for the lock
    void *lock_object;         // Lock object address
    const char *lock_type;     // Type of lock (mutex, rwlock, etc.)
    const char *file;          // Source file location
    int line;                  // Source line number
    uint64_t timestamp;        // When dependency was created
} lock_dependency_t;

// Deadlock detection API.
// Describes one detected deadlock. The usage example in this document walks
// cycle[0] .. cycle[cycle_length - 1].
// NOTE(review): ownership of the `threads` and `cycle` arrays is not
// specified and no release function is declared — confirm who frees them.
typedef struct deadlock_info {
    int thread_count;          // Number of threads in deadlock
    tid_t *threads;            // Threads involved in deadlock
    lock_dependency_t *cycle;  // Dependency cycle causing deadlock
    int cycle_length;          // Length of dependency cycle
} deadlock_info_t;

// Deadlock detection functions.
// Takes a bool, so <stdbool.h> (or C23) must be in scope for this prototype.
int enable_deadlock_detection(bool enable);
// The usage example in this document treats a return of 0 as success and
// reads *count to learn how many records were stored through `deadlocks`.
int check_for_deadlocks(deadlock_info_t **deadlocks, int *count);
// NOTE(review): lock_dependency_t records both a holder and a waiter, but
// only a single `tid` is passed here — which role it represents is not
// specified; confirm.
int register_lock_dependency(tid_t tid, void *lock, const char *type,
                            const char *file, int line);
int remove_lock_dependency(tid_t tid, void *lock);

Thread Profiling System

// Per-thread performance counters.
// All counters are 64-bit unsigned values; print them with PRIu64 from
// <inttypes.h>, not %lu.
// NOTE(review): units of wait_time and run_time are not specified here —
// confirm and document them.
typedef struct thread_profile {
    tid_t tid;                 // Thread identifier
    uint64_t cpu_cycles;       // CPU cycles consumed
    uint64_t instructions;     // Instructions executed
    uint64_t cache_misses;     // Cache miss count
    uint64_t page_faults;      // Page fault count
    uint64_t system_calls;     // System call count
    uint64_t context_switches; // Context switch count
    uint64_t lock_acquisitions; // Lock acquisition count
    uint64_t lock_contentions; // Lock contention count
    uint64_t wait_time;        // Time spent waiting
    uint64_t run_time;         // Time spent running
} thread_profile_t;

// Profiling API.
// Every call addresses a thread by tid_t.
// NOTE(review): the usage example in this document passes pthread_self()
// (a pthread_t) to these functions — confirm the two types are
// interchangeable or provide a conversion.
int start_thread_profiling(tid_t tid);
int stop_thread_profiling(tid_t tid);
// Stores the counters in the caller-provided `profile` record.
int get_thread_profile(tid_t tid, thread_profile_t *profile);
int reset_thread_profile(tid_t tid);
// NOTE(review): output format and overwrite/append behaviour for `filename`
// are not specified here.
int dump_all_profiles(const char *filename);

Lock Contention Analysis

// Lock contention information.
// Aggregated statistics for a single lock object.
// NOTE(review): time units for the wait/hold fields and the lifetime of the
// lock_type / name strings are not specified here — confirm.
typedef struct lock_stats {
    void *lock_address;        // Lock object address
    const char *lock_type;     // Type of lock
    const char *name;          // Lock name (if named)
    uint64_t acquisitions;     // Total acquisitions
    uint64_t contentions;      // Contended acquisitions
    uint64_t wait_time_total;  // Total wait time
    uint64_t wait_time_max;    // Maximum wait time
    uint64_t hold_time_total;  // Total hold time
    uint64_t hold_time_max;    // Maximum hold time
    tid_t most_contending_thread;  // Thread with most contention
} lock_stats_t;

// Lock analysis API.
// NOTE(review): whether lock profiling is on or off before this is called is
// not specified here — confirm the default.
int enable_lock_profiling(bool enable);
// Stores statistics for one lock in the caller-provided `stats` record.
int get_lock_stats(void *lock, lock_stats_t *stats);
// NOTE(review): ownership of the array stored through `stats` is not
// specified here — presumably caller-freed like get_all_threads(); confirm.
int get_all_lock_stats(lock_stats_t **stats, int *count);
int reset_lock_stats(void *lock);
int dump_lock_analysis(const char *filename);

Thread Naming and Identification

// Thread naming API.
// Signatures match the glibc non-portable extensions of the same names;
// `len` is the size of the caller-provided `name` buffer.
int pthread_setname_np(pthread_t thread, const char *name);
int pthread_getname_np(pthread_t thread, char *name, size_t len);

// Thread identification helpers.
// NOTE(review): get_thread_name() returns a bare pointer — its lifetime, and
// the value find_thread_by_name() returns when nothing matches, are not
// specified here; confirm.
const char *get_thread_name(tid_t tid);
tid_t find_thread_by_name(const char *name);
int set_thread_description(tid_t tid, const char *description);

Stack Unwinding and Trace Support

// Stack frame information.
// One resolved frame of a thread's call stack.
// NOTE(review): function_name and filename are bare `const char *`; what
// they hold when symbols are unavailable, and their lifetime, are not
// specified here — confirm.
typedef struct stack_frame {
    void *address;             // Frame address
    void *function;            // Function address
    const char *function_name; // Function name
    const char *filename;      // Source filename
    int line_number;           // Source line number
    size_t frame_size;         // Frame size
} stack_frame_t;

// Stack unwinding API.
// `frames` is a caller-provided array with room for `max_frames` entries.
int unwind_thread_stack(tid_t tid, stack_frame_t *frames, int max_frames);
// Takes a FILE *, so <stdio.h> must be included before this prototype.
int print_thread_backtrace(tid_t tid, FILE *output);
// NOTE(review): the string stored through `stack_trace` is presumably
// allocated by the callee and released by the caller — not specified here;
// confirm.
int get_thread_call_stack(tid_t tid, char **stack_trace);

Implementation Components

GDB Integration

  • Thread information export for GDB
  • Remote debugging protocol support
  • Thread-aware breakpoint handling
  • Multi-threaded step/continue operations
  • Thread-specific watchpoints

Deadlock Detection Algorithm

  • Wait-for graph construction
  • Cycle detection in dependency graph
  • Real-time deadlock monitoring
  • Configurable detection sensitivity
  • Deadlock recovery suggestions

Performance Monitoring

  • Hardware performance counter integration
  • Software event counting
  • Time-based profiling
  • Statistical sampling profiling
  • Lock-free profiling data collection

Debug Symbol Integration

  • DWARF debug information for threads
  • Thread-aware symbol resolution
  • Source-level thread debugging
  • Variable inspection per thread
  • Thread-local variable support

Testing Strategy

  • GDB integration testing with multithreaded programs
  • Deadlock detection accuracy testing
  • Thread profiling overhead measurement
  • Stack unwinding correctness validation
  • Lock contention analysis accuracy testing
  • Debug symbol resolution testing
  • Multi-threaded debugging scenario testing

Dependencies

  • Kernel threading: Issue #108 - Thread management infrastructure
  • pthread API: Issue #109 - POSIX threading interface
  • Advanced synchronization: Issue #111 - Lock implementations for monitoring
  • Debug symbols: Need DWARF/ELF debug information support
  • Performance counters: Hardware performance monitoring support

Integration Points

  • GDB remote debugging protocol integration
  • Integration with development tools and IDEs
  • System call tracing integration
  • Memory debugging tool integration
  • Performance analysis tool integration

Security Considerations

  • Access control for thread debugging operations
  • Secure debug information handling
  • Prevention of information leakage via debugging
  • Resource limits for debugging operations
  • Secure profiling data collection

Files to Create/Modify

  • src/kernel/debug/thread_debug.c - Thread debugging infrastructure
  • src/kernel/debug/deadlock_detect.c - Deadlock detection system
  • src/kernel/debug/thread_profile.c - Thread profiling implementation
  • lib/pthread/pthread_debug.c - pthread debugging support
  • tools/thread_analyzer.c - Thread analysis utility
  • include/thread_debug.h - Thread debugging interface
  • gdb/meniOS_thread.py - GDB threading support script

Performance Goals

  • Debugging overhead < 5% when enabled
  • Deadlock detection latency < 1ms
  • Profiling overhead < 2% for basic counters
  • Stack unwinding < 100μs per thread
  • Lock analysis overhead < 1% per operation

Error Handling

  • Graceful handling when debugging not supported
  • Safe operation with corrupted thread state
  • Resource cleanup on debugging failures
  • Error reporting for invalid debug operations
  • Fallback modes for limited debug capabilities

Advanced Features

  • Remote debugging: Network-based thread debugging
  • Time-travel debugging: Record and replay threading
  • Visual thread analysis: Graphical thread state visualization
  • Automated deadlock resolution: Suggestions for deadlock fixes
  • Performance optimization hints: Automatic optimization suggestions

Usage Examples

#include <inttypes.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <thread_debug.h>

// Named thread with profiling.
//
// Names the calling thread "worker", starts profiling for it, takes and
// releases the shared mutex a fixed number of times, then prints the CPU
// cycle counter collected for the thread.
//
// arg: pointer to the pthread_mutex_t shared by all workers.
// Returns NULL in every case; failures are reported on stderr.
void *worker_thread(void *arg) {
    enum { ITERATIONS = 1000, HOLD_TIME_US = 100 };

    pthread_mutex_t *mutex = arg;

    // Set thread name for debugging
    pthread_setname_np(pthread_self(), "worker");

    // Enable profiling for this thread.
    // NOTE(review): the profiling API is declared with tid_t but receives a
    // pthread_t here — confirm the two are interchangeable on meniOS.
    // A return of 0 is taken as success, as the other examples do.
    bool profiling = (start_thread_profiling(pthread_self()) == 0);
    if (!profiling) {
        fprintf(stderr, "Could not start thread profiling\n");
    }

    // Do work with instrumented locks
    for (int i = 0; i < ITERATIONS; i++) {
        // Never enter the critical section without actually owning the lock.
        if (pthread_mutex_lock(mutex) != 0) {
            fprintf(stderr, "pthread_mutex_lock failed\n");
            break;
        }
        // Critical section work
        usleep(HOLD_TIME_US);
        if (pthread_mutex_unlock(mutex) != 0) {
            fprintf(stderr, "pthread_mutex_unlock failed\n");
            break;
        }
    }

    // Get profiling results; only print counters that were really filled in.
    if (profiling) {
        thread_profile_t profile;
        if (get_thread_profile(pthread_self(), &profile) == 0) {
            // cpu_cycles is uint64_t, so %lu is not portable; use PRIu64.
            printf("Thread consumed %" PRIu64 " CPU cycles\n",
                   profile.cpu_cycles);
        } else {
            fprintf(stderr, "Could not read thread profile\n");
        }
    }

    return NULL;
}

// Deadlock detection example.
//
// Turns deadlock detection on, runs a single check, and prints the
// dependency cycle of the first deadlock reported (if any). This performs
// one check only; call check_for_deadlocks() again later to poll.
void setup_deadlock_detection(void) {
    // Enable deadlock detection (0 is taken as success, matching the check
    // below).
    if (enable_deadlock_detection(true) != 0) {
        fprintf(stderr, "Could not enable deadlock detection\n");
        return;
    }

    // Initialised so that nothing indeterminate is read or freed if the
    // callee leaves the out-parameters untouched.
    deadlock_info_t *deadlocks = NULL;
    int count = 0;

    if (check_for_deadlocks(&deadlocks, &count) != 0) {
        return;
    }

    if (count > 0) {
        const deadlock_info_t *first = &deadlocks[0];

        printf("Deadlock detected involving %d threads\n",
               first->thread_count);

        // Print deadlock cycle. tid_t's width is not visible here, so cast
        // to long to keep the format specifier and the argument in agreement.
        for (int i = 0; i < first->cycle_length; i++) {
            printf("Thread %ld waiting for lock at %p\n",
                   (long)first->cycle[i].waiter_tid,
                   first->cycle[i].lock_object);
        }
    }

    // Release the result array the same way inspect_all_threads() releases
    // the one from get_all_threads().
    // NOTE(review): ownership of the nested `threads` / `cycle` arrays is not
    // specified by the API — confirm whether they need a separate release.
    free(deadlocks);
}

// Thread inspection example.
//
// Lists every thread of the current process with its name, state and stack
// usage percentage, and prints a backtrace for each one on stdout.
void inspect_all_threads(void) {
    thread_info_t *threads = NULL;
    int count = 0;

    if (get_all_threads(getpid(), &threads, &count) != 0) {
        return;
    }

    printf("Process has %d threads:\n", count);

    for (int i = 0; i < count; i++) {
        const thread_info_t *t = &threads[i];

        // A zero stack_size would otherwise print "inf" or "nan".
        double stack_pct = 0.0;
        if (t->stack_size != 0) {
            stack_pct = (t->stack_used * 100.0) / t->stack_size;
        }

        // tid_t's width is not visible here, so cast to long to keep the
        // format specifier and the argument in agreement.
        printf("Thread %ld (%s): %s, %.1f%% stack used\n",
               (long)t->tid,
               t->name,
               thread_state_name(t->state),
               stack_pct);

        // Print stack trace
        print_thread_backtrace(t->tid, stdout);
    }

    free(threads);
}

// Example driver: enables the debugging facilities, starts the workers that
// contend on one mutex, inspects them while they run, joins them and dumps
// the lock analysis to "lock_analysis.txt".
//
// Returns 0 when every worker was started and the dump succeeded, and 1
// otherwise.
int main(void) {
    enum { NUM_WORKERS = 4 };

    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
    pthread_t threads[NUM_WORKERS];
    int started = 0;   // threads[0 .. started-1] hold valid, joinable IDs
    int status = 0;

    // Setup debugging
    setup_deadlock_detection();

    // Turn lock profiling on so the final dump has data to report.
    // NOTE(review): presumably off by default, like deadlock detection —
    // confirm against the implementation.
    if (enable_lock_profiling(true) != 0) {
        fprintf(stderr, "Could not enable lock profiling\n");
    }

    // Create worker threads; remember how many really exist so that only
    // valid thread IDs are joined below.
    for (int i = 0; i < NUM_WORKERS; i++) {
        int rc = pthread_create(&threads[started], NULL, worker_thread, &mutex);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            status = 1;
            break;
        }
        started++;
    }

    // Monitor threads
    sleep(1);
    inspect_all_threads();

    // Wait for completion. `mutex` lives on this stack frame, so every
    // worker must be joined before returning.
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }

    // Dump final analysis
    if (dump_lock_analysis("lock_analysis.txt") != 0) {
        fprintf(stderr, "Could not write lock_analysis.txt\n");
        status = 1;
    }

    return status;
}

Related Issues

  • Essential for multithreaded application development
  • Required for debugging complex threading issues
  • Enables performance optimization of threaded code
  • Foundation for development tool integration
  • Critical for production threading support

pbalduino avatar Sep 28 '25 15:09 pbalduino