之前写过一篇文章,介绍如何在 Python 中监视程序卡死、让进程崩溃退出,并打印卡死处的调用堆栈。
在此记录一下 C++ 的版本。这个版本没有在代码层面实现堆栈打印,不过可以通过 core dump 和 gdb 来查看崩溃时的堆栈。
#pragma once

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

/// Hang detector: a background thread periodically checks how long ago
/// kick() was last called and aborts the process once that exceeds the
/// configured timeout.
///
/// The monitored code is expected to call kick() regularly. The check runs
/// every max(timeout / 3, 1) seconds, so detection can lag the timeout by up
/// to one check interval.
class WatchDog {
public:
    /// Starts the background checking thread immediately.
    ///
    /// @param timeout  Maximum number of seconds allowed between two kick()
    ///                 calls before the process is aborted.
    /// @param echo     When true, a line is printed to stdout on every check
    ///                 and right before aborting.
    WatchDog(int timeout = 10, bool echo = false);

    /// Stops and joins the background thread.
    ~WatchDog() { stop(); }

    // The object owns a running thread that holds a pointer to it, so it can
    // be neither copied nor moved.
    WatchDog(const WatchDog&) = delete;
    WatchDog& operator=(const WatchDog&) = delete;
    WatchDog(WatchDog&&) = delete;
    WatchDog& operator=(WatchDog&&) = delete;

    /// Asks the background thread to exit and waits for it to finish.
    void stop();

    /// Records "now" as the last moment the monitored code was alive.
    void kick();

private:
    /// Body of the background thread.
    void dog();

    /// Reports the failure (when echo is on) and aborts the process.
    void bark();

private:
    const int _timeout;                    // allowed gap between kicks, in seconds
    const bool _echo;                      // print diagnostics to stdout
    std::atomic<int64_t> _last_kicked_ts;  // time of the last kick, in microseconds
    std::mutex _mutex;                     // guards _stopped
    bool _stopped;                         // set by stop() to end the thread
    std::condition_variable _cond;         // wakes the thread early on stop()
    // Must stay the last member: the thread starts running inside the
    // constructor's initializer list and uses every member declared above.
    std::thread _dog;
};
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 #include "WatchDog.h" #include <iostream> using namespace std;namespace {int64_t get_gmtime_us () { std::chrono::system_clock clock; return std::chrono::duration_cast <std::chrono::microseconds>( clock.now ().time_since_epoch ()).count (); } } WatchDog::WatchDog (int timeout, bool echo) : _timeout(timeout) , _echo(echo) , _last_kicked_ts(get_gmtime_us ()) , _stopped(false ) , _dog(&WatchDog::dog, this ) { } void WatchDog::stop () { do { std::unique_lock<std::mutex> lock (_mutex) ; _stopped = true ; _cond.notify_one (); } while (false ); try { _dog.join (); } catch (...) { } } void WatchDog::kick () { _last_kicked_ts = get_gmtime_us (); } void WatchDog::dog () { std::unique_lock<std::mutex> lock (_mutex) ; while (true ) { if (_stopped) return ; int64_t ts = get_gmtime_us (); if (ts - _last_kicked_ts > _timeout * 1000000 ) { bark (); } if (_echo) { std::cout << "Successful dog check" << " [ts] " << ts << " [last_kicked_ts] " << _last_kicked_ts << std::endl; } int n = std::max (_timeout / 3 , 1 ); _cond.wait_for (lock, std::chrono::seconds (n)); } } void WatchDog::bark () { if (_echo) { std::cout << "\n!!!!! WATCH DOG FAILURE TRIGGERED !!!!!" << std::endl; } abort (); }