Files
Ayay/SHH.ProcessLaunchers/ManagedProcess.cs

486 lines
18 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace SHH.ProcessLaunchers
{
// =========================================================
// 内部核心类:单个受管进程 (封装了所有复杂逻辑)
// =========================================================
/// <summary>
/// 受管进程实例 (Internal Worker)
/// <para>职责:管理【单个】进程的生命周期。</para>
/// <para>功能:包含 启动/停止/自愈/熔断/监控 的核心状态机逻辑。</para>
/// </summary>
internal class ManagedProcess
{
#region --- (Fields) ---
private readonly ProcessConfig _config;
private readonly ProcessManager _manager;
private readonly ILauncherLogger _logger;
/// <summary>
/// The underlying operating-system process object.
/// </summary>
private Process _process;
/// <summary>
/// Flag: whether the current stop is intentional.
/// <para>True: stopped on request (no self-healing restart).</para>
/// <para>False: running (an exit triggers the self-healing logic).</para>
/// </summary>
private bool _isIntentionalStop = true;
// --- Cancellation sources for background tasks ---
private CancellationTokenSource _delayCts; // cancels the pending restart / circuit-breaker countdown
private CancellationTokenSource _monitorCts; // cancels the resource-monitoring loop
private CancellationTokenSource _schedulerCts; // cancels the scheduled (periodic) restart
// --- Runtime statistics ---
private int _consecutiveFailures = 0; // consecutive failure count (circuit-breaker counter)
private DateTime? _lastStartTime; // time of the last successful start (used to compute how long the process ran)
private DateTime? _lastExitTime; // time of the last detected exit
private DateTime? _nextRetryTime; // when the next automatic retry is due
/// <summary>
/// Current lifecycle status (read-only to callers).
/// </summary>
public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;
/// <summary>
/// Exposes the configuration this instance was created with.
/// </summary>
public ProcessConfig Config => _config;
#endregion
#region --- ---
/// <summary>
/// Creates a managed-process worker bound to one configuration.
/// </summary>
/// <param name="config">Configuration of the process to manage.</param>
/// <param name="manager">Owning manager; receives state-change and output notifications.</param>
/// <param name="logger">Sink for lifecycle and console log entries.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
{
    // Fail fast: every member dereferences these without a null check, so a null
    // here would otherwise surface later as a NullReferenceException, possibly on
    // a background thread.
    _config = config ?? throw new ArgumentNullException(nameof(config));
    _manager = manager ?? throw new ArgumentNullException(nameof(manager));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
#endregion
#region --- (External Commands) ---
/// <summary>
/// Entry point for a start request.
/// <para>Does nothing when the process is already running or starting; otherwise
/// clears the retry/failure bookkeeping, logs the request and launches the process.</para>
/// </summary>
/// <param name="trigger">Who or what requested the start (forwarded to the log).</param>
/// <param name="reason">Free-text reason (forwarded to the log).</param>
public void ExecuteStart(LogTrigger trigger, string reason)
{
    bool alreadyActive = Status == ProcessStatus.Running || Status == ProcessStatus.Starting;
    if (alreadyActive)
    {
        return;
    }

    // Abort any pending delayed restart / cool-down countdown first.
    _delayCts?.Cancel();

    // Start from a clean slate: no scheduled retry, no recorded failures, and
    // "not an intentional stop" so that a later exit goes through the restart logic.
    _nextRetryTime = null;
    _consecutiveFailures = 0;
    _isIntentionalStop = false;

    _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

    LaunchProcess();
}
/// <summary>
/// Entry point for a stop request.
/// <para>Marks the stop as intentional (so the exit handler does not restart),
/// cancels all background tasks, kills the process and sets the status to Stopped.</para>
/// </summary>
/// <param name="trigger">Who or what requested the stop (forwarded to the log).</param>
/// <param name="reason">Free-text reason (forwarded to the log).</param>
public void ExecuteStop(LogTrigger trigger, string reason)
{
    // Must be set before the kill: HandleExitLogic checks this flag and returns
    // without restarting when it is true.
    _isIntentionalStop = true;

    // Tear down the delayed restart, the monitor loop and the scheduled restart.
    _delayCts?.Cancel();
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();
    _nextRetryTime = null;

    // Only log the stop when there is something to stop.
    bool wasActive = Status != ProcessStatus.Stopped;
    if (wasActive)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
    }

    KillProcess();
    UpdateStatus(ProcessStatus.Stopped);
}
/// <summary>
/// Calls <c>Reset()</c> on every configured resource guard.
/// Does nothing when no guard collection is configured.
/// </summary>
public void ResetGuards()
{
    if (_config.Guards == null)
    {
        return;
    }

    foreach (var sentinel in _config.Guards)
    {
        sentinel.Reset();
    }
}
#endregion
#region --- (Core Launch Logic) ---
/// <summary>
/// Starts the configured executable and wires up its lifecycle hooks.
/// <para>Sets the status to Starting, verifies that the executable exists, builds the
/// start info, subscribes to output/exit events, starts the process, then switches to
/// Running and attaches resource monitoring and (if configured) the scheduled restart.</para>
/// <para>Any failure (missing file or exception) is logged and routed to
/// <c>HandleExitLogic(-1)</c>, which decides between retry and circuit break.</para>
/// </summary>
private void LaunchProcess()
{
    try
    {
        UpdateStatus(ProcessStatus.Starting);

        // 1. Path check.
        string path = Path.GetFullPath(_config.ExePath);
        if (!File.Exists(path))
        {
            _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);
            // A missing executable goes straight to the exit decision logic
            // (which may trip the circuit breaker).
            HandleExitLogic(exitCode: -1);
            return;
        }

        // 2. Build the ProcessStartInfo.
        var psi = new ProcessStartInfo
        {
            FileName = path,
            Arguments = _config.Arguments,
            // Default the working directory to the executable's directory.
            WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory) ? Path.GetDirectoryName(path) : _config.WorkingDirectory,
            CreateNoWindow = !_config.Visible,
            // Must be false for stream redirection to work.
            UseShellExecute = false,
            RedirectStandardOutput = _config.EnableLogRedirect,
            RedirectStandardError = _config.EnableLogRedirect
        };

        // Keep the new instance in a local so the event handlers below refer to
        // exactly this process, not to whatever _process holds when they fire.
        var process = new Process { StartInfo = psi, EnableRaisingEvents = true };
        var previous = _process;
        _process = process;
        // Release the OS handle of the instance being replaced.
        previous?.Dispose();

        // 3. Forward redirected output lines to the logger and to the manager.
        if (_config.EnableLogRedirect)
        {
            process.OutputDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, false);
                    _manager.DispatchOutput(_config.Id, e.Data, false);
                }
            };
            process.ErrorDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, true);
                    _manager.DispatchOutput(_config.Id, e.Data, true);
                }
            };
        }

        // 4. Exit hook (raised on a background thread).
        process.Exited += (s, e) =>
        {
            // Ignore a late exit notification from an instance that has already
            // been replaced; otherwise it would be mistaken for a crash of the
            // current process and schedule a second restart.
            if (!ReferenceEquals(process, _process))
            {
                return;
            }

            int code = -1;
            try
            {
                code = process.ExitCode;
            }
            catch (Exception)
            {
                // Best effort: keep -1 when the exit code cannot be read.
            }

            HandleExitLogic(code);
        };

        // 5. Ask the OS to start the process.
        if (!process.Start())
        {
            throw new InvalidOperationException("Process.Start() 返回 false启动失败");
        }

        // 6. Async stream reading may only begin after Start().
        if (_config.EnableLogRedirect)
        {
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
        }

        // 7. Record the successful start.
        _lastStartTime = DateTime.Now;
        UpdateStatus(ProcessStatus.Running);
        _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = process.Id });

        // 8. Attach the resource-monitoring loop.
        StartMonitoring();

        // 9. Attach the scheduled restart, if configured.
        if (_config.AutoRestartIntervalMinutes > 0)
        {
            // Compute in 64-bit and clamp: minutes * 60 * 1000 overflows int
            // for intervals above roughly 24.8 days.
            long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60 * 1000;
            ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
        }
    }
    catch (Exception ex)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");
        HandleExitLogic(-1);
    }
}
#endregion
#region --- (Guard & Monitor) ---
/// <summary>
/// Starts the background resource-monitoring loop for the current process.
/// <para>Cancels the previous loop, then — if any guards are configured — polls them
/// every 3 seconds. A Warning result is only logged; a Critical result is logged and
/// the process is killed, which ends the loop.</para>
/// </summary>
private void StartMonitoring()
{
    // Replace the previous loop's cancellation source (done even when there are no guards).
    _monitorCts?.Cancel();
    _monitorCts = new CancellationTokenSource();

    bool hasGuards = _config.Guards != null && _config.Guards.Count != 0;
    if (!hasGuards)
    {
        return;
    }

    CancellationToken stopSignal = _monitorCts.Token;

    Task.Run(async () =>
    {
        while (true)
        {
            if (stopSignal.IsCancellationRequested)
            {
                return;
            }

            try
            {
                // Poll interval: 3 seconds.
                await Task.Delay(3000, stopSignal);

                // Stop monitoring once the process is gone.
                if (_process == null || _process.HasExited)
                {
                    return;
                }

                foreach (var sentinel in _config.Guards)
                {
                    var verdict = sentinel.Check(_process, out string detail);

                    if (verdict == GuardResult.Critical)
                    {
                        // Critical: log, then kill. The kill raises Exited, whose
                        // handler runs the exit decision logic (restart).
                        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {detail} -> 执行管控重启");
                        KillProcess();
                        return;
                    }

                    if (verdict == GuardResult.Warning)
                    {
                        // Warning: log only, leave the process alone.
                        _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {detail}");
                    }
                }
            }
            catch (TaskCanceledException)
            {
                // Normal cancellation of the delay.
                return;
            }
            catch (Exception failure)
            {
                // Any other error is logged and the loop keeps going.
                _logger.LogConsole(_config.Id, $"监控线程异常: {failure.Message}", true);
            }
        }
    }, stopSignal);
}
/// <summary>
/// Schedules a one-shot restart of the running process after the given delay.
/// <para>Any previously scheduled restart is cancelled first. When the delay elapses
/// without cancellation and the process is still Running, it is killed; the exit
/// handler then performs the restart.</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds before the restart is triggered.</param>
private void ScheduleScheduledRestart(int delayMs)
{
    _schedulerCts?.Cancel();

    // Keep a local reference so the continuation checks the source it was
    // scheduled with, not whatever the field holds later.
    var cts = new CancellationTokenSource();
    _schedulerCts = cts;

    Task.Delay(delayMs, cts.Token).ContinueWith(t =>
    {
        // Skip when cancelled. The second check covers the window where the
        // timer has already fired but cancellation was requested before this
        // continuation ran.
        if (t.IsCanceled || cts.IsCancellationRequested)
        {
            return;
        }

        // Only restart a process that is actually running.
        if (Status != ProcessStatus.Running)
        {
            return;
        }

        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
        KillProcess(); // Exited -> HandleExitLogic -> automatic restart
    }, TaskScheduler.Default); // explicit scheduler, consistent with WaitAndExec
}
#endregion
#region --- (Decision Logic) ---
/// <summary>
/// Decision logic executed after the process exits or a launch attempt fails
/// (self-healing plus circuit breaker).
/// <para>Cancels the monitor and scheduler tasks. When the stop was intentional nothing
/// else happens. Otherwise the exit is logged, the consecutive-failure counter is
/// reset (run was longer than the stability threshold) or incremented, and the
/// method enters either the short-retry or the cool-down flow.</para>
/// </summary>
/// <param name="exitCode">Exit code of the process; -1 is passed when a launch failed.</param>
private void HandleExitLogic(int exitCode)
{
    // 1. Stop the companion tasks of the process that just went away.
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();

    // 2. Intent check. ExecuteStop sets the flag, so no restart happens. Kills issued
    //    by the resource guard or the scheduler leave it false and fall through.
    if (_isIntentionalStop)
    {
        return;
    }

    // The status is Running only after a launch completed; a failed launch attempt
    // arrives here while the status is still Starting. Capture it before the
    // retry/cool-down flows below change it.
    bool wasRunning = Status == ProcessStatus.Running;

    DateTime now = DateTime.Now;
    _lastExitTime = now;
    _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

    // 3. Stability check: a run longer than the threshold counts as stable and
    //    clears the failure counter. When the process never reached Running,
    //    _lastStartTime still belongs to an EARLIER run; using it would report a
    //    huge "run duration" and reset the counter on every failed attempt, so the
    //    circuit breaker would never trip. Treat such attempts as a zero-length run.
    double runDurationMs = wasRunning && _lastStartTime.HasValue
        ? (now - _lastStartTime.Value).TotalMilliseconds
        : 0;

    if (runDurationMs > _config.StabilityThresholdMs)
    {
        if (_consecutiveFailures > 0)
        {
            _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
        }

        _consecutiveFailures = 0;
    }
    else
    {
        _consecutiveFailures++;
    }

    // 4. Circuit breaker: too many consecutive failures -> long cool-down,
    //    otherwise a short delayed retry.
    if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
    {
        EnterCoolingDown();
    }
    else
    {
        EnterShortRetry();
    }
}
/// <summary>
/// Enters the short-retry flow: sets the status to PendingRestart, records the
/// next retry time, logs the attempt counter and relaunches after the configured
/// restart delay.
/// </summary>
private void EnterShortRetry()
{
    int restartDelayMs = _config.RestartDelayMs;

    UpdateStatus(ProcessStatus.PendingRestart);
    _nextRetryTime = DateTime.Now.AddMilliseconds(restartDelayMs);

    string progress = $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})";
    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, progress, new { DelayMs = restartDelayMs });

    // Relaunch once the delay has elapsed (unless cancelled in the meantime).
    WaitAndExec(restartDelayMs, LaunchProcess);
}
/// <summary>
/// Enters the circuit-breaker cool-down flow: sets the status to CoolingDown,
/// records the next retry time, logs the break and schedules a recovery attempt
/// after the configured circuit-breaker delay.
/// </summary>
private void EnterCoolingDown()
{
    int coolDownMs = _config.CircuitBreakerDelayMs;

    UpdateStatus(ProcessStatus.CoolingDown);
    _nextRetryTime = DateTime.Now.AddMilliseconds(coolDownMs);

    // The log payload carries the delay in whole minutes (integer division).
    var payload = new { Minutes = coolDownMs / 1000 / 60 };
    _logger.LogLifecycle(_config.Id, LogAction.CircuitBreak, LogTrigger.System, "触发熔断保护", payload);

    // Once the cool-down is over, log and try to bring the process back.
    Action recover = () =>
    {
        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
        LaunchProcess();
    };
    WaitAndExec(coolDownMs, recover);
}
/// <summary>
/// Runs <paramref name="action"/> after <paramref name="delayMs"/> milliseconds unless
/// the delay is cancelled through <c>_delayCts</c>.
/// <para>At most one delayed action is pending at a time: scheduling a new one
/// cancels the previous one.</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds.</param>
/// <param name="action">Callback to run on a thread-pool thread after the delay.</param>
private void WaitAndExec(int delayMs, Action action)
{
    // Cancel whatever is still pending. Overwriting the field without cancelling
    // would leave the older action uncancellable, so it could still fire after a
    // later ExecuteStop (which only cancels the source currently in the field).
    _delayCts?.Cancel();

    // Local reference: the continuation must check the source it was scheduled
    // with, not whatever the field holds when it runs.
    var cts = new CancellationTokenSource();
    _delayCts = cts;

    Task.Delay(delayMs, cts.Token).ContinueWith(t =>
    {
        // Run only when not cancelled. The second check covers the window where
        // the timer already fired but cancellation was requested before this
        // continuation ran.
        if (t.IsCanceled || cts.IsCancellationRequested)
        {
            return;
        }

        action();
    }, TaskScheduler.Default);
}
#endregion
#region --- (Helpers) ---
/// <summary>
/// Forcibly terminates the current process, if there is one and it is still alive,
/// then waits up to 500 ms for it to exit. Failures are ignored (best effort).
/// </summary>
private void KillProcess()
{
    // Work on a snapshot so the check and the kill target the same instance.
    var process = _process;
    if (process == null)
    {
        return;
    }

    try
    {
        // HasExited is inside the try on purpose: it throws
        // InvalidOperationException when the Process object was created but never
        // started (e.g. after a failed launch), which must not escape to callers
        // such as ExecuteStop.
        if (process.HasExited)
        {
            return;
        }

        // The parameterless Kill() terminates this process only.
        // NOTE(review): an earlier comment here mentioned killing the whole process
        // tree; that needs the Kill(bool entireProcessTree) overload (.NET Core 3.0+).
        // Confirm the target framework before switching.
        process.Kill();
        process.WaitForExit(500); // give the OS a moment to release resources
    }
    catch (Exception)
    {
        // Deliberately ignored: insufficient permissions, or the process exited
        // between the check and the kill.
    }
}
/// <summary>
/// Sets <see cref="Status"/> and notifies the manager, but only when the value
/// actually changes.
/// </summary>
/// <param name="status">The new lifecycle status.</param>
private void UpdateStatus(ProcessStatus status)
{
    bool unchanged = Status == status;
    if (unchanged)
    {
        return;
    }

    Status = status;

    // Let the manager raise its external state-change event.
    _manager.DispatchStateChange(_config.Id, status);
}
/// <summary>
/// Builds a snapshot DTO of the current state.
/// <para>The PID is filled in only while the status is Running. While cooling down,
/// <c>Message</c> carries the remaining time as total-minutes:seconds.</para>
/// </summary>
/// <returns>A new <c>ProcessInfoSnapshot</c> describing this process.</returns>
public ProcessInfoSnapshot GetSnapshot()
{
    // Read the mutable state once so the snapshot is internally consistent.
    ProcessStatus status = Status;
    DateTime? nextRetry = _nextRetryTime;

    int? pid = null;
    try
    {
        if (status == ProcessStatus.Running)
        {
            pid = _process?.Id;
        }
    }
    catch
    {
        // Process.Id throws when no OS process is associated; leave the PID empty.
    }

    string msg = "";
    if (status == ProcessStatus.CoolingDown && nextRetry.HasValue)
    {
        TimeSpan remaining = nextRetry.Value - DateTime.Now;

        // The retry time may already have passed when the snapshot is taken;
        // never show a negative countdown.
        if (remaining < TimeSpan.Zero)
        {
            remaining = TimeSpan.Zero;
        }

        // TotalMinutes rather than Minutes: the Minutes component wraps at 60 and
        // would hide whole hours of a long cool-down.
        msg = $"熔断中 (剩余 {(int)remaining.TotalMinutes}:{remaining.Seconds:D2})";
    }

    return new ProcessInfoSnapshot
    {
        Id = _config.Id,
        DisplayName = _config.DisplayName,
        Pid = pid,
        Status = status,
        LastStartTime = _lastStartTime,
        LastExitTime = _lastExitTime,
        ConsecutiveFailures = _consecutiveFailures,
        NextRetryTime = nextRetry,
        Message = msg
    };
}
#endregion
}
}