Mercurial > servermonitor
view ServerMonitor/Objects/ServerMonitor.cs @ 35:2ffb0bda7705
Increase wait time after system resume to begin running checks to prevent false positives.
author | Brad Greco <brad@bgreco.net> |
---|---|
date | Sat, 13 Jul 2019 12:18:21 -0400 |
parents | f6235dc0a8ec |
children | 10e60b05c7ec |
line wrap: on
line source
using Microsoft.Win32;
using NAudio.Wave;
using Renci.SshNet;
using Renci.SshNet.Common;
using ServerMonitorApp.Properties;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Media;
using System.Net.NetworkInformation;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Xml.Serialization;

namespace ServerMonitorApp
{
    /// <summary>Central class for scheduling and executing checks against remote servers.</summary>
    public class ServerMonitor
    {
        private readonly string configFileDir;
        private readonly Logger logger;

        // Cancellation tokens for executing checks, keyed by check ID.
        private readonly Dictionary<int, CancellationTokenSource> tokens = new Dictionary<int, CancellationTokenSource>();

        // SSH private keys, keyed by the path to the private key file.
        // A value of NULL indicates that the private key is inaccessible or encrypted.
        private readonly Dictionary<string, PrivateKeyFile> privateKeys = new Dictionary<string, PrivateKeyFile>();

        // IDs of all checks that have been paused due to network unavailability,
        // or due to the system being suspended.
        // Not to be confused with checks that have been disabled by the user.
        private readonly List<int> pausedChecks = new List<int>();

        private bool running, networkAvailable, suspend;

        // List of check execution tasks that have been started.
        // A check task begins by sleeping until the next scheduled execution time,
        // then executes.
        private Dictionary<Task<CheckResult>, int> tasks;

        private ServerSummaryForm mainForm;
        private WaveOut waveOut = new WaveOut();
        MediaFoundationReader mediaReader;

        /// <summary>Fires when the status of a check changes.</summary>
        public event EventHandler<CheckStatusChangedEventArgs> CheckStatusChanged;

        /// <summary>The collection of registered servers.</summary>
        public List<Server> Servers { get; private set; } = new List<Server>();

        /// <summary>A collection of all checks belonging to all registered servers.</summary>
        public IEnumerable<Check> Checks => Servers.SelectMany(s => s.Checks);

        /// <summary>Path to the file that stores server and check configuration.</summary>
        public string ConfigFile { get; private set; }

        /// <summary>
        /// Paths of the private key files that have been registered but for which no
        /// key is currently loaded (their entry in the key cache is null).
        /// </summary>
        public IEnumerable<string> LockedKeys
        {
            get { return privateKeys.Where(kvp => kvp.Value == null).Select(kvp => kvp.Key); }
        }

        /// <summary>ServerMonitor constructor.</summary>
        /// <param name="mainForm">A reference to the main form.</param>
        public ServerMonitor(ServerSummaryForm mainForm)
        {
            this.mainForm = mainForm;
            // Store configuration in %appdata%\ServerMonitor
            configFileDir = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "ServerMonitor");
            ConfigFile = Path.Combine(configFileDir, "servers.xml");
            logger = new Logger(Path.Combine(configFileDir, "monitor.log"));
        }

        /// <summary>Registers a new server with the server monitor.</summary>
        /// <param name="server">The server to be added.</param>
        public void AddServer(Server server)
        {
            Servers.Add(server);
            SaveServers();
            server.CheckModified += Server_CheckModified;
            server.EnabledChanged += Server_EnabledChanged;
        }

        /// <summary>Deletes a server from the server monitor.</summary>
        /// <param name="server">The server to be deleted.</param>
        public void DeleteServer(Server server)
        {
            Servers.Remove(server);
            // Cancel all queued and executing checks belonging to a
            // server that was deleted.
            foreach (Check check in server.Checks)
            {
                CancelCheck(check);
            }
            SaveServers();
        }

        /// <summary>
        /// Loads all servers and checks from the config file, subscribes to
        /// application, network and power events, trims the log and starts the monitor loop.
        /// </summary>
        /// <remarks>
        /// If the config file cannot be deserialized, it is copied to "*.error", replaced
        /// by the "*.bak" backup (when one exists) and read once more. A second failure
        /// is rethrown to the caller.
        /// </remarks>
        public void LoadServers()
        {
            bool triedBackup = false;

            Read:
            TextReader reader = null;
            try
            {
                reader = new StreamReader(ConfigFile);
                XmlSerializer serializer = CreateXmlSerializer();
                Servers.Clear();
                Servers.AddRange((List<Server>)serializer.Deserialize(reader));
                // Do some more set-up now that the servers and checks have been loaded.
                foreach (Server server in Servers)
                {
                    // Read private keys into memory if they are accessible and not encrypted.
                    // If PrivateKeyFile != null, it means the same key has already been loaded for
                    // a different server and nothing more needs to be done.
                    if (server.LoginType == LoginType.PrivateKey && server.PrivateKeyFile == null)
                        OpenPrivateKey(server.KeyFile);

                    foreach (Check check in server.Checks)
                    {
                        // Update the checks so they know what server they belong to.
                        // Would rather do this in the Server object on deserialization, but
                        // that doesn't work when using the XML serializer for some reason.
                        check.Server = server;
                        // If the program last exited while the check was running, change its status
                        // to the result of its last execution (since, at this point, the check is
                        // not running).
                        if (check.Status == CheckStatus.Running)
                            check.Status = check.LastRunStatus;
                    }
                    server.CheckModified += Server_CheckModified;
                    server.EnabledChanged += Server_EnabledChanged;
                }
            }
            // If the file doesn't exist, no special handling is needed. It will be created later.
            catch (FileNotFoundException) { }
            catch (DirectoryNotFoundException) { }
            catch (InvalidOperationException)
            {
                // Release the file before it is copied or overwritten below.
                reader?.Close();
                // If there was an error parsing the XML, try again with the backup config file.
                if (!triedBackup)
                {
                    File.Copy(ConfigFile, ConfigFile + ".error", true);
                    string backupConfig = ConfigFile + ".bak";
                    if (File.Exists(backupConfig))
                    {
                        File.Copy(backupConfig, ConfigFile, true);
                    }
                    triedBackup = true;
                    goto Read;
                }
                else
                {
                    // If there was an error reading the backup file too, give up.
                    throw;
                }
            }
            finally
            {
                reader?.Close();
            }

            Application.ApplicationExit += Application_ApplicationExit;
            NetworkChange.NetworkAddressChanged += NetworkChange_NetworkAddressChanged;
            SystemEvents.PowerModeChanged += SystemEvents_PowerModeChanged;

            // Remove old entries from the log file according to user preferences.
            logger.TrimLog();
            Run();
        }

        /// <summary>Saves all servers and checks to the config file.</summary>
        /// <remarks>
        /// IDs are generated for new servers and checks first. The previous config file is
        /// copied to "*.bak" before being overwritten.
        /// </remarks>
        public void SaveServers()
        {
            GenerateIds();
            TextWriter writer = null;
            XmlSerializer serializer = null;
            try
            {
                // Make a backup first in case something goes wrong in the middle of writing.
                File.Copy(ConfigFile, ConfigFile + ".bak", true);
            }
            catch
            {
                // Best effort only: a failed backup must not prevent the save itself.
            }

            try
            {
                writer = new StreamWriter(ConfigFile);
                serializer = CreateXmlSerializer();
                serializer.Serialize(writer, Servers);
            }
            catch (DirectoryNotFoundException)
            {
                // If the directory does not exist, create it and try again.
                Directory.CreateDirectory(configFileDir);
                writer = new StreamWriter(ConfigFile);
                serializer = CreateXmlSerializer();
                serializer.Serialize(writer, Servers);
            }
            finally
            {
                writer?.Close();
            }
        }

        /// <summary>Main server monitor loop. Schedules and executes checks.</summary>
        private async void Run()
        {
            // Do not run again if already running or if the system is suspending or resuming.
            if (running || suspend)
                return;

            running = true;

            // If the network is available, immediately execute checks that were supposed to run
            // earlier but could not due to network unavailability or the system being suspended.
            networkAvailable = Helpers.IsNetworkAvailable();
            if (networkAvailable)
            {
                // Iterate over a snapshot: while a check is being awaited, CancelCheck and
                // ScheduleExecuteCheckAsync may add to or remove from pausedChecks, which
                // would invalidate an enumerator over the live list.
                foreach (int id in pausedChecks.ToList())
                {
                    // Skip checks that were cancelled while an earlier one was executing.
                    if (!pausedChecks.Contains(id))
                        continue;

                    // The check may no longer exist (e.g. its server was deleted).
                    Check pausedCheck = Checks.FirstOrDefault(c => c.Id == id);
                    if (pausedCheck != null)
                        await ExecuteCheckAsync(pausedCheck);
                }
                pausedChecks.Clear();
            }

            // Schedule all checks to run according to their schedules.
            // Each check will sleep until it is scheduled to run, then execute.
            tasks = Checks.ToDictionary(c => ScheduleExecuteCheckAsync(c), c => c.Id);
            while (tasks.Count > 0)
            {
                // When any check is done sleeping and executing, remove the completed
                // task and queue a new task to schedule it again.
                Task<CheckResult> task = await Task.WhenAny(tasks.Keys);
                tasks.Remove(task);
                try
                {
                    CheckResult result = await task;
                    // Do not schedule the task again if it is now disabled.
                    // Result will be null if a scheduled check was disabled.
                    if (result != null && result.CheckStatus != CheckStatus.Disabled)
                        tasks.Add(ScheduleExecuteCheckAsync(result.Check), result.Check.Id);
                }
                catch (OperationCanceledException)
                {
                    // When a server's state changes to Disabled, any checks that are executing
                    // are immediately cancelled. Silently catch these expected exceptions.
                }
            }

            // If there are no enabled checks scheduled, exit the main loop.
            // It will be restarted when a check or server is enabled.
            running = false;
        }

        /// <summary>Schedules a check to be run on its schedule.</summary>
        /// <param name="check">The check to execute.</param>
        /// <returns>
        /// The async check result. A result with status <see cref="CheckStatus.Disabled"/> is
        /// returned when the check was not executed (disabled, cancelled, or paused because
        /// the network is unavailable).
        /// </returns>
        private async Task<CheckResult> ScheduleExecuteCheckAsync(Check check)
        {
            // Do not schedule or execute the check if it or its server is disabled.
            if (!check.Enabled || !check.Server.Enabled)
                return new CheckResult(check, CheckStatus.Disabled, null);

            // Create a cancellation token that will be used to cancel the check if it or
            // its server is disabled while it is executing.
            CancellationTokenSource cts = new CancellationTokenSource();
            tokens[check.Id] = cts;

            // Sleep until next time the check is supposed to be executed.
            // Use the LastScheduledRunTime so manual executions by the user do not
            // interfere with the schedule.
            check.NextRunTime = check.Schedule.GetNextTime(check.LastScheduledRunTime);
            // NOTE(review): the int cast is out of range when the next run is more than
            // int.MaxValue milliseconds (about 24.8 days) away - confirm schedules stay below that.
            int delay = Math.Max(0, (int)(check.NextRunTime - DateTime.Now).TotalMilliseconds);
            await Task.Delay(delay, cts.Token);
            check.LastScheduledRunTime = check.NextRunTime;

            // Execute the check if not cancelled.
            if (!cts.IsCancellationRequested)
            {
                // If the network is available, execute the check.
                // Otherwise, add it to the list of paused checks to be executed
                // when the network becomes available again.
                if (networkAvailable)
                {
                    return await ExecuteCheckAsync(check, cts.Token);
                }
                else
                {
                    if (!pausedChecks.Contains(check.Id))
                        pausedChecks.Add(check.Id);
                }
            }
            return new CheckResult(check, CheckStatus.Disabled, null);
        }

        /// <summary>Executes a check asynchronously.</summary>
        /// <param name="check">The check to execute.</param>
        /// <param name="token">A cancellation token that may be used to cancel the check execution.</param>
        /// <returns>The async check result.</returns>
        public async Task<CheckResult> ExecuteCheckAsync(Check check, CancellationToken token = default(CancellationToken))
        {
            // Update the status.
            check.Status = CheckStatus.Running;
            OnCheckStatusChanged(check);

            // Execute the check.
            CheckResult result = await check.ExecuteAsync(token);

            // Increment the consecutive failure counter on failure, or reset
            // the counter on success.
            if (result.Failed)
                check.ConsecutiveFailures++;
            else
                check.ConsecutiveFailures = 0;

            OnCheckStatusChanged(check, result);
            HandleResultAsync(result);
            return result;
        }

        /// <summary>Handles the result of a check execution.</summary>
        /// <param name="result">The result.</param>
        private void HandleResultAsync(CheckResult result)
        {
            // Log the result.
            logger.Log(result);

            // Notify the user of failure according to user preferences.
            // If the check succeeded, result.FailAction will be None.
            if (result.Check.ConsecutiveFailures >= result.Check.MaxConsecutiveFailures)
            {
                if (result.FailAction == FailAction.FlashTaskbar)
                    mainForm.AlertServerForm(result.Check);
                if (result.FailAction.In(FailAction.FlashTaskbar, FailAction.NotificationBalloon))
                    mainForm.ShowBalloon(result);
                PlaySound(result.FailSound);
            }
        }

        /// <summary>Plays a sound.</summary>
        /// <param name="sound">
        /// If null, no sound is played.
        /// If string.Empty, the Windows default error sound is played.
        /// Otherwise, plays the sound at the given path.
        /// </param>
        private void PlaySound(string sound)
        {
            if (sound == string.Empty)
                SystemSounds.Asterisk.Play();
            else if (sound != null)
            {
                try
                {
                    mediaReader = new MediaFoundationReader(sound);
                    waveOut.Init(mediaReader);
                    // waveOut lives as long as the monitor, so make sure the handler is
                    // registered only once no matter how many sounds are played.
                    waveOut.PlaybackStopped -= WaveOut_PlaybackStopped;
                    waveOut.PlaybackStopped += WaveOut_PlaybackStopped;
                    waveOut.Play();
                }
                catch
                {
                    // Play the default sound if something went wrong.
                    SystemSounds.Asterisk.Play();
                }
            }
        }

        /// <summary>Disposes the media reader after sound playback.</summary>
        private void WaveOut_PlaybackStopped(object sender, StoppedEventArgs e)
        {
            mediaReader?.Dispose();
        }

        /// <summary>Reads all check results from the log for a server.</summary>
        /// <param name="server">The server whose check results should be read.</param>
        /// <returns>A list of all check results found in the log file for the given server.</returns>
        public IList<CheckResult> GetLog(Server server)
        {
            return logger.Read(server);
        }

        /// <summary>Saves the check settings and notifies event subscribers when the status of a check changes.</summary>
        /// <param name="check">The check whose status has changed.</param>
        /// <param name="result">The check result that caused the status to change, if any.</param>
        private void OnCheckStatusChanged(Check check, CheckResult result = null)
        {
            SaveServers();
            CheckStatusChanged?.Invoke(check, new CheckStatusChangedEventArgs(check, result));
        }

        /// <summary>Handles user modifications to a check's settings.</summary>
        /// <param name="sender">The check that was modified.</param>
        /// <param name="e">Unused.</param>
        private void Server_CheckModified(object sender, EventArgs e)
        {
            Check check = (Check)sender;

            // No need to mess with the task queue if not currently running.
            if (running)
            {
                Task<CheckResult> task = tasks.FirstOrDefault(kvp => kvp.Value == check.Id).Key;
                if (task == null)
                {
                    // No tasks associated with the check, so schedule a new one.
                    tasks.Add(ScheduleExecuteCheckAsync(check), check.Id);
                }
                else
                {
                    // Check was modified or deleted, so remove any waiting tasks.
                    CancelCheck(check);
                    if (check.Server != null)
                    {
                        // If the check was not deleted, schedule the new check.
                        // But only if it's still running, otherwise restarting the monitor below
                        // will create a duplicate run.
                        if (running)
                            tasks.Add(ScheduleExecuteCheckAsync(check), check.Id);
                    }
                }
            }
            // Run again in case removing a task above caused it to stop.
            Run();
        }

        /// <summary>Handles the enabled state of a server changing.</summary>
        /// <param name="sender">The server that was enabled or disabled.</param>
        /// <param name="e">Unused.</param>
        private void Server_EnabledChanged(object sender, EventArgs e)
        {
            Server server = (Server)sender;
            // Make sure the monitor is running. If no servers were enabled before this
            // one was enabled, it is not running.
            if (server.Enabled)
            {
                Run();
                // Schedule all checks to run.
                foreach (Check check in server.Checks)
                {
                    Server_CheckModified(check, EventArgs.Empty);
                }
            }
            else
            {
                // Cancel all queued and executing checks belonging to a
                // server that was disabled.
                foreach (Check check in server.Checks)
                {
                    CancelCheck(check);
                }
            }
        }

        /// <summary>Cancels a check that may be executing.</summary>
        /// <param name="check">The check to cancel.</param>
        private void CancelCheck(Check check)
        {
            // Nothing has ever been scheduled, so there is nothing to cancel.
            if (tasks == null)
                return;

            // Find the waiting or executing task for the check and remove it.
            Task<CheckResult> task = tasks.FirstOrDefault(kvp => kvp.Value == check.Id).Key;
            if (task != null)
                tasks.Remove(task);

            // Remove it from the list of paused checks so it doesn't get restarted later.
            pausedChecks.RemoveAll(id => id == check.Id);

            // Cancel the current execution.
            if (tokens.TryGetValue(check.Id, out CancellationTokenSource cts))
                cts.Cancel();
        }

        /// <summary>Handles network state changing.</summary>
        private void NetworkChange_NetworkAddressChanged(object sender, EventArgs e)
        {
            networkAvailable = Helpers.IsNetworkAvailable();
            // If the network is available, it might not have been before.
            // This method is not called from the correct thread, so special
            // handling is needed to start it on the UI thread again.
            if (networkAvailable)
                mainForm.Invoke((MethodInvoker)(() => Run()));
        }

        /// <summary>Handles system power mode changes.</summary>
        private async void SystemEvents_PowerModeChanged(object sender, PowerModeChangedEventArgs e)
        {
            // If the system is being suspended, cancel all waiting and executing checks.
            // Once all the checks are removed, the main loop will exit.
            if (e.Mode == PowerModes.Suspend)
            {
                foreach (Check check in Checks)
                {
                    CancelCheck(check);
                }
                suspend = true;
            }
            else if (e.Mode == PowerModes.Resume)
            {
                // When resuming from suspend, examine each check to find out if it was
                // scheduled to be executed during the time period when the system was
                // suspended. Add them to the paused checks list, to be executed almost
                // immediately.
                // Make sure the list is empty to start.
                pausedChecks.Clear();
                foreach (Check check in Checks)
                {
                    if (check.Enabled && check.Server.Enabled && check.NextRunTime < DateTime.Now)
                    {
                        pausedChecks.Add(check.Id);
                    }
                }
                // Wait 20 seconds to give things time to quiet down after resuming.
                await Task.Delay(20000);
                suspend = false;
                Run();
            }
        }

        /// <summary>Unregister system events when exiting.</summary>
        private void Application_ApplicationExit(object sender, EventArgs e)
        {
            NetworkChange.NetworkAddressChanged -= NetworkChange_NetworkAddressChanged;
            SystemEvents.PowerModeChanged -= SystemEvents_PowerModeChanged;
        }

        /// <summary>Attempts to read a private key file.</summary>
        /// <param name="path">The path to the private key file.</param>
        /// <param name="password">The password used to encrypt the key.</param>
        /// <returns>
        /// A status indicating the result of the attempt. A null <paramref name="path"/>
        /// yields <see cref="KeyStatus.NotAccessible"/> without modifying any server.
        /// </returns>
        public KeyStatus OpenPrivateKey(string path, string password = null)
        {
            // Without a path there is nothing to open, and null cannot be used as a
            // key in the private key cache.
            if (path == null)
                return KeyStatus.NotAccessible;

            KeyStatus keyStatus;

            // Check if the key has already been open and read.
            if (privateKeys.TryGetValue(path, out PrivateKeyFile key) && key != null)
                keyStatus = KeyStatus.Open;
            else
            {
                try
                {
                    key = new PrivateKeyFile(path, password);
                    keyStatus = KeyStatus.Open;
                }
                // If the key is encrypted and the password is empty or incorrect,
                // return the NeedPassword status.
                catch (Exception e) when (e is SshPassPhraseNullOrEmptyException || e is InvalidOperationException)
                {
                    keyStatus = KeyStatus.NeedPassword;
                }
                // For any other failure reason, return the NotAccessible status.
                catch (Exception)
                {
                    keyStatus = KeyStatus.NotAccessible;
                }
            }

            // A single private key may be used by multiple servers. Update all servers
            // that use this private key with the results of the above operations.
            foreach (Server server in Servers)
            {
                if (server.KeyFile == path)
                {
                    server.PrivateKeyFile = key;
                    server.KeyStatus = keyStatus;
                }
            }

            // Keep a reference to this private key so we don't have to re-open
            // it later if the same key is used on a different server.
            privateKeys[path] = key;
            return keyStatus;
        }

        /// <summary>Generates internal IDs for servers and checks.</summary>
        private void GenerateIds()
        {
            if (Servers.Any())
            {
                // Start at the maximum ID to make sure IDs are not reused
                // if a server was deleted so old log entries do not get associated
                // with a new server.
                int id = Servers.Max(s => s.Id);
                foreach (Server server in Servers)
                {
                    // An ID of 0 marks a server that has not been assigned an ID yet.
                    if (server.Id == 0)
                        server.Id = ++id;
                }
            }

            if (Checks.Any())
            {
                // Start with the max check ID, same reasons as above.
                // Is there a reason this is stored in a setting?
                int id = Math.Max(Settings.Default.MaxCheckId, Checks.Max(c => c.Id));
                foreach (Check check in Checks)
                {
                    if (check.Id == 0)
                        check.Id = ++id;
                }
                Settings.Default.MaxCheckId = id;
                Settings.Default.Save();
            }
        }

        /// <summary>Creates an XML serializer that can handle servers and all check types.</summary>
        /// <returns>An XML serializer that can handle servers and all check types.</returns>
        private XmlSerializer CreateXmlSerializer()
        {
            return new XmlSerializer(typeof(List<Server>), Check.CheckTypes);
        }
    }

    /// <summary>Event arguments for when a check status changes.</summary>
    public class CheckStatusChangedEventArgs : EventArgs
    {
        /// <summary>The check whose status changed.</summary>
        public Check Check { get; private set; }

        /// <summary>The check result that caused the status to change, if any.</summary>
        public CheckResult CheckResult { get; private set; }

        /// <summary>Creates the event arguments.</summary>
        /// <param name="check">The check whose status changed.</param>
        /// <param name="result">The check result that caused the change, or null.</param>
        public CheckStatusChangedEventArgs(Check check, CheckResult result)
        {
            Check = check;
            CheckResult = result;
        }
    }

    /// <summary>Possible actions that may be taken when a check fails.</summary>
    public enum FailAction
    {
        /// <summary>Flashes the Server Monitor taskbar program icon.</summary>
        FlashTaskbar = 0,

        /// <summary>Shows a balloon in the notification area.</summary>
        NotificationBalloon = 1,

        /// <summary>Take no action.</summary>
        None = 10
    }
}