/** Enterprise Control Configuration and Logging Copyright (C) 2012 Free Software Foundation, Inc. Written by: Richard Frith-Macdonald Date: Febrary 2010 Originally developed from 1996 to 2012 by Brainstorm, and donated to the FSF. This file is part of the GNUstep project. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 USA. */ #import #import #if GS_USE_GNUTLS #import #endif #import "EcProcess.h" #import "EcAlarm.h" #import "EcClientI.h" #import "EcHost.h" #import "NSFileHandle+Printf.h" #import "config.h" #define DLY 300.0 #define FIB 0.1 static NSCalendarDate * date(NSTimeInterval t) { NSCalendarDate *d; d = [NSCalendarDate dateWithTimeIntervalSinceReferenceDate: t]; [d setCalendarFormat: @"%Y-%m-%d %H:%M:%S.%F %z"]; return d; } static BOOL restartStatus(int terminationStatus) { switch (terminationStatus) { case -1: case -5: return YES; default: return NO; } } static const NSTimeInterval day = 24.0 * 60.0 * 60.0; static int tStatus = 0; static NSTimeInterval pingTime = 240.0; static int comp_len = 0; static int comp(NSString *s0, NSString *s1) { if ([s0 length] > [s1 length]) { comp_len = -1; return -1; } if ([s1 compare: s0 options: NSCaseInsensitiveSearch|NSLiteralSearch range: NSMakeRange(0, [s0 length])] == NSOrderedSame) { comp_len = [s0 length]; if (comp_len == (int)[s1 length]) { return 0; } else { return 1; } } else { 
comp_len = -1;
return -1;
}
}

/* Decides whether a typed word selects the command called reference.
 * The word must be a case-insensitive leading part of (or equal to) the
 * reference, as judged by comp().  When an allow list is supplied the
 * reference must also be a member of that list.
 */
static BOOL
matchCmd(NSString *word, NSString *reference, NSArray *allow)
{
  BOOL  permitted;

  if (comp(word, reference) < 0)
    {
      return NO;
    }
  permitted = (nil == allow) ? YES : [allow containsObject: reference];
  return permitted ? YES : NO;
}

/* Returns the word at position pos of the array, or an empty string if
 * the array is nil or has no element at that position.
 */
static NSString*
cmdWord(NSArray* a, unsigned int pos)
{
  if (nil == a || [a count] <= pos)
    {
      return @"";
    }
  return [a objectAtIndex: pos];
}

static BOOL             debug = YES;
static NSTimeInterval   quitTime = 120.0;

/* When this control process needs to shut down *all* clients,
 * we set the date for the shutdown to end.
 */
static NSDate           *terminateBy = nil;

static NSUInteger               launchLimit = 0;
static BOOL                     launchEnabled = NO;
static NSMutableDictionary      *launchInfo = nil;
static NSArray                  *launchOrder = nil;
static NSMutableArray           *launchQueue = nil;

typedef enum {
  ACLaunchFailed,       // Must be first
  ACProcessHung,
  ACProcessLost         // Must be last
} AlarmCode;

/* Supplies the 'specific problem' and 'proposed repair action' texts for
 * an alarm code.  Both results are set to nil for an unrecognised code.
 */
static void
ACStrings(AlarmCode ac, NSString **problem, NSString **repair)
{
  NSString      *p = nil;
  NSString      *r = nil;

  if (ACLaunchFailed == ac)
    {
      p = @"Launch failed";
      r = @"Check logs and correct launch/startup failure";
    }
  else if (ACProcessHung == ac)
    {
      p = @"Process hung";
      r = @"Check logs and deal with cause of unresponsiveness";
    }
  else if (ACProcessLost == ac)
    {
      p = @"Process lost";
      r = @"Check logs and deal with cause of shutdown/crash";
    }
  *problem = p;
  *repair = r;
}

/* The desired state of a LaunchInfo object says whether the corresponding
 * process has a particular state to which it should be returned.
 * The initial state is determined by the configuration for the process,
 * but that may be changed either by instruction (eg the Console commands)
 * or by a configuration change.
*/
typedef enum {
  None = 0,     // The process is free to start/stop
  Dead,         // The process should be stopped if it is live
  Live          // The process should be started if it is dead
} Desired;

/* Returns the printable name of a desired state.  Any value other than
 * Live or Dead is reported as "None".
 */
static NSString *
desiredName(Desired state)
{
  switch (state)
    {
      case Live: return @"Live";
      case Dead: return @"Dead";
      default: return @"None";
    }
}

/**
 * starting     means that the server has attempted to start the process
 *              (or is waiting for some precondition of launching) but the
 *              process has not yet established a connection to the server
 *              and registered itself.
 * stopping     means that the server has attempted to shut down the process
 *              (or the process has told the server it is shutting down), so
 *              the connection between the process and the server may not
 *              exist (and should not be used).
 * desired      defines whether the steady state of this process should
 *              be running/live or shut down.  When the process reaches
 *              a steady state (ie is not starting or stopping) that does
 *              not match the desired state, the server should initiate a
 *              change of state.
 * identifier   If the process is shut down, this should be zero.
 *              Otherwise it is the process ID used by the operating system
 *              and indicates that the process is starting, stopping, or in
 *              a steady live state (in which case it should also be
 *              connected as a client of the server).
 *
 * If while starting, the process dies, we should schedule relaunches at
 * increasing intervals until a process survives and connects.
 * If starting takes too long (because launch attempts fail, the processes
 * die, or they stay alive but fail to connect to the server),
 * we should raise an alarm.
 * If stopping takes too long, we should forcibly terminate the process
 * if we can, and raise an alarm if we fail to kill it.
 */
@interface LaunchInfo : NSObject
{
  /** The name of this process
   */
  NSString              *name;

  /** The configuration from Control.plist
   */
  NSDictionary          *conf;

  /** The current process ID (or zero if there isn't one).
   * This is set when we launch the process or when the process is launched
   * externally and connects and registers itself with the Command server.
   */
  int                   identifier;

  /** The current task (if launched by us).
   */
  NSTask                *task;

  /** The client instance representing a registered distributed objects
   * connection from the process into the Command server.
   */
  EcClientI             *client;

  /** Set at the point when a previously registered process is shutting
   * down and drops the connection to the Command server.  It indicates
   * an unintentional shutdown of the process (a failure).
   */
  NSTimeInterval        clientLostDate;

  /** Set at the point when a previously registered process is shutting
   * down and cleanly unregisters with the Command server.  It indicates
   * an intentional shutdown of the process.
   */
  NSTimeInterval        clientQuitDate;

  NSTimeInterval        fib0;   // fibonacci sequence for delays
  NSTimeInterval        fib1;   // fibonacci sequence for delays

  /** Records the desired state for this process (usually when a command
   * is issued at the Console).  This is needed for situations where the
   * system cannot respond immediately to an instruction.  For instance
   * the process is shutting down and a start command is given: we must
   * continue to complete the clean shutdown, and then start up again.
   */
  Desired               desired;        // If process *should* be live/dead

  /** The timestamp at which the current startup operation began, or zero
   * if the process is not currently starting.
   */
  NSTimeInterval        startingDate;

  /** A timer to progress the startup process.  When it fires the -starting:
   * method is called to check on progress and raise an alarm if startup is
   * taking too long.
   */
  NSTimer               *startingTimer; // Not retained

  /** A flag set, during the startup process, if an alarm has been raised
   * because startup is taking too long.  This prevents re-raising of the
   * alarm.
   */
  BOOL                  startingAlarm;

  /** A timestamp set if an alarm has been raised because the process is not
   * responding to pings.  This is cleared if/when the process re-registers.
   */
  NSTimeInterval        hungDate;

  /** A timestamp set when a ping response is received.
   */
  NSTimeInterval        pingDate;

  /** The timestamp at which the process began shutting down, or zero
   * if the process is not currently stopping.
   */
  NSTimeInterval        stoppingDate;

  /** A timer to progress the stopping process.  When it fires the -stopping:
   * method is called to check on progress and, if stopping has taken too long,
   * attempt to forcibly terminate the process.
   */
  NSTimer               *stoppingTimer; // Not retained

  /** When a process termination is detected, this variable records it.
   */
  NSTimeInterval        terminationDate;

  /** If, during startup, the process terminates and has to be relaunched,
   * we record the count of attempts here.
   */
  unsigned              terminationCount;

  /** Where the Command server launched the process and is able to get the
   * process termination status, these variables are used to record it.
   */
  int                   terminationSignal;      // Last exit signal
  int                   terminationStatus;      // Last exit status
  BOOL                  terminationStatusKnown;

  /** The timestamp at which the process registered with the Command server,
   * or zero if it has not registered.
   */
  NSTimeInterval        registrationDate;       // Time of process registration

  /** The timestamp at which the process told the Command server it had
   * completely awakened (was ready to handle requests) or zero if it has
   * not woken.
   */
  NSTimeInterval        awakenedDate;   // When the process was awakened

  /** If there is a problem causing processes to fail repeatedly and autostart
   * to retry, we impose a slightly longer delay between each successive
   * relaunch.  In that case the deferredDate tells us when the queued
   * starting process can next be launched.
   */
  NSTimeInterval        deferredDate;   // Deferred re-launch interval

  /** If a starting process cannot be launched immediately, this records the
   * timestamp at which it was added to the queue of processes awaiting launch.
   */
  NSTimeInterval        queuedDate;     // When queued for launch

  /** Once a process has been active for a while it is considered stable.
   * A stable process will, if it terminates without shutting down cleanly,
   * be eligible for immediate autolaunch.
   */
  NSTimeInterval        stableDate;     // Has been running for a while

  /** On process registration this is set to the timestamp at which the
   * process may be considered stable.  Normally this is a short while
   * after the startup, but if the process was lost it will be set to
   * a later date.
   */
  NSTimeInterval        nextStableDate;

  /** The timestamp at which we last launched this process.
   */
  NSTimeInterval        launchDate;     // When we launched process

  /** The timestamp at which we will abort a process (if it fails to shut
   * down as quickly as we need it to).
   */
  NSTimeInterval        abortDate;      // When we abort process

  /** If we want the process to restart, this reason is why, and is passed
   * to the process so that it can log why it was restarted.
   */
  NSString              *restartReason; // Reason for restart or nil

  /** Records the reason we desire the process to be started.
   */
  NSString              *startedReason;

  /** Records the reason we desire the process to be stopped.
   */
  NSString              *stoppedReason;

  /** Records the names of other processes which must be active in order for
   * this process to work.  Any attempt to start this process will result in
   * it remaining in a queue of starting processes until all the dependencies
   * have been met.
   */
  NSArray               *dependencies;

  /** Records the alarms currently raised for this process.
   */
  NSMutableArray        *alarms;

  /** Flag used for a manually stopped process to show that autolaunch is
   * no longer to be done.
   */
  BOOL                  manual;

  /** Flag to detect recursive call to -starting:
   */
  BOOL                  inStarting;
}
+ (NSString*) description;
+ (LaunchInfo*) existing: (NSString*)name;
+ (LaunchInfo*) find: (NSString*)abbreviation;
+ (LaunchInfo*) launchInfo: (NSString*)name;
+ (NSUInteger) launching;
+ (NSArray*) names;
+ (void) processQueue;
+ (void) remove: (NSString*)name;

/** Adds an alarm to the list raised (removes if a clear is passed in)
 * creating the array if it does not exist (even if a clear is passed in).
 * This behavior ensures that the -alarms method returns nil if neither
 * raises nor clears have taken place, but an array otherwise.
 */
- (void) alarm: (EcAlarm*)alarm;

/** Returns the alarms raised for this process, an empty array if none are
 * currently raised, and nil if this is not known (eg if the process raised
 * an alarm and then failed to clear the alarm when restarting).
 */
- (NSArray*) alarms;
- (BOOL) autolaunch;
- (void) awakened;
- (BOOL) checkActive;
- (BOOL) checkProcess;
- (void) clearClient: (EcClientI*)c cleanly: (BOOL)unregisteredOrTransient;
- (void) clearHung;
- (EcClientI*) client;
- (NSDictionary*) configuration;
- (NSTimeInterval) delay;
- (Desired) desired;
- (BOOL) disabled;
- (NSTimeInterval) hungDate;
- (BOOL) isActive;
- (BOOL) isStarting;
- (BOOL) isStopping;
- (BOOL) launch;
- (BOOL) manual;
- (BOOL) mayBecomeStable;
- (BOOL) mayCoreDump;
- (NSString*) name;
- (int) processIdentifier;
- (void) progress;
- (NSString*) reasonToPreventLaunch;
- (void) resetDelay;
- (void) setClient: (EcClientI*)c;
- (void) setConfiguration: (NSDictionary*)c;
- (void) setDesired: (Desired)state;
- (void) setHung;
- (void) setManual: (BOOL)f;
- (void) setPing;
- (void) setProcessIdentifier: (int)p;
- (void) setStable: (BOOL)s;
- (void) setTerminationStatus: (int)s;
- (BOOL) stable;

/** Initiates the startup of a process.  This will either add the receiver to
 * the queue of processes to be started (if it can't be started immediately)
 * or launch the process using the configuration set for it.  The startup
 * will then continue until the launched process registers itself with the
 * Command server, at which point the -started method will be called.
 */
- (void) start;

/** Initiates the startup of a process for the supplied reason, but only if
 * it makes sense.  This method performs the checks to see if the startup
 * should take place, then calls -start if it should.
 */
- (void) start: (NSString*)reason;

/** Called automatically when startup of a process completes, either as a
 * result of an internal -start or as a result of an externally launched
 * process connecting to and registering with the Command server.
 */
- (void) started;

/** Internal timer method for handling the progression of a startup.  If the
 * startup takes too long, this method will raise an alarm.
 */
- (void) starting: (NSTimer*)t;

/** Returns a human readable description of the current process status.
 */
- (NSString*) status;

/** Initiates the shut down of a process.  This will use the DO connection to
 * a registered process to tell it to shut itself down.
 * The shutdown will continue until the process no longer exists, but if it
 * goes on longer than the time limit, the process will be killed.
 */
- (void) stop;

/** Initiates stopping of a process only if it makes sense.  This method
 * performs checks and, if the process can be stopped, calls the -stop
 * method to do it.  This sets the desired state of the process to Dead.
 */
- (void) stop: (NSString*)reason;

/** Called at the point when a stopping process finally ceases to exist.
 */
- (void) stopped;
- (void) stopping: (NSTimer*)t;
- (NSTask*) task;
- (NSArray*) unfulfilled;
@end

/* Special configuration options are:
 *
 * CompressDebugAfter
 *   A positive integer number of days after which debug should be compressed
 *   defaults to 7.
* * CompressLogsAfter * A positive integer number of days after which logs should be compressed * defaults to 7. * * DeleteDebugAfter * A positive integer number of days after which debug should be deleted. * Constrained to be at least as large as CompressDebugAfter. * Defaults to 90, but debug may still be deleted as if this were set * to CompressDebugAfter if NodesFree or SpaceFree is reached. * * DeleteLogsAfter * A positive integer number of days after which logs should be deleted. * Constrained to be at least as large as CompressLogsAfter. * Defaults to 180, but logs may still be deleted as if this were set * to CompressLogsAfter if NodesFree or SpaceFree is reached. * * SetE * A dictionary setting the default environment for launched processes. * * AddE * A dictionary overriding parts of the default environment for launched * processes. * * Launch * A dictionary describing the processes which the server is responsible * for launching. * * NodesFree * A string giving a percentage of the total nodes on the disk below * which an alert should be raised. Defaults to 10. * Minimum 2, Maximum 90. * * SpaceFree * A string giving a percentage of the total space on the disk below * which an alert should be raised. Defaults to 10. * Minimum 2, Maximum 90. 
* */ @interface EcCommand : EcProcess { NSString *host; id control; NSMutableArray *clients; NSTimer *timer; NSString *logname; NSMutableDictionary *config; NSDictionary *environment; NSTimer *terminating; NSDate *outstanding; unsigned fwdSequence; unsigned revSequence; float nodesFree; float spaceFree; NSTimeInterval debUncompressed; NSTimeInterval debUndeleted; NSTimeInterval logUncompressed; NSTimeInterval logUndeleted; NSInteger debCompressAfter; NSInteger debDeleteAfter; NSInteger logCompressAfter; NSInteger logDeleteAfter; BOOL sweeping; } - (void) alarmCode: (AlarmCode)ac procName: (NSString*)name addText: (NSString*)additional; - (void) auditState: (LaunchInfo*)l reason: (NSString*)additional; - (void) clear: (EcAlarm*)alarm; - (void) clearAll: (NSString*)name addText: (NSString*)additional; - (void) clearCode: (AlarmCode)ac procName: (NSString*)name addText: (NSString*)additional; - (oneway void) cmdGnip: (id )from sequence: (unsigned)num extra: (NSData*)data; - (oneway void) cmdPing: (id )from sequence: (unsigned)num extra: (NSData*)data; - (oneway void) cmdQuit: (NSInteger)sig; - (void) command: (NSData*)dat to: (NSString*)t from: (NSString*)f; - (NSData *) configurationFor: (NSString *)name; - (BOOL) connection: (NSConnection*)ancestor shouldMakeNewConnection: (NSConnection*)newConn; - (id) connectionBecameInvalid: (NSNotification*)notification; - (NSDictionary*) environment; - (NSArray*) findAll: (NSArray*)a byAbbreviation: (NSString*)s; - (EcClientI*) findIn: (NSArray*)a byAbbreviation: (NSString*)s; - (EcClientI*) findIn: (NSArray*)a byName: (NSString*)s; - (EcClientI*) findIn: (NSArray*)a byObject: (id)s; - (NSString*) host; - (void) housekeeping: (NSTimer*)t; - (void) _housekeeping: (NSTimer*)t; - (void) hungRestart: (LaunchInfo*)l; - (void) information: (NSString*)inf from: (NSString*)s type: (EcLogType)t; - (void) information: (NSString*)inf from: (NSString*)s to: (NSString*)d type: (EcLogType)t; - (void) killAll; - (NSFileHandle*) logFile; - 
(void) logChange: (NSString*)change for: (NSString*)name;
- (void) logMessage: (NSString*)msg type: (EcLogType)t for: (id)o;
- (void) logMessage: (NSString*)msg type: (EcLogType)t name: (NSString*)c;
- (NSString*) makeSpace;
- (void) newConfig: (NSMutableDictionary*)newConfig;
- (NSFileHandle*) openLog: (NSString*)lname;
- (void) pingControl;
- (void) quitAll;
- (void) requestConfigFor: (id)c;
- (NSData*) registerClient: (id)c identifier: (int)p name: (NSString*)n transient: (BOOL)t;
- (void) removeClient: (EcClientI*)o cleanly: (BOOL)ok;
- (void) reply: (NSString*) msg to: (NSString*)n from: (NSString*)c;
- (void) terminate: (NSDate*)by;
- (void) _terminate: (NSTimer*)t;
- (NSMutableArray*) unconfiguredClients;
- (void) unregisterByObject: (byref id)obj status: (int)s;
- (void) update;
- (void) updateConfig: (NSData*)data;
- (void) woken: (id)obj;
@end

@implementation LaunchInfo

/* Produces a one line summary counting the known processes by state.
 * Each process is counted in exactly one category, tested in the order
 * starting, stopping, alive, disabled, then suspended/auto/launchable.
 */
+ (NSString*) description
{
  NSEnumerator  *infos = [launchInfo objectEnumerator];
  LaunchInfo    *info;
  unsigned      nAuto = 0;
  unsigned      nDisabled = 0;
  unsigned      nLaunchable = 0;
  unsigned      nStarting = 0;
  unsigned      nStopping = 0;
  unsigned      nSuspended = 0;
  unsigned      nAlive = 0;

  while (nil != (info = [infos nextObject]))
    {
      if ([info isStarting])
        {
          nStarting++;
          continue;
        }
      if ([info isStopping])
        {
          nStopping++;
          continue;
        }
      if ([info processIdentifier] > 0)
        {
          nAlive++;
          continue;
        }
      if ([info disabled])
        {
          nDisabled++;
          continue;
        }
      if (NO == [info autolaunch])
        {
          nLaunchable++;
          continue;
        }
      /* An autolaunch process which has been told to stay dead (or was
       * stopped manually) is counted as suspended.
       */
      if (Dead == info->desired || [info manual])
        {
          nSuspended++;
        }
      else
        {
          nAuto++;
        }
    }
  return [NSString stringWithFormat: @"LaunchInfo alive:%u, starting:%u,"
    @" stopping:%u disabled:%u, suspended:%u, launchable:%u (auto:%u)\n",
    nAlive, nStarting, nStopping, nDisabled, nSuspended, nLaunchable, nAuto];
}

/* Returns the instance registered under exactly this name, or nil.
 * The result is retained and autoreleased so that it survives removal
 * from the table for the remainder of the current autorelease pool.
 */
+ (LaunchInfo*) existing: (NSString*)name
{
  return AUTORELEASE(RETAIN([launchInfo objectForKey: name]));
}

/* Looks up an instance by exact name, falling back to treating the
 * argument as an abbreviation.  In the fallback a name that comp()
 * reports as a complete match wins immediately, otherwise the name with
 * the longest matched length (as left in comp_len) is used.
 */
+ (LaunchInfo*) find: (NSString*)abbreviation
{
  LaunchInfo    *exact = [launchInfo objectForKey: abbreviation];
  NSEnumerator  *keys;
  NSString      *key;
  NSString      *winner = nil;
  NSInteger     longest = 0;

  if (nil != exact)
    {
      return exact;
    }
  keys = [launchInfo keyEnumerator];
  while (nil != (key = [keys nextObject]))
    {
      if (0 == comp(abbreviation, key))
        {
          winner = key;
          break;
        }
      if (comp_len > longest)
        {
          longest = comp_len;
          winner = key;
        }
    }
  if (nil == winner)
    {
      return nil;
    }
  return [launchInfo objectForKey: winner];
}

/* Returns the instance whose task ivar is the supplied task, or nil.
 */
+ (LaunchInfo*) findTask: (NSTask*)t
{
  NSEnumerator  *infos = [launchInfo objectEnumerator];
  LaunchInfo    *info;

  for (info = [infos nextObject]; nil != info; info = [infos nextObject])
    {
      if (info->task == t)
        {
          return info;
        }
    }
  return nil;
}

/* Creates the table of instances and the launch queue on first use.
 */
+ (void) initialize
{
  if (nil != launchInfo)
    {
      return;
    }
  launchInfo = [NSMutableDictionary new];
  launchQueue = [NSMutableArray new];
}

/* Returns the instance for the named process, creating and registering
 * a new one (with no desired state) if none exists yet.
 */
+ (LaunchInfo*) launchInfo: (NSString*)name
{
  LaunchInfo    *l = [launchInfo objectForKey: name];

  if (nil != l)
    {
      return AUTORELEASE(RETAIN(l));
    }
  l = [self new];
  l->desired = None;
  ASSIGNCOPY(l->name, name);
  [launchInfo setObject: l forKey: l->name];
  return AUTORELEASE(l);
}

/* Return the number of processes actually launching (ie with a task running
 * but starting not finished).
 */
+ (NSUInteger) launching
{
  NSUInteger    found = 0;

  ENTER_POOL
  NSEnumerator  *infos = [launchInfo objectEnumerator];
  LaunchInfo    *info;

  for (info = [infos nextObject]; nil != info; info = [infos nextObject])
    {
      if (info->task != nil && [info isStarting])
        {
          found++;
        }
    }
  LEAVE_POOL
  return found;
}

/* Returns the names of all the known processes.
 */
+ (NSArray*) names
{
  NSArray       *keys = [launchInfo allKeys];

  return keys;
}

/* Check each process in the queue to see if it may now be launched.
 * Launch each process which may do so (removing it from the queue).
 */
+ (void) processQueue
{
  ENTER_POOL
  NSUInteger    count;

  /* We work with a copy of the queue in case the process of launching
   * causes the queue contents to be changed.
*/
  if ((count = [launchQueue count]) > 0)
    {
      NSArray           *q = AUTORELEASE([launchQueue copy]);
      NSUInteger        index;

      for (index = 0; index < count; index++)
        {
          LaunchInfo    *l = [q objectAtIndex: index];

          /* Skip anything which has left the real queue while we were
           * working through the copy.
           */
          if ([launchQueue containsObject: l])
            {
              NSString  *r = [l reasonToPreventLaunch];

              if (nil == r)
                {
                  [launchQueue removeObject: l];
                  l->queuedDate = 0.0;
                  [l starting: nil];
                }
            }
        }
    }
  LEAVE_POOL
}

/* Removes the named process from the table of known processes.
 * Does nothing if there is no process of that name.
 */
+ (void) remove: (NSString*)name
{
  LaunchInfo    *l = [launchInfo objectForKey: name];

  if (l != nil)
    {
      /* Detach the removed object from its client, destroy the task,
       * and cancel timers/notifications so that the removed object
       * will not try to manage anything before it is deallocated.
       */
      [[NSNotificationCenter defaultCenter] removeObserver: l];

      /* The client ivar is an owned reference (it is released in -dealloc
       * and destroyed in -clearClient:cleanly:) so it must be released
       * here rather than simply overwritten, or the client is leaked.
       */
      DESTROY(l->client);
      [l taskCleanup: l->task];
      [l->startingTimer invalidate];
      l->startingTimer = nil;
      [l->stoppingTimer invalidate];
      l->stoppingTimer = nil;

      /* A process still waiting in the launch queue would otherwise be
       * started by +processQueue after it has been removed.
       */
      [launchQueue removeObject: l];
      l->queuedDate = 0.0;

      [launchInfo removeObjectForKey: name];
    }
}

/* Returns the timestamp at which a process which began stopping at the
 * supplied time should be aborted.  The allowance is the QuitTime value
 * from the process configuration if that is positive, otherwise the
 * default quitTime.  If a global shutdown deadline (terminateBy) is set
 * and falls earlier, that deadline is returned instead.
 */
- (NSTimeInterval) abortDateFromStoppingDate: (NSTimeInterval)stopping
{
  NSTimeInterval        when = stopping;
  NSTimeInterval        ti = 0.0;
  NSString              *s;

  s = [[self configuration] objectForKey: @"QuitTime"];
  if ([s respondsToSelector: @selector(intValue)])
    {
      ti = (NSTimeInterval)[s intValue];
    }
  if (ti <= 0.0)
    {
      ti = quitTime;
    }
  when += ti;
  if (terminateBy)
    {
      ti = [terminateBy timeIntervalSinceReferenceDate];
      if (ti < when)
        {
          when = ti;
        }
    }
  return when;
}

/* Records a raised alarm, or removes the matching alarm if the supplied
 * one has cleared severity.  The array is created on first use, even
 * when the first thing seen is a clear.
 */
- (void) alarm: (EcAlarm*)alarm
{
  NSUInteger    index;

  if (nil == alarms)
    {
      alarms = [NSMutableArray new];
    }
  index = [alarms indexOfObject: alarm];
  if (EcAlarmSeverityCleared == [alarm perceivedSeverity])
    {
      if (NSNotFound != index)
        {
          [alarms removeObjectAtIndex: index];
        }
    }
  else
    {
      if (NSNotFound == index)
        {
          [alarms addObject: alarm];
        }
      else
        {
          [alarms replaceObjectAtIndex: index withObject: alarm];
        }
    }
}

/* Returns a snapshot of the recorded alarms; nil if -alarm: has never
 * been called for this instance.
 */
- (NSArray*) alarms
{
  return AUTORELEASE([alarms copy]);
}

/* Returns the boolean value of the Auto setting in the configuration.
 */
- (BOOL) autolaunch
{
  return [[conf objectForKey: @"Auto"] boolValue];
}

/* Records the current time as the point at which the process awakened.
 */
- (void) awakened
{
  awakenedDate = [NSDate timeIntervalSinceReferenceDate];
}

/* Check to see
if there is an active process connected (or which connects
 * when we contact it and ask it to).
 * Returns YES if the process is hung but still has a process ID, or if
 * the client ivar is set by the time the method finishes; NO otherwise.
 */
- (BOOL) checkActive
{
  if ([self hungDate] > 0.0 && identifier > 0)
    {
      /* A hung process may be considered active but there is no point
       * trying to send a DO message to communicate with it and ask it
       * to respond to us, because 'hung' means that we have a DO connection
       * to the process but it's not talking to us over it.
       */
      return YES;
    }
  if (nil == client)
    {
      ENTER_POOL
      /* When the Command server starts up, or on other rare occasions, we may
       * have processes which are running but unknown to the Command server.
       * To handle that we try, as part of the launch process, to establish a
       * Distributed Objects connection to a process before we try to launch
       * the executable.
       * If a DO connection is established, we ask the process to reconnect to
       * the command server, and that incoming connection will cause the client
       * ivar to be set (and the identifier ivar to be set to the process ID),
       * so we know we don't have to start a subtask.
       */
      NS_DURING
        {
          NSConnection  *c;

          /* Look the process up by its registered name. */
          c = [NSConnection
            connectionWithRegisteredName: name
            host: @""
            usingNameServer: [NSSocketPortNameServer sharedInstance]];
          NS_DURING
            {
              id        proxy;

              /* Do not hang waiting for the other end to respond.
               */
              [c setRequestTimeout: 10.0];
              [c setReplyTimeout: 10.0];
              proxy = (id)[c rootProxy];

              /* Sending an ecReconnect message to the client process should
               * result in our 'client' ivar being set.
               */
              [proxy ecReconnect];

              /* Contact succeeded: put the timeouts back to zero. */
              [c setRequestTimeout: 0.0];
              [c setReplyTimeout: 0.0];
            }
          NS_HANDLER
            {
              /* The reconnect request failed; log it and discard the
               * connection if it is still valid.
               */
              NSLog(@"Problem with reconnect for %@: %@", name, localException);
              if ([c isValid])
                {
                  [c setRequestTimeout: 0.0];
                  [c setReplyTimeout: 0.0];
                  [c invalidate];
                }
            }
          NS_ENDHANDLER
        }
      NS_HANDLER
        {
          /* Failure to look up the connection is logged and otherwise
           * ignored; the result below then depends only on the client ivar.
           */
          NSLog(@"Problem with connection for %@: %@", name, localException);
        }
      NS_ENDHANDLER
      LEAVE_POOL
    }
  return (nil == client) ? NO : YES;
}

/* Tests whether the process recorded in the identifier ivar can still be
 * signalled, by sending it signal zero.  If that fails the identifier is
 * reset to zero.  Returns YES only when the signal test succeeds.
 */
- (BOOL) checkProcess
{
  if (identifier > 0)
    {
      if (kill(identifier, 0) == 0)
        {
          return YES;
        }
      else
        {
          /* Process has terminated.
*/
          identifier = 0;
        }
    }
  return NO;
}

/* Drops the registered client and records when it went away.
 * A clean departure (unregistered or transient) sets clientQuitDate and
 * zeroes clientLostDate; an unclean one does the reverse.  In both cases
 * the registration, awakened and stable timestamps are reset.
 * If c is not the current client nothing is changed (the mismatch is
 * logged when debug is on).
 */
- (void) clearClient: (EcClientI*)c cleanly: (BOOL)unregisteredOrTransient
{
  NSTimeInterval        now;

  if (client != c)
    {
      if (debug)
        {
          NSLog(@"-clearClient: %p cleanly: %s when client is %p at %@",
            c, (unregisteredOrTransient ? "YES" : "NO"), client,
            [NSThread callStackSymbols]);
        }
      return;
    }

  DESTROY(client);
  now = [NSDate timeIntervalSinceReferenceDate];
  clientQuitDate = unregisteredOrTransient ? now : 0.0;
  clientLostDate = unregisteredOrTransient ? 0.0 : now;
  registrationDate = 0.0;
  awakenedDate = 0.0;
  stableDate = 0.0;
}

/* Clears the hung marker, if it is set, and schedules the point at which
 * the process may next be considered stable.
 */
- (void) clearHung
{
  NSTimeInterval        settle;

  if (0.0 == hungDate)
    {
      return;
    }
  hungDate = 0.0;

  /* A process which is no longer hung may become operationally
   * stable after a short period; longer if it has been lost before.
   */
  settle = [NSDate timeIntervalSinceReferenceDate] + 10.0;
  if (clientLostDate > 0.0)
    {
      settle += 250.0;
    }
  nextStableDate = settle;
}

/* Returns the currently registered client, or nil.
 */
- (EcClientI*) client
{
  return client;
}

/* Returns the configuration dictionary, retained and autoreleased.
 */
- (NSDictionary*) configuration
{
  return AUTORELEASE(RETAIN(conf));
}

/* Stops observing, cancels both timers, releases the owned ivars and
 * finally cleans up any remaining task.
 */
- (void) dealloc
{
  [[NSNotificationCenter defaultCenter] removeObserver: self];
  [startingTimer invalidate];
  [stoppingTimer invalidate];
  RELEASE(alarms);
  RELEASE(startedReason);
  RELEASE(stoppedReason);
  RELEASE(restartReason);
  RELEASE(dependencies);
  RELEASE(client);
  RELEASE(name);
  RELEASE(conf);
  if (nil != task)
    {
      [self taskCleanup: task];
    }
  [super dealloc];
}

/* The next delay for launching this process.  If Time is configured,
 * we use it (a value in seconds), otherwise we generate a fibonacci
 * sequence of increasingly larger delays each time a launch attempt
 * needs to be made.
*/
- (NSTimeInterval) delay
{
  NSString              *configured = [conf objectForKey: @"Time"];
  NSTimeInterval        delay;

  if (nil != configured && [configured doubleValue] > 0)
    {
      delay = [configured doubleValue];
    }
  else if (fib1 <= 0.0)
    {
      /* First step of the sequence. */
      delay = FIB;
      fib0 = delay;
      fib1 = delay;
    }
  else
    {
      /* Advance the sequence by one step. */
      delay = fib0 + fib1;
      fib0 = fib1;
      fib1 = delay;
    }

  /* Remember when the deferred launch becomes due. */
  deferredDate = [NSDate timeIntervalSinceReferenceDate] + delay;
  return delay;
}

/* Builds a multi line report: the process name (and pid if there is one),
 * each timestamp which is currently set, the status text and finally the
 * configuration.
 */
- (NSString*) description
{
  NSMutableString       *text;
  NSString              *status;

  text = AUTORELEASE([[super description] mutableCopy]);
  status = [self status];

  if (identifier <= 0)
    {
      [text appendFormat: @" for process '%@'\n", name];
    }
  else
    {
      [text appendFormat: @" for process '%@' (pid:%d)\n", name, identifier];
    }

  if (startingDate > 0.0)
    {
      [text appendFormat: @" Starting since %@ next check at %@\n",
        date(startingDate), [startingTimer fireDate]];
    }
  if (queuedDate > 0.0)
    {
      [text appendFormat: @" Queued to launch since %@\n", date(queuedDate)];
    }
  if (launchDate > 0.0)
    {
      [text appendFormat: @" Launched at %@\n", date(launchDate)];
    }

  /* Awakened/stable details are only reported for a registered process. */
  if (registrationDate > 0.0)
    {
      [text appendFormat: @" Registered since %@\n", date(registrationDate)];
      if (awakenedDate > 0.0)
        {
          [text appendFormat: @" Awakened since %@\n", date(awakenedDate)];
        }
      if (stableDate > 0.0)
        {
          [text appendFormat: @" Stable since %@\n", date(stableDate)];
        }
      else if (nextStableDate > 0.0)
        {
          [text appendFormat: @" Will be considered stable at %@\n",
            date(nextStableDate)];
        }
    }

  if (hungDate > 0.0)
    {
      [text appendFormat: @" Unresponsive since %@\n", date(hungDate)];
    }
  if (clientLostDate > 0.0)
    {
      [text appendFormat: @" Last lost/crashed at %@\n", date(clientLostDate)];
    }
  if (clientQuitDate > 0.0)
    {
      [text appendFormat: @" Last unregistered at %@\n", date(clientQuitDate)];
    }
  if (pingDate > 0.0)
    {
      [text appendFormat: @" Last ping response at %@\n", date(pingDate)];
    }
  if (stoppingDate > 0.0)
    {
      [text appendFormat: @" Stopping since %@ next check at %@\n",
        date(stoppingDate), [stoppingTimer fireDate]];
    }

  [text appendFormat: @" %@\n", status];
  [text appendFormat: @" %@\n", conf];
  return text;
}

/* Returns the desired steady state of the process.
 */
- (Desired) desired
{
  return desired;
}

/* Returns the boolean value of the Disabled setting in the configuration.
 */
- (BOOL) disabled
{
return [[conf objectForKey: @"Disabled"] boolValue];
}

/* Returns the timestamp at which the process was marked as hung
 * (zero when it is not marked).
 */
- (NSTimeInterval) hungDate
{
  return hungDate;
}

/* Returns YES if the client is in a state where it can be sent commands.
 */
- (BOOL) isActive
{
  return (client != nil && NO == [self isStopping]) ? YES : NO;
}

/* Returns YES while a startup is in progress (startingDate is set).
 */
- (BOOL) isStarting
{
  return (startingDate > 0.0) ? YES : NO;
}

/* Returns YES while a shutdown is in progress (stoppingDate is set).
 */
- (BOOL) isStopping
{
  return (stoppingDate > 0.0) ? YES : NO;
}

/* Opens a fresh log file called <name>.log in the DebugLogs/Valgrind
 * subdirectory of the user directory, creating the directory if needed.
 * An existing log is first rotated: the numbered copies .0 to .98 are each
 * moved up by one (any old .99 being deleted) and the current log becomes
 * <name>.log.0.
 * Returns a file handle open for updating, or nil (after logging the
 * problem) if the directory or the file can't be set up.
 */
static NSFileHandle*
valgrindLog(NSString *name)
{
  NSFileManager *mgr = [NSFileManager defaultManager];
  NSString      *base = [EcProc ecUserDirectory];
  NSString      *path;
  BOOL          flag;

  base = [base stringByAppendingPathComponent: @"DebugLogs"];
  base = [base stringByAppendingPathComponent: @"Valgrind"];
  if ([mgr fileExistsAtPath: base isDirectory: &flag] == NO)
    {
      if ([mgr createDirectoryAtPath: base
           withIntermediateDirectories: YES
                            attributes: nil
                                 error: NULL] == NO)
        {
          /* Creation can fail because the directory appeared in the
           * meantime, so check again before giving up.
           */
          if ([mgr fileExistsAtPath: base isDirectory: &flag] == NO)
            {
              NSLog(@"Unable to create directory - %@", base);
              return nil;
            }
        }
      else
        {
          flag = YES;
        }
    }
  if (flag == NO)
    {
      NSLog(@"The path '%@' is not a directory", base);
      return nil;
    }

  path = [base stringByAppendingPathComponent: name];
  path = [path stringByAppendingPathExtension: @"log"];
  if ([mgr fileExistsAtPath: path])
    {
      NSString  *newer = [path stringByAppendingPathExtension: @"99"];
      NSString  *older;
      int       i;

      /* Drop the oldest copy, then shift every numbered copy up by one,
       * working from the oldest so that nothing is overwritten.  The loop
       * must go all the way down to zero: stopping at one would leave .0
       * in place and the move of the current log below would then fail,
       * losing the current log when the file is recreated.
       */
      [mgr removeFileAtPath: newer handler: nil];
      for (i = 98; i >= 0; i--)
        {
          older = newer;
          newer = [path stringByAppendingPathExtension:
            [NSString stringWithFormat: @"%d", i]];
          [mgr movePath: newer toPath: older handler: nil];
        }

      /* The .0 name (held in newer) has just been vacated.
       */
      [mgr movePath: path toPath: newer handler: nil];
    }
  if ([mgr createFileAtPath: path contents: nil attributes: nil] == NO)
    {
      NSLog(@"Log file '%@' is not writable and can't be created", path);
      return nil;
    }
  return [NSFileHandle fileHandleForUpdatingAtPath: path];
}

/* This method should only ever be called from the -starting: method (when
 * the instance has permission to launch).
To initiate the startup process
 * the -start method is called, and to progress startup the -starting: method
 * is called.
 *
 * Builds an NSTask from the launch configuration (Prog, Args, Home, AddE,
 * SetE, valgrind settings), launches it, and writes the 'hidden' user
 * defaults for the new process to its standard input as a big-endian 32bit
 * length followed by a binary property list.
 * Returns YES immediately if -checkActive reports the client is active,
 * otherwise returns the result of -checkProcess after the launch attempt.
 * On failure an ACLaunchFailed alarm is raised and startingAlarm is set.
 */
- (BOOL) launch
{
  EcCommand		*command = (EcCommand*)EcProc;
  NSUserDefaults	*defs = [command cmdDefaults];
  NSMutableDictionary	*env;
  NSArray		*vgArgs;
  NSString		*vgPath;
  NSString		*home = [conf objectForKey: @"Home"];
  NSString		*prog = [conf objectForKey: @"Prog"];
  NSDictionary		*addE = [conf objectForKey: @"AddE"];
  NSDictionary		*setE = [conf objectForKey: @"SetE"];
  NSString		*failed = nil;
  NSString		*m;

  NSAssert(NO == [self checkProcess], NSInvalidArgumentException);
  if (YES == [self checkActive])
    {
      return YES;	// Client registered; no need to launch
    }

  /* ValgrindPath and ValgrindArgs may be used.  The values from the launch
   * info take precedence over the generic values from the main config.
   * An explicit empty path setting in the launch info disables valgrind
   * for this process.
   */
  if (NO == [(vgArgs = [conf objectForKey: @"ValgrindArgs"])
    isKindOfClass: [NSArray class]])
    {
      vgArgs = [defs arrayForKey: @"ValgrindArgs"];
    }
  if (NO == [(vgPath = [conf objectForKey: @"ValgrindPath"])
    isKindOfClass: [NSString class]])
    {
      vgPath = [defs stringForKey: @"ValgrindPath"];
    }
  else if ([@"" isEqual: vgPath])
    {
      /* Use of valgrind disabled.
       */
      vgArgs = nil;
      vgPath = nil;
    }
  if (nil != vgArgs && nil == vgPath)
    {
      vgPath = @"valgrind";
    }

  ENTER_POOL
  if (nil == (env = AUTORELEASE([[command environment] mutableCopy])))
    {
      NSProcessInfo	*pi = [NSProcessInfo processInfo];

      if (nil == (env = AUTORELEASE([[pi environment] mutableCopy])))
	{
	  env = [NSMutableDictionary dictionary];
	}
    }
  if (prog != nil && [prog length] > 0)
    {
      NS_DURING
	{
	  NSMutableArray	*args;
	  NSMutableDictionary	*hidden;

	  args = [NSMutableArray array];
	  /* The user defaults passed (hidden from process listings) to the
	   * child on its stdin.  Deliberately NOT called 'defs' so that it
	   * cannot be confused with the Command server's own user defaults.
	   */
	  hidden = [NSMutableDictionary dictionary];

	  /* Insert valgrind stuff if necessary.
	   */
	  if (vgPath != nil)
	    {
	      if ([vgArgs count] > 0)
		{
		  [args addObjectsFromArray: vgArgs];
		}
	      [args addObject: prog];
	      prog = vgPath;
	    }

	  /* Add argument to tell EcProcess the official name it was
	   * launched as.  All other arguments are hidden by writing
	   * them to stdin of the child process using a pipe.
	   * NB. The program should *only* be launched with the LaunchAs
	   * argument if it is also going to be provided with argument
	   * data on stdin.
	   */
	  [args addObject: @"-LaunchedAs"];
	  [args addObject: name];

	  if ([[conf objectForKey: @"Args"] isKindOfClass: [NSArray class]])
	    {
	      NSArray		*a = [conf objectForKey: @"Args"];
	      NSUInteger	count = [a count];
	      NSUInteger	index;
	      NSString		*key = nil;

	      /* From the supplied arguments, key/value pairs of the form
	       * representing user defaults settings are placed in a dictionary
	       * to be passed as hidden information, and the remainder of
	       * the values are added to the list passed as process arguments.
	       */
	      for (index = 0; index < count; index++)
		{
		  id	val = [a objectAtIndex: index];

		  if (key)
		    {
		      [hidden setObject: val forKey: key];
		      key = nil;
		    }
		  else
		    {
		      if ([val length] > 1 && [val hasPrefix: @"-"]
			&& ![val hasPrefix: @"--"])
			{
			  key = [val substringFromIndex: 1];
			}
		      else
			{
			  [args addObject: val];
			}
		    }
		}
	    }

	  if (setE != nil)
	    {
	      /* SetE replaces the whole environment. */
	      [env removeAllObjects];
	      [env addEntriesFromDictionary: setE];
	    }
	  if (addE != nil)
	    {
	      [env addEntriesFromDictionary: addE];
	    }
	  if (task != nil)
	    {
	      NSLog(@"-launch called for %@ with task %@ already present at %@",
		self, task, [NSThread callStackSymbols]);
	      DESTROY(task);
	    }
	  task = [NSTask new];
	  [task setArguments: args];
	  [task setEnvironment: env];
	  [task setLaunchPath: prog];

	  if ([home isKindOfClass: [NSString class]]
	    && [(home = [home stringByTrimmingSpaces]) length] > 0)
	    {
	      NSFileManager	*mgr = [NSFileManager defaultManager];
	      NSString		*base = [command ecUserDirectory];
	      NSString		*dir = home;
	      BOOL		ok = NO;

	      base = [base stringByStandardizingPath];
	      dir = [dir stringByStandardizingPath];
	      if (NO == [dir isAbsolutePath])
		{
		  dir = [base stringByAppendingPathComponent: home];
		  dir = [dir stringByStandardizingPath];
		}
	      if ([mgr fileExistsAtPath: dir isDirectory: &ok] == NO
		|| ok == NO)
		{
		  NSLog(@"Failed to find path '%@' (Home '%@') for %@",
		    dir, home, name);
		  dir = nil;
		  home = nil;
		}
	      else
		{
		  NSUInteger	l = [base length];
		  unichar	sep = 0;

		  if ([dir length] > l)
		    {
		      sep = [dir characterAtIndex: l];
		    }
		  /* The home must be a subdirectory of the user directory;
		   * the character following the base must be a path
		   * separator.  Accept '/' (standardised paths) as well
		   * as '\\', otherwise no Home could ever be used on a
		   * POSIX system.
		   */
		  if ([dir length] > l && [dir hasPrefix: base]
		    && ('/' == sep || '\\' == sep))
		    {
		      NSString	*original = home;

		      home = [dir substringFromIndex: l + 1];
		      if (NO == [original isEqual: home])
			{
			  NSLog(@"Home = '%@' changed to '%@' for %@",
			    original, home, name);
			}
		    }
		  else
		    {
		      NSLog(@"Home = '%@' can't be used for %@", home, name);
		      home = nil;
		    }
		}
	      if (dir)
		{
		  if ([mgr isWritableFileAtPath: dir])
		    {
		      [task setCurrentDirectoryPath: dir];
		    }
		  else
		    {
		      NSLog(@"Failed to write path '%@' (Home '%@') for %@",
			dir, home, name);
		    }
		}
	    }
	  else
	    {
	      NSLog(@"Bad Home = '%@' ignored for %@", home, name);
	      home = nil;
	    }

	  if ([task validatedLaunchPath] == nil)
	    {
	      failed = @"failed to launch (not executable)";
	      m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
		@"failed to launch (not executable) %@"), name];
	      [command information: m from: nil to: nil type: LT_CONSOLE];
	      prog = nil;
	    }
	  if (prog != nil)
	    {
	      NSData		*hiddenArguments;
	      NSString		*s;
	      NSData		*d;
	      NSPipe		*p;
	      NSMutableData	*buf;
	      uint32_t		l;
	      uint32_t		b;

	      /* As a convenience, the 'Home' option sets the -HomeDirectory
	       * for the process.
	       */
	      if ([home length] > 0)
		{
		  [hidden setObject: home forKey: @"HomeDirectory"];
		}

	      /* If we do not want the process to core-dump, we need to add
	       * the user default to tell it.
	       */
	      if ([self mayCoreDump] == NO)
		{
		  [hidden setObject: @"0" forKey: @"CoreSize"];
		}

#if	GS_USE_GNUTLS
#if	!defined(TLS_DISTRIBUTED_OBJECTS)
	      if ([[command cmdDefaults] boolForKey: @"EncryptedDO"])
#endif
		{
		  NSDictionary	*c;
		  NSDictionary	*s;
		  NSDictionary	*g;

		  /* Try looking up options in the launch configuration
		   */
		  c = [conf objectForKey: @"ClientOptionsForTLS"];
		  if (c && NO == [c isKindOfClass: [NSDictionary class]])
		    {
		      [NSException raise: NSInvalidArgumentException
			format: @"invalid ClientOptionsForTLS"];
		    }
		  s = [conf objectForKey: @"ServerOptionsForTLS"];
		  /* Validate the server options themselves (not 'c'). */
		  if (s && NO == [s isKindOfClass: [NSDictionary class]])
		    {
		      [NSException raise: NSInvalidArgumentException
			format: @"invalid ServerOptionsForTLS"];
		    }
		  g = [conf objectForKey: @"OptionsForTLS"];
		  if (g && NO == [g isKindOfClass: [NSDictionary class]])
		    {
		      [NSException raise: NSInvalidArgumentException
			format: @"invalid OptionsForTLS"];
		    }
		  if (nil == g && nil == c && nil == s)
		    {
		      /* No process options specified, try using any
		       * options specified for the Command server
		       * (ie in the Command server's user defaults).
		       */
		      c = [defs objectForKey: @"ClientOptionsForTLS"];
		      s = [defs objectForKey: @"ServerOptionsForTLS"];
		      g = [defs objectForKey: @"OptionsForTLS"];
		    }
		  if (nil == c && nil != s)
		    {
		      [NSException raise: NSInvalidArgumentException
			format: @"missing ClientOptionsForTLS"];
		    }
		  if (nil == s && nil != c)
		    {
		      [NSException raise: NSInvalidArgumentException
			format: @"missing ServerOptionsForTLS"];
		    }
		  if (g)
		    {
		      if (c)
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"both OptionsForTLS and"
			    @" ClientOptionsForTLS"];
			}
		      if (s)
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"both OptionsForTLS and"
			    @" ServerOptionsForTLS"];
			}
		      if (nil == [g objectForKey: GSTLSCertificateFile])
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"missing %@ in OptionsForTLS",
			    GSTLSCertificateFile];
			}
		      if (nil == [g objectForKey: GSTLSCertificateKeyFile])
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"missing %@ in OptionsForTLS",
			    GSTLSCertificateKeyFile];
			}
		    }
		  else if (s)
		    {
		      if (nil == [s objectForKey: GSTLSCertificateFile])
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"missing %@ in ServerOptionsForTLS",
			    GSTLSCertificateFile];
			}
		      if (nil == [s objectForKey: GSTLSCertificateKeyFile])
			{
			  [NSException raise: NSInvalidArgumentException
			    format: @"missing %@ in ServerOptionsForTLS",
			    GSTLSCertificateKeyFile];
			}
		    }

		  /* Enable encrypted DO if supported by the base library.
		   * If TLS is supported, all Distributed Object communications
		   * between our processes must be encrypted.  Generally we can
		   * use shared certificate/key passed to our subprocesses on
		   * launch, so the subprocesses don't need to waste a lot of
		   * CPU (and real) time generating their own keys.
		   */
		  if (g)
		    {
		      [hidden setObject: g forKey: @"OptionsForTLS"];
		    }
		  else if (c && s)
		    {
		      [hidden setObject: c forKey: @"ClientOptionsForTLS"];
		      [hidden setObject: s forKey: @"ServerOptionsForTLS"];
		    }
		  else
		    {
		      NSMutableDictionary	*opts;
		      id			opt;

		      opts = [NSMutableDictionary dictionary];

		      /* If no certificate key was provided in the Command
		       * server configuration, we use our self-signed
		       * certificate key, generating it if necessary.
		       */
		      opt = [defs objectForKey: GSTLSCertificateKeyFile];
		      if (nil == opt)
			{
			  opt = [GSTLSObject dataForTLSFile:
			    @"self-signed-key"];
			  if (nil == opt && [GSTLSCredentials
			    respondsToSelector: @selector(selfSigned:)])
			    {
			      (void)[GSTLSCredentials selfSigned: YES];
			      opt = [GSTLSObject dataForTLSFile:
				@"self-signed-key"];
			    }
			}
		      if (opt)
			{
			  [opts setObject: opt
				   forKey: GSTLSCertificateKeyFile];
			}

		      /* If no certificate was provided in the Command server
		       * configuration, we use our self-signed certificate,
		       * generating it if necessary.
		       */
		      opt = [defs objectForKey: GSTLSCertificateFile];
		      if (nil == opt)
			{
			  opt = [GSTLSObject dataForTLSFile:
			    @"self-signed-crt"];
			}
		      if (opt)
			{
			  [opts setObject: opt forKey: GSTLSCertificateFile];
			}

		      /* Pass on the TLS debug settings from the Command
		       * server configuration to the client.
		       */
		      if ((opt = [defs objectForKey: GSTLSDebug]) != nil)
			{
			  [opts setObject: opt forKey: GSTLSDebug];
			}

		      /* Pass on the TLS priority settings and other config
		       * from the Command server to the client.
		       */
		      if ((opt = [defs objectForKey: GSTLSPriority]) != nil)
			{
			  [opts setObject: opt forKey: GSTLSPriority];
			}
		      if ((opt = [defs objectForKey: GSTLSCAFile]) != nil)
			{
			  [opts setObject: opt forKey: GSTLSCAFile];
			}
		      if ((opt = [defs objectForKey: GSTLSRevokeFile]) != nil)
			{
			  [opts setObject: opt forKey: GSTLSRevokeFile];
			}
		      if ((opt = [defs objectForKey: GSTLSVerify]))
			{
			  [opts setObject: opt forKey: GSTLSVerify];
			}

		      /* The options must go into the hidden values passed
		       * to the child, not into our own user defaults.
		       */
		      [hidden setObject: opts forKey: @"OptionsForTLS"];
		    }
		}
#endif

	      /* Now we need to make the key/value pairs into a serialised
	       * form to be passed to the subprocess using a pipe.
	       * The format is a 4 byte big-endian length followed by the
	       * binary property list data.
	       */
	      d = [NSPropertyListSerialization
		dataFromPropertyList: hidden
		format: NSPropertyListBinaryFormat_v1_0
		errorDescription: NULL];
	      l = [d length];
	      b = GSSwapHostI32ToBig(l);
	      buf = [NSMutableData dataWithCapacity: l + 4];
	      [buf appendBytes: &b length: 4];
	      [buf appendData: d];
	      hiddenArguments = buf;
	      p = [NSPipe pipe];
	      [task setStandardInput: p];

	      if (nil == vgPath)
		{
		  static NSFileHandle	*hdl = nil;

		  if (nil == hdl)
		    {
		      ASSIGN(hdl, [NSFileHandle fileHandleWithNullDevice]);
		    }
		  /* Unless configured to keep them, stdout and stderr of
		   * the child are sent to the null device.
		   */
		  s = [conf objectForKey: @"KeepStandardOutput"];
		  if (NO == [s respondsToSelector: @selector(boolValue)]
		    || NO == [s boolValue])
		    {
		      [task setStandardOutput: hdl];
		    }
		  s = [conf objectForKey: @"KeepStandardError"];
		  if (NO == [s respondsToSelector: @selector(boolValue)]
		    || NO == [s boolValue])
		    {
		      [task setStandardError: hdl];
		    }
		}
	      else
		{
		  /* Running under valgrind: capture output in a log file. */
		  NSFileHandle	*vgl = valgrindLog(name);

		  if (vgl != nil)
		    {
		      [task setStandardOutput: vgl];
		      [task setStandardError: vgl];
		    }
		}

	      /* Launching ... immediately after launch we write information
	       * to the subtask so it can initialise itself.
	       */
	      [[NSNotificationCenter defaultCenter]
		addObserver: self
		selector: @selector(taskTerminated:)
		name: NSTaskDidTerminateNotification
		object: task];
	      [[NSNotificationCenter defaultCenter]
		addObserver: self
		selector: @selector(taskWritten:)
		name: GSFileHandleWriteCompletionNotification
		object: [p fileHandleForWriting]];
	      launchDate = [NSDate timeIntervalSinceReferenceDate];
	      [task launch];
	      [[p fileHandleForWriting]
		writeInBackgroundAndNotify: hiddenArguments];
	      identifier = [task processIdentifier];
	      [[command logFile] printf:
		@"%@ launched %@ with %@ and hidden values for %@\n",
		[NSDate date], prog, args, [hidden allKeys]];
	    }
	}
      NS_HANDLER
	{
	  identifier = 0;
	  [[NSNotificationCenter defaultCenter]
	    removeObserver: self
	    name: NSTaskDidTerminateNotification
	    object: task];
	  launchDate = 0.0;
	  [self taskCleanup: task];
	  failed = @"failed to launch";
	  m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
	    @"failed to launch (%@) %@"), localException, name];
	  [command information: m from: nil to: nil type: LT_CONSOLE];
	}
      NS_ENDHANDLER
    }
  else
    {
      failed = @"bad program name to launch";
      m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
	@"bad program name to launch %@"), name];
      [command information: m from: nil to: nil type: LT_CONSOLE];
    }
  if (nil != failed)
    {
      startingAlarm = YES;
      [command alarmCode: ACLaunchFailed
		procName: name
		 addText: failed];
    }
  LEAVE_POOL
  return [self checkProcess];	// On failure return NO
}

/* Returns the date of the most recent launch, or nil if launchDate is
 * not set.
 */
- (NSDate*) launchDate
{
  return (launchDate > 0.0) ? date(launchDate) : (NSDate*)nil;
}

/* Returns the flag set by -setManual:
 */
- (BOOL) manual
{
  return manual;
}

/* Returns NO if core dumps from the process should be suppressed.
 */
- (BOOL) mayCoreDump
{
  if (fib1 > FIB)
    {
      /* If fib1 is greater than the base value, we must have already done
       * one restart due to a failure in launch or crash in early life.
       * We therefore want to suppress core dumps from subsequent crashes
       * in order to avoid filling the disk too quickly.
       */
      return NO;
    }
  return YES;
}

/* Checks the receiver to see if it is eligible to be removed from the
 * queue and launched.
If the receiver should not be in the queue it
 * is removed.
 * The result is nil when nothing prevents the launch (and also when the
 * receiver is not starting or is stopping), otherwise it is a short
 * human readable description of what is holding the launch up.
 */
- (NSString*) reasonToPreventLaunch
{
  NSArray	*pending;
  BOOL		notStarting = (NO == [self isStarting]) ? YES : NO;

  /* An instance which is not starting (or is stopping) has no business
   * being queued; drop it from the queue and report no reason.
   */
  if (notStarting || [self isStopping])
    {
      if ([launchQueue containsObject: self])
	{
	  if (notStarting)
	    {
	      NSLog(@"Found object which is not starting in queue: %@", self);
	    }
	  else
	    {
	      NSLog(@"Found object which is stopping in queue: %@", self);
	    }
	  [launchQueue removeObject: self];
	  queuedDate = 0.0;
	}
      return nil;
    }

  /* The remaining checks are made in priority order; the first one
   * which applies supplies the reason.
   */
  if (deferredDate > 0.0
    && [NSDate timeIntervalSinceReferenceDate] < deferredDate)
    {
      return [NSString stringWithFormat: @"waiting for retry at %@",
	date(deferredDate)];
    }
  if ([(EcCommand*)EcProc ecIsQuitting])
    {
      return @"system shutting down";
    }
  if (terminateBy != nil)
    {
      return @"all servers shutting down";
    }
  if (NO == launchEnabled)
    {
      return @"launching suspended";
    }
  if (launchLimit > 0 && [LaunchInfo launching] >= launchLimit)
    {
      return [NSString stringWithFormat: @"%u launches in progress",
	(unsigned)launchLimit];
    }
  pending = [self unfulfilled];
  if ([pending count] > 0)
    {
      return [NSString stringWithFormat: @"waiting for %@", pending];
    }
  return nil;
}

/* Returns YES if no 'next stable' time is set or that time has been
 * reached, NO if it still lies in the future.
 */
- (BOOL) mayBecomeStable
{
  NSTimeInterval	when = nextStableDate;

  return (when <= 0.0 || when <= [NSDate timeIntervalSinceReferenceDate])
    ? YES : NO;
}

/* The name of the process this instance manages.
 */
- (NSString*) name
{
  return name;
}

/* The process identifier currently recorded for the process.
 */
- (int) processIdentifier
{
  return identifier;
}

/* Check the current state, and if it's not the same as the desired state
 * start moving towards that desired state (unless already moving).
*/
- (void) progress
{
  if ([self isStarting])
    {
      if (debug)
	{
	  NSLog(@"-progress ignored (already starting) for %@", self);
	}
    }
  else if ([self isStopping])
    {
      if (debug)
	{
	  NSLog(@"-progress ignored (already stopping) for %@", self);
	}
    }
  else
    {
      switch (desired)
	{
	  case Live:
	    if (nil == client)
	      {
		/* Possibly the client is already running (if the Command server
		 * has just started) or has been started externally and not yet
		 * connected ... if so see if we can get it to connect.
		 */
		if (NO == [self checkActive])
		  {
		    [self start];
		  }
		else
		  {
		    NSLog(@"-progress ignored (already active) for %@", self);
		  }
	      }
	    else
	      {
		NSLog(@"-progress ignored (already live) for %@", self);
	      }
	    break;

	  case Dead:
	    if (client)
	      {
		[self stop];
	      }
	    else if (debug)
	      {
		NSLog(@"-progress ignored (already dead) for %@", self);
	      }
	    break;

	  case None:
	    if ([self disabled])
	      {
		/* If the config says we are disabled, we should stop.
		 */
		if (YES == [self checkActive])
		  {
		    [self stop];
		  }
		else if (debug)
		  {
		    NSLog(@"-progress none (disabled in config) for %@", self);
		  }
	      }
	    else if (restartReason)
	      {
		/* A restart is pending: stop if running, else start. */
		if (client)
		  {
		    [self stop];
		  }
		else if (NO == [self checkActive])
		  {
		    ASSIGN(startedReason, @"restart");
		    [self start];
		  }
		else if (debug)
		  {
		    NSLog(@"-progress ignored (already active) for %@", self);
		  }
	      }
	    else if (terminationStatusKnown && restartStatus(terminationStatus))
	      {
		/* The process exited with a status code saying it should
		 * be restarted.
		 */
		ASSIGN(startedReason, @"restart");
		if (NO == [self checkActive])
		  {
		    [self start];
		  }
		else if (debug)
		  {
		    NSLog(@"-progress ignored (already active) for %@", self);
		  }
	      }
	    else if ([self autolaunch] && ![self manual])
	      {
		/* The config says we autolaunch and the last process
		 * did not shut down gracefully.
		 * So we should autolaunch again.
		 */
		ASSIGN(startedReason, @"autolaunch");
		if (NO == [self checkActive])
		  {
		    [self start];
		  }
		else if (debug)
		  {
		    NSLog(@"-progress ignored (already active) for %@", self);
		  }
	      }
	    else if (debug)
	      {
		NSLog(@"-progress ignored (stable) for %@", self);
	      }
	    break;
	}
    }
}

/* Clears any deferred launch time and resets the values (fib0 and fib1)
 * used to calculate the delay between launch attempts.
 */
- (void) resetDelay
{
  deferredDate = 0.0;
  fib0 = fib1 = 0.0;
}

/* If a process is running, restart it.
 * This should NOT start processes which are not running.
 */
- (void) restart: (NSString*)reason
{
  if (NO == [self checkActive])
    {
      NSLog(@"-restart: '%@' ignored (not running) for %@", reason, self);
      return;
    }

  /* The process is not manually stopped.
   */
  [self setManual: NO];

  /* For a restart, the desired state cannot be Dead
   */
  if (Dead == desired)
    {
      if ([self autolaunch])
	{
	  [self setDesired: Live];
	}
      else
	{
	  [self setDesired: None];
	}
    }

  /* Setting a restartReason ensures that the process is restarted
   * once it has stopped.
   */
  ASSIGNCOPY(restartReason, reason);
  if (NO == [self isStopping])
    {
      ASSIGNCOPY(stoppedReason, reason);
      [self stop];
    }
}

/* Try to start up the process if possible, setting the supplied reason
 */
- (void) start: (NSString*)reason
{
  if ([self disabled])
    {
      NSLog(@"-start: '%@' ignored (disabled) for %@", reason, self);
      return;
    }

  /* The process is not manually stopped
   */
  [self setManual: NO];
  if (Dead == desired)
    {
      if ([self autolaunch])
	{
	  [self setDesired: Live];
	}
      else
	{
	  [self setDesired: None];
	}
    }

  if ([self isStarting])
    {
      NSLog(@"-start: '%@' ignored (already starting) for %@", reason, self);
    }
  else if ([self isStopping])
    {
      NSLog(@"-start: '%@' becomes a restart for %@", reason, self);
      [self restart: reason];
    }
  else if ([self checkActive])
    {
      NSLog(@"-start: '%@' ignored (already Live) for %@", reason, self);
    }
  else
    {
      ASSIGN(startedReason, reason);
      [self start];
    }
}

/* Does the work associated with completion of startup: resets the
 * termination, lost/quit, ping and queue state, ends any startup in
 * progress, audits the state change and then moves on towards the
 * desired state.
 */
- (void) started
{
  EcCommand	*command = (EcCommand*)EcProc;
  NSString	*reason = AUTORELEASE(startedReason);

  if (debug)
    {
      NSLog(@"-started for %@ at %@", self, [NSThread callStackSymbols]);
    }
  startedReason = nil;
  if (nil == reason)
    {
      reason = AUTORELEASE(RETAIN(restartReason));
      if (nil == reason)
	{
	  reason = @"started externally";
	}
    }
  DESTROY(restartReason);	// Restart is complete
  terminationCount = 0;
  terminationDate = 0.0;
  terminationStatusKnown = NO;
  /* The process may not be considered stable for at least 10 seconds,
   * and for longer if the previous process was lost.
   */
  nextStableDate = [NSDate timeIntervalSinceReferenceDate] + 10.0;
  if (clientLostDate > 0.0)
    {
      nextStableDate += 250.0;
    }
  clientLostDate = 0.0;
  clientQuitDate = 0.0;
  [launchQueue removeObject: self];
  [self clearHung];
  pingDate = 0.0;
  queuedDate = 0.0;
  if ([self isStarting])
    {
      /* The client has connected and registered itself ... startup of
       * this process has completed.  Alarms will be cleared when the
       * new process becomes stable.
       */
      [startingTimer invalidate];
      startingTimer = nil;
      startingDate = 0.0;
    }
  [command auditState: self reason: reason];
  if (Dead == desired)
    {
      /* It is not desired that this process should be running:
       * initiate a shutdown.
       */
      [self stop];
    }
  [self progress];
  [LaunchInfo processQueue];	// Maybe we can launch more now
}

/* When process startup has completed and the client has registered itself
 * with the Command server, the registration process will call this method.
 * Here we should do all the work associated with completion of the startup
 * process.
 */
- (void) setClient: (EcClientI*)c
{
  int	newPid;

  NSAssert([c isKindOfClass: [EcClientI class]],
    NSInternalInconsistencyException);
  ASSIGN(client, c);
  newPid = [c processIdentifier];
  if (task != nil && [task processIdentifier] != newPid)
    {
      /* This could happen if someone manually launches the process
       * before the Command server launches it.  In that case we
       * need to cancel the handling of the task and depend on
       * the excess process to shut itself down.
       * NB. the arguments are in the order given by the format:
       * the new pid (from the client) first, then the old (task) pid.
       */
      NSLog(@"LaunchInfo(%@) new pid(%d) from client connection"
	@" differs from old pid(%d) from task launch",
	name, newPid, [task processIdentifier]);
      [[NSNotificationCenter defaultCenter]
	removeObserver: self
	name: NSTaskDidTerminateNotification
	object: task];
      launchDate = 0.0;
      [self taskCleanup: task];
    }
  registrationDate = [NSDate timeIntervalSinceReferenceDate];
  identifier = newPid;
  [self started];
}

/* Replaces the launch configuration with a copy of the supplied dictionary
 * and adjusts the desired state to match the Disabled/autolaunch settings
 * of the new configuration.
 */
- (void) setConfiguration: (NSDictionary*)c
{
  BOOL	wasDisabled = [self disabled];
  BOOL	wasAuto = [self autolaunch];

  ASSIGNCOPY(conf, c);
  if ([self disabled])
    {
      if (NO == wasDisabled)
	{
	  EcCommand	*command = (EcCommand*)EcProc;

	  [command clearAll: name addText: @"process disabled in config"];
	}
      if (desired != Dead)
	{
	  [self setDesired: Dead];
	}
    }
  else if ([self autolaunch])
    {
      if (NO == wasAuto && desired != Live)
	{
	  [self setDesired: Live];
	}
    }
  else
    {
      if (Live == desired)
	{
	  [self setDesired: None];
	}
    }
}

/* Sets the desired state of the process.  The requested state is
 * overridden with Dead while a shutdown of all processes is in progress
 * or when the configuration says the process is disabled.
 * Apart from restarting the stopping timer where needed, this method only
 * records the state and logs what the change implies.
 */
- (void) setDesired: (Desired)state
{
  Desired	old = desired;

  desired = state;
  if (terminateBy != nil && desired != Dead)
    {
      desired = Dead;
      NSLog(@"-setDesired:Dead forced by termination in progress for %@",
	self);
    }
  else if (Live == desired && [self disabled])
    {
      desired = Dead;
      NSLog(@"-setDesired:Live overridden by Disabled config of %@", self);
    }
  else if (None == desired && [self disabled])
    {
      desired = Dead;
      NSLog(@"-setDesired:None overridden by Disabled config of %@", self);
    }

  if (old == desired)
    {
      NSLog(@"-setDesired:%@ unchanged for %@", desiredName(desired), self);
      if (terminateBy != nil && [self isStopping]
	&& [[stoppingTimer fireDate] earlierDate: terminateBy] == terminateBy)
	{
	  /* Force timer reset to match terminateBy
	   */
	  [self stopping: nil];
	}
    }
  else if ([self isStarting])
    {
      NSLog(@"-setDesired:%@ deferred pending startup of %@",
	desiredName(desired), self);
    }
  else if ([self isStopping])
    {
      NSLog(@"-setDesired:%@ deferred pending shutdown of %@",
	desiredName(desired), self);
      if (nil == stoppingTimer)
	{
	  NSLog(@"stoppingTimer not set - attempt to proceeed with stopping");
	  [self stopping: nil];
	}
    }
  else
    {
      if (Live == desired)
	{
	  if (identifier > 0)
	    {
	      NSLog(@"-setDesired:Live when already started of %@", self);
	    }
	  else
	    {
	      NSLog(@"-setDesired:Live requests startup of %@", self);
	    }
	}
      else if (Dead == desired)
	{
	  if (identifier > 0)
	    {
	      NSLog(@"-setDesired:Dead requests shutdown of %@", self);
	    }
	  else
	    {
	      NSLog(@"-setDesired:Dead when already stopped of %@", self);
	    }
	}
    }
}

/* Marks the process as hung (recording the time) unless it is already
 * marked.
 */
- (void) setHung
{
  if (hungDate <= 0.0)
    {
      hungDate = [NSDate timeIntervalSinceReferenceDate];
      /* A hung process is not considered stable or becoming stable, since
       * stable means stably operational.
       */
      [self setStable: NO];
      nextStableDate = 0.0;
    }
}

/* Sets the flag returned by -manual
 */
- (void) setManual: (BOOL)f
{
  manual = f;
}

/* Records the current time as the ping date.
 */
- (void) setPing
{
  pingDate = [NSDate timeIntervalSinceReferenceDate];
}

/* Records the process identifier of the process.
 */
- (void) setProcessIdentifier: (int)p
{
  identifier = p;
}

/* Marks the process as stable or not.  On becoming stable (only if it was
 * not already) the time is recorded and the launch delay is reset.
 */
- (void) setStable: (BOOL)s
{
  if (NO == s)
    {
      stableDate = 0.0;
    }
  else if (s && 0.0 == stableDate)
    {
      stableDate = [NSDate timeIntervalSinceReferenceDate];
      [self resetDelay];
    }
}

- (void) setTerminationStatus: (int)s
{
  /* Called when we are informed of the death of a process which is not a
   * subtask.  A positive status is assumed to be a signal.
   * Ignored when there is a subtask.
   */
  if (nil == task)
    {
      terminationStatusKnown = YES;
      terminationDate = [NSDate timeIntervalSinceReferenceDate];
      if (s < 0)
	{
	  terminationSignal = 0;
	  terminationStatus = s;
	}
      else
	{
	  terminationSignal = s;
	  terminationStatus = 0;
	}
    }
}

/* Returns YES if the process has been marked as stable.
 */
- (BOOL) stable
{
  return stableDate > 0.0 ? YES : NO;
}

/* This method may be called to initiate startup of a server.
 * If called when a server is already starting or shutting down
 * it has no effect.
*/
/* NB. Startup is refused (with a major exception report) if the receiver
 * is already starting, has a starting timer, has a client, or has a known
 * process identifier.  Otherwise a short one-shot timer is scheduled to
 * call -starting: which does the actual work.
 */
- (void) start
{
  if (debug)
    {
      NSLog(@"-start for %@ at %@", self, [NSThread callStackSymbols]);
    }
  if ([self isStarting])
    {
      EcExceptionMajor(nil, @"-start when already starting of %@", self);
    }
  else if (startingTimer)
    {
      EcExceptionMajor(nil, @"-start when timer already set of %@", self);
    }
  else if (nil != client)
    {
      EcExceptionMajor(nil, @"-start when already active of %@", self);
    }
  else if (identifier > 0)
    {
      EcExceptionMajor(nil, @"-start when already alive of %@", self);
    }
  else
    {
      /* NOTE(review): this test cannot succeed, since a non-nil
       * startingTimer is caught by an earlier branch above.
       */
      if (startingTimer != nil)
	{
	  NSLog(@"-start called for %@ with timer already present at %@",
	    self, [NSThread callStackSymbols]);
	}
      [self resetDelay];
      if (terminationStatusKnown)
	{
	  if (-3 == terminationStatus)	// configuration error
	    {
	      /* Defer the actual launch for 5 minutes since a config error
	       * requires human intervention and is unlikely to be fixed
	       * very quickly.
	       */
	      deferredDate = [NSDate timeIntervalSinceReferenceDate] + 300.0;
	    }
	}
      startingAlarm = NO;
      startingDate = [NSDate timeIntervalSinceReferenceDate];
      startingTimer = [NSTimer scheduledTimerWithTimeInterval: 0.01
						       target: self
						     selector: @selector(starting:)
						     userInfo: name
						      repeats: NO];
    }
}

/* Checks whether a startup in progress should be abandoned because the
 * desired state is now Dead.  If so, the startup state is cleaned up, any
 * launched process is killed, and -stopped is called.
 * Returns YES if the startup was abandoned, NO otherwise.
 */
- (BOOL) checkAbandonedStartup
{
  BOOL	abandon = NO;

  [self checkProcess];
  if (NO == [self isStarting] || client != nil)
    {
      return NO;	// Not starting
    }

  /* We will only abandon startup if we want the process to be dead
   */
  if (Dead == desired)
    {
      if (0 == identifier)
	{
	  abandon = YES;	// No process, easy to abandon
	}
      else
	{
	  NSTimeInterval	now = [NSDate timeIntervalSinceReferenceDate];

	  if (now - launchDate >= 30.0)
	    {
	      abandon = YES;	// One process taking too long to start
	    }
	  else if (now - startingDate >= 120.0)
	    {
	      abandon = YES;	// Multiple attempts taking too long
	    }
	}
    }

  if (YES == abandon)
    {
      /* Cleanup all the information associated with process startup since
       * we are not actually going to start the process.
       */
      [startingTimer invalidate];
      startingTimer = nil;
      startingDate = 0.0;
      [launchQueue removeObject: self];
      if (identifier > 0)
	{
	  if (task != nil)
	    {
	      [[NSNotificationCenter defaultCenter]
		removeObserver: self
		name: NSTaskDidTerminateNotification
		object: task];
	      launchDate = 0.0;
	      [self taskCleanup: task];
	    }
	  kill(identifier, SIGKILL);
	  identifier = 0;
	}
      if (startingAlarm)
	{
	  EcCommand	*command = (EcCommand*)EcProc;

	  startingAlarm = NO;
	  [command clearAll: name addText: @"process manually stopped"];
	}

      /* Being deliberately shut down before launch completed is equivalent
       * to a clean shutdown of a running process, so we must set variables
       * to let the -stopped method know that.
       * We call the -stopped method to do all the work associated with the
       * process end, and making it ready to start again is required.
       */
      terminationStatusKnown = YES;
      terminationSignal = 0;
      terminationStatus = 0;
      [self stopped];
    }
  return abandon;
}

/* Progresses the startup of the process: launches it when permitted,
 * otherwise queues it, and in either case reschedules itself with a timer
 * until startup has completed or been cancelled.
 */
- (void) starting: (NSTimer*)t
{
  /* On entry t is either a one-shot timer which will automatically
   * be invalidated after the method completes, or nil (method called
   * explicitly, so the timer must be invalidated here).
   * Either way the timer is no longer valid and a new one will need
   * to be created unless startup has completed.
   */
  if (debug)
    {
      NSLog(@"-starting: for %@ at %@", self, [NSThread callStackSymbols]);
    }
  if (inStarting)
    {
      return;	// Re-entrant call ignored.
    }
  inStarting = YES;
  NS_DURING
    {
      EcCommand		*command = (EcCommand*)EcProc;
      NSTimeInterval	ti = 0.0;

      [startingTimer invalidate];
      startingTimer = nil;

      /* NB. every early exit from inside the NS_DURING block must clear
       * the inStarting flag before returning.
       */
      if (NO == [self isStarting])
	{
	  EcExceptionMajor(nil, @"-starting: when not starting for %@", self);
	  inStarting = NO;
	  NS_VOIDRETURN;
	}
      if (client != nil)
	{
	  EcExceptionMajor(nil, @"-starting: after registered for %@", self);
	  inStarting = NO;
	  NS_VOIDRETURN;
	}
      if ([self checkAbandonedStartup])
	{
	  inStarting = NO;
	  NS_VOIDRETURN;
	}

      if (0 == identifier)
	{
	  NSString	*r = [self reasonToPreventLaunch];

	  if (nil == r)
	    {
	      /* We are able to launch now
	       */
	      [launchQueue removeObject: self];
	      queuedDate = 0.0;
	      terminationDate = 0.0;
	      terminationStatusKnown = NO;
	      if (NO == [self launch])
		{
		  ti = [self delay];	// delay between launch attempts
		  [launchQueue addObject: self];
		  queuedDate = [NSDate timeIntervalSinceReferenceDate];
		  [command logChange: @"queued (launch failed)" for: name];
		}
	      else
		{
		  if (client != nil)
		    {
		      inStarting = NO;
		      NS_VOIDRETURN;	// Connection established.
		    }
		  ti = 0.0;	// Calculate the time to wait below
		  [command logChange: @"launched" for: name];
		}
	    }
	  else
	    {
	      BOOL		alreadyQueued;
	      NSTimeInterval	now;

	      alreadyQueued = [launchQueue containsObject: self];
	      now = [NSDate timeIntervalSinceReferenceDate];
	      if (deferredDate > 0.0 && now < deferredDate)
		{
		  /* We are waiting for a retry at a specific time.
		   * If we are not already queued, add to queue.
		   */
		  ti = deferredDate - now;
		  if (NO == alreadyQueued)
		    {
		      [launchQueue addObject: self];
		      queuedDate = [NSDate timeIntervalSinceReferenceDate];
		    }
		}
	      else
		{
		  /* Launching is prevented for a non-time-based reason,
		   * so we reset the time from which we count launching as
		   * started and specify a timer for checking again.
		   */
		  startingDate = [NSDate timeIntervalSinceReferenceDate];
		  ti = 1.0;
		  if (NO == alreadyQueued)
		    {
		      [launchQueue addObject: self];
		      queuedDate = [NSDate timeIntervalSinceReferenceDate];
		    }
		}
	      if (NO == alreadyQueued)
		{
		  r = [NSString stringWithFormat: @"queued (%@)", r];
		  [command logChange: r for: name];
		}
	    }
	}

      /* If no interval was chosen above, wait until 30 seconds after the
       * start of startup, or (once that has passed) raise the alarm and
       * check again in a minute.
       */
      if (0.0 == ti && startingDate > 0.0)
	{
	  ti = [NSDate timeIntervalSinceReferenceDate];
	  if (ti - startingDate < 30.0)
	    {
	      /* We need to raise an alarm if it takes longer than 30 seconds
	       * to start up the process.
	       */
	      ti = 30.0 - (ti - startingDate);
	    }
	  else
	    {
	      if (NO == startingAlarm)
		{
		  startingAlarm = YES;
		  [command alarmCode: ACLaunchFailed
			    procName: name
			     addText: @"Client not active after launch attempt"];
		}
	      ti = 60.0;
	    }
	}
      if (nil != startingTimer)
	{
	  [startingTimer invalidate];
	  EcExceptionMajor(nil, @"startingTimer reset %@", self);
	}
      if (startingDate > 0.0)
	{
	  startingTimer = [NSTimer scheduledTimerWithTimeInterval: ti
							   target: self
							 selector: _cmd
							 userInfo: name
							  repeats: NO];
	}
      else
	{
	  NSLog(@"Startup cancelled in -starting: for %@", self);
	}
    }
  NS_HANDLER
    {
      EcExceptionMajor(localException, @"Problem -starting: %@", self);
    }
  NS_ENDHANDLER
  inStarting = NO;
}

/* Returns the date at which the current startup began, or nil if the
 * process is not starting.
 */
- (NSDate*) startingDate
{
  return (startingDate > 0.0) ? date(startingDate) : (NSDate*)nil;
}

/* Returns a human readable description of the current state of the
 * process.
 */
- (NSString*) status
{
  NSString	*status;

  if ([self isStarting])
    {
      status = [NSString stringWithFormat: @"Starting (pid:%d) since %@",
	[self processIdentifier], date(startingDate)];
    }
  else if ([self isStopping])
    {
      status = [NSString stringWithFormat: @"Stopping (pid:%d) since %@",
	[self processIdentifier], date(stoppingDate)];
    }
  else if ([self hungDate] > 0.0)
    {
      status = [NSString stringWithFormat: @"Hung (pid:%d) since %@",
	[self processIdentifier], date(hungDate)];
    }
  else if (nil == client)
    {
      if (queuedDate > 0.0)
	{
	  status = [NSString stringWithFormat: @"Queued since %@",
	    date(queuedDate)];
	}
      else if (manual)
	{
	  status = @"Manually stopped";
	}
      else
	{
	  status = @"Not active";
	}
    }
  else
    {
      if ([self stable])
	{
	  status = [NSString stringWithFormat: @"Active (pid:%d) stable",
	    [self processIdentifier]];
	}
      else
	{
	  status = [NSString stringWithFormat: @"Active (pid:%d) since %@",
	    [self processIdentifier], date(registrationDate)];
	}
    }
  return status;
}

/* Initiates shutdown of the process (unless it is already stopping or is
 * not alive) and then calls -stopping: to monitor the shutdown.
 */
- (void) stop
{
  if (debug)
    {
      NSLog(@"-stop for %@ at %@", self, [NSThread callStackSymbols]);
    }
  if ([self isStopping])
    {
      NSLog(@"-stop called when already stopping for %@", self);
    }
  else if (NO == [self checkActive] && 0 == identifier)
    {
      NSLog(@"-stop called when not alive for %@", self);
    }
  else
    {
      [self resetDelay];
      stoppingDate = [NSDate timeIntervalSinceReferenceDate];
      abortDate = [self abortDateFromStoppingDate: stoppingDate];
      if ([self hungDate] > 0.0 && identifier > 0)
	{
	  /* The process is hung and we assume it can't shut down gracefully.
	   * We should have started a subtask to get process status information
	   * and will abort the process later.
	   */
	  NSLog(@"-stop hung process %d for %@", identifier, self);
	}
      else if (nil == client)
	{
	  /* No connection to client established ... try to shut it down
	   * using a signal.
	   */
	  NSLog(@"-stop kills process %d for %@", identifier, self);
	  kill(identifier, SIGTERM);
	}
      else
	{
	  NS_DURING
	    {
	      if (nil == restartReason)
		{
		  NSLog(@"-stop sends -cmdQuit:0 for %@", self);
		  [[client obj] cmdQuit: 0];
		}
	      else
		{
		  NSLog(@"-stop sends -ecRestart:%@ for %@",
		    restartReason, self);
		  [[client obj] ecRestart: restartReason];
		}
	    }
	  NS_HANDLER
	    {
	      /* Client failed to respond.
	       */
	      if (nil != client)
		{
		  [self clearClient: client cleanly: NO];
		}
	      NSLog(@"Exception sending command to %@", localException);
	    }
	  NS_ENDHANDLER
	}
      [self stopping: nil];
    }
}

/* Sets the desired state to Dead and, if the process is alive and not
 * already starting or stopping, records the reason and initiates shutdown.
 */
- (void) stop: (NSString*)reason
{
  if (desired != Dead)
    {
      [self setDesired: Dead];
    }
  if ([self isStopping])
    {
      NSLog(@"-stop: '%@' ignored (already stopping) for %@", reason, self);
    }
  else if ([self isStarting])
    {
      NSLog(@"-stop: '%@' deferred (currently starting) for %@", reason, self);
      ASSIGN(stoppedReason, reason);
    }
  else if ([self checkActive] || identifier > 0)
    {
      ASSIGN(stoppedReason, reason);
      [self stop];
    }
  else
    {
      NSLog(@"-stop: '%@' ignored (already Dead) for %@", reason, self);
    }
}

/* Does the work associated with the end of a process: resets the stopping
 * state, works out the reason to audit and any alarm text to raise,
 * schedules a restart if one is wanted, and then moves on towards the
 * desired state.
 */
- (void) stopped
{
  EcCommand	*command = (EcCommand*)EcProc;
  NSString	*text = nil;	// Text to appear in alarm.
  NSString	*reason;	// Reason to appear in audit

  if (debug)
    {
      NSLog(@"-stopped for %@ at %@", self, [NSThread callStackSymbols]);
    }
  [stoppingTimer invalidate];
  stoppingTimer = nil;
  stoppingDate = 0.0;
  registrationDate = 0.0;
  awakenedDate = 0.0;
  stableDate = 0.0;
  abortDate = 0.0;
  [self clearHung];

  if (clientLostDate > 0.0 || clientQuitDate > 0.0)
    {
      BOOL	failed = NO;

      reason = AUTORELEASE(stoppedReason);
      stoppedReason = nil;
      if (nil == reason)
	{
	  if (terminationStatusKnown && terminationSignal != 0)
	    {
	      /* If the process died due to a signal, it was lost rather
	       * than shutting down normally.
	       */
	      failed = YES;
	      if (terminationSignal < 0)
		{
		  reason = @"forcibly killed because shutdown took too long";
		}
	      else
		{
		  reason = [NSString stringWithFormat:
		    @"stopped (died with signal %d)", terminationSignal];
		}
	    }
	  else if (terminationStatusKnown && terminationStatus != 0)
	    {
	      /* If the process died with a non-zero exit status it
	       * failed rather than shutting down normally.
	       */
	      failed = YES;
	      if (-1 == terminationStatus)
		{
		  reason = @"stopped (restart required: exit code -1)";
		}
	      else if (-2 == terminationStatus)
		{
		  reason = @"stopped (gdomap rejection: exit code -2)";
		}
	      else if (-3 == terminationStatus)
		{
		  reason = @"stopped (configuration error: exit code -3)";
		}
	      else if (-4 == terminationStatus)
		{
		  reason = @"stopped (Command rejection: exit code -4)";
		}
	      else if (-5 == terminationStatus)
		{
		  reason = @"stopped (MemoryMaximum reached: exit code -5)";
		}
	      else if (terminationStatus > 0)
		{
		  reason = [NSString stringWithFormat:
		    @"stopped (probably caught signal: exit code %d)",
		    terminationStatus];
		}
	      else
		{
		  reason = [NSString stringWithFormat:
		    @"stopped (with exit code %d)", terminationStatus];
		}
	    }
	  else if (clientLostDate > 0.0)
	    {
	      /* The process was lost without unregistering from the Command
	       * server so it was not an orderly shutdown.
	       */
	      failed = YES;
	      reason = @"stopped (process lost)";
	    }
	  else
	    {
	      /* The process was shut down with no reason supplied but in an
	       * orderly manner, so that must have been because something
	       * other than the Command server asked it to shut itself down.
	       */
	      reason = @"stopped externally";
	    }
	}

      if (Live == desired && nil == restartReason)
	{
	  /* We wanted the process to be kept alive and we didn't
	   * ask for it to be restarted.  Even if the shutdown was
	   * orderly, we didn't want it to happen and should raise
	   * an alarm about it.
	   */
	  failed = YES;
	}

      if (failed || clientLostDate > 0.0)
	{
	  /* Work out the text for the alarm to be raised below. */
	  if (terminationStatusKnown)
	    {
	      if (terminationSignal != 0)
		{
		  if (terminationSignal < 0)
		    {
		      text = @"forcibly killed because shutdown took too long";
		    }
		  else
		    {
		      text = [NSString stringWithFormat:
			@"termination signal %d", terminationSignal];
		    }
		}
	      else if (-1 == terminationStatus)
		{
		  text = @"termination status -1 (restart required)";
		}
	      else if (-2 == terminationStatus)
		{
		  text = @"termination status -2 (gdomap rejection)";
		}
	      else if (-3 == terminationStatus)
		{
		  text = @"termination status -3 (configuration error)";
		}
	      else if (-4 == terminationStatus)
		{
		  text = @"termination status -4 (Command rejection)";
		}
	      else if (-5 == terminationStatus)
		{
		  text = @"termination status -5 (MemoryMaximum reached)";
		}
	      else
		{
		  text = [NSString stringWithFormat:
		    @"termination status %d", terminationStatus];
		}
	    }
	  else
	    {
	      text = @"termination status unknown";
	    }
	}
      else if (clientQuitDate > 0.0)
	{
	  /* Clean shutdown (process unregistered itself).
	   */
	  [self resetDelay];
	}
    }
  else
    {
      /* Loss of a process which hadn't connected/registered.
       * This should not be audited as a stop since it did not start.
       */
      reason = nil;
    }

  /* We schedule a restart *before* doing anything which might run the
   * event loop.
   */
  if (NO == [self isStarting])
    {
      if (nil == restartReason && terminationStatusKnown
	&& 0 == terminationSignal && restartStatus(terminationStatus))
	{
	  restartReason = @"requested by process";
	}

      if (restartReason)
	{
	  /* The process is supposed to restart, so we schedule a start.
	   */
	  if (Dead == desired)
	    {
	      /* I suppose there could have been a config change disabling
	       * the process or a quit command to it which came in after
	       * the restart was scheduled.
	       */
	      NSLog(@"Restart (%@) overridden for %@", restartReason, self);
	      DESTROY(restartReason);
	    }
	  else
	    {
	      [self start];
	    }
	}
      else if (Live == desired)
	{
	  [self start];
	}
      else if (Dead == desired && NO == [self disabled])
	{
	  /* manual shutdown ... revert to normal state
	   */
	  [self setDesired: None];
	}
    }

  if (reason)
    {
      [command auditState: self reason: reason];
    }
  if (text)
    {
      [command alarmCode: ACProcessLost
		procName: name
		 addText: text];
    }
  [self progress];
  [LaunchInfo processQueue];
}

- (void) stopping: (NSTimer*)t
{
  NSTimeInterval	now;
  NSTimeInterval	ti;

  if (debug)
    {
      NSLog(@"-stopping: for %@ at %@", self, [NSThread callStackSymbols]);
    }
  [stoppingTimer invalidate];
  stoppingTimer = nil;

  /* Still alive if:
   * a. we still have a DO network connection to the process
   * or
   * b. the process which registered with us is still alive
   */
  if (nil == client && NO == [self checkProcess])
    {
      if (nil == task)
	{
	  [self stopped];
	  return;
	}
    }

  now = [NSDate timeIntervalSinceReferenceDate];
  if (stoppingDate <= 0.0)
    {
      stoppingDate = now;
    }
  if (abortDate <= 0.0)
    {
      abortDate = [self abortDateFromStoppingDate: stoppingDate];
    }
  if (abortDate <= now)
    {
      /* Maximum time for clean shutdown has passed.
       */
      [[NSNotificationCenter defaultCenter]
	removeObserver: self
	name: NSTaskDidTerminateNotification
	object: task];
      launchDate = 0.0;
      if (identifier > 0)
	{
	  /* Do the housekeeping from -taskTerminated: before kill
	   */
	  if (nil != task)
	    {
	      [self taskCleanup: task];
	    }
	  terminationSignal = -1;	// We use -1 to indicate a forced quit
	  terminationStatus = 0;
	  terminationStatusKnown = YES;
	  terminationDate = [NSDate timeIntervalSinceReferenceDate];
	  terminationCount++;
	  kill(identifier, SIGKILL);
	  identifier = 0;
	}
      if (client != nil)
	{
	  [self clearClient: client cleanly: NO];
	}
      /* A forced shutdown is treated as loss of the client. */
      clientLostDate = now;
      [self stopped];
    }
  else
    {
      /* Time remaining before a forced shutdown (at least a millisecond). */
      ti = abortDate - now;
      if (ti < 0.001)
	{
	  ti = 0.001;
	}
      if (nil == client && nil == task && ti > 0.1)
	{
	  /* This can happen if a process was launched externally and
	   * connected to the Command server (so we know its PID).
	   * We will not be notified when the process dies so we must
	   * poll frequently for it.
*/ ti = 0.1; } stoppingTimer = [NSTimer scheduledTimerWithTimeInterval: ti target: self selector: _cmd userInfo: name repeats: NO]; } } - (NSDate*) stoppingDate { return (stoppingDate > 0.0) ? date(stoppingDate) : (NSDate*)nil; } - (NSTask*) task { return AUTORELEASE(RETAIN(task)); } - (void) taskCleanup: (NSTask*)t { if (nil == t) { /* For a fake termination, use existing task, if any. */ t = task; } if (nil != t) { NSFileHandle *fh = [t standardInput]; [[NSNotificationCenter defaultCenter] removeObserver: self name: NSTaskDidTerminateNotification object: t]; if ([fh isKindOfClass: [NSPipe class]]) { fh = [(NSPipe*)fh fileHandleForWriting]; } if (fh) { [[NSNotificationCenter defaultCenter] removeObserver: self name: GSFileHandleWriteCompletionNotification object: fh]; NS_DURING { [fh closeFile]; } NS_HANDLER NS_ENDHANDLER } } if (t == task) { DESTROY(task); } } - (void) taskTerminated: (NSNotification*)n { NSTask *t = (NSTask*)[n object]; if (nil == t) { /* For a fake termination, use existing task, if any. */ t = task; } if (t == task) { terminationCount++; if (NSTaskTerminationReasonUncaughtSignal == [task terminationReason]) { terminationSignal = [task terminationStatus]; terminationStatus = 0; } else { terminationSignal = 0; terminationStatus = [task terminationStatus]; /* This is an 8-bit value for the OS on Linux and won't have * been sign-extended when placed in an integer. So lets * convert it to a negative integer when necessary. 
*/ if (terminationStatus > 127 && terminationStatus < 256) { terminationStatus = terminationStatus - 256; } } terminationStatusKnown = YES; terminationDate = [NSDate timeIntervalSinceReferenceDate]; launchDate = 0.0; if (terminationSignal != 0) { NSLog(@"Termination signal %d for %@ (pid %d)", terminationSignal, name, identifier); } else if (terminationStatus != 0) { NSLog(@"Termination status %d for %@ (pid %d)", terminationStatus, name, identifier); } identifier = 0; [self taskCleanup: t]; [self stopping: nil]; } else if (t) { [self taskCleanup: t]; } } - (void) taskWritten: (NSNotification*)n { NSFileHandle *fh = (NSFileHandle*)[n object]; [[NSNotificationCenter defaultCenter] removeObserver: self name: GSFileHandleWriteCompletionNotification object: fh]; NS_DURING { [fh closeFile]; } NS_HANDLER NS_ENDHANDLER } - (NSArray*) unfulfilled { NSMutableArray *d = [[conf objectForKey: @"Deps"] mutableCopy]; NSUInteger c = [d count]; while (c-- > 0) { NSString *n = [d objectAtIndex: c]; LaunchInfo *l = [LaunchInfo existing: n]; if ([l client] != nil) { [d removeObjectAtIndex: c]; } } return AUTORELEASE(d); } @end @implementation EcCommand - (unsigned) activeCount { return (unsigned)[clients count]; } - (oneway void) alarm: (in bycopy EcAlarm*)alarm { if (NO == [NSThread isMainThread]) { [self performSelectorOnMainThread: _cmd withObject: alarm waitUntilDone: NO]; return; } NS_DURING { [control alarm: alarm]; } NS_HANDLER { NSLog(@"Exception sending alarm to Control: %@", localException); } NS_ENDHANDLER } /* Raise an alarm for a named process. 
*/ - (void) alarmCode: (AlarmCode)ac procName: (NSString*)name addText: (NSString*)additional { NSString *managedObject; NSString *problem; NSString *repair; EcAlarm *a; ENTER_POOL managedObject = EcMakeManagedObject(host, @"Command", name); ACStrings(ac, &problem, &repair); a = [EcAlarm alarmForManagedObject: managedObject at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmSoftwareProgramError specificProblem: problem perceivedSeverity: EcAlarmSeverityCritical proposedRepairAction: repair additionalText: additional]; [[LaunchInfo existing: name] alarm: a]; // Update launch info [self alarm: a]; LEAVE_POOL } - (void) auditState: (LaunchInfo*)l reason: (NSString*)additional { NSString *managedObject; NSString *problem; EcAlarm *a; /* For audit purposes we generate alarm clears without a corresponding * alarm raise. The SpecificProblem field therefore does not describe * a problem in these cases. */ if ([l isActive]) { problem = @"Started (audit information)"; NSLog(@"Started (%@) %@", additional, l); } else { problem = @"Stopped (audit information)"; NSLog(@"Stopped (%@) %@", additional, l); } managedObject = EcMakeManagedObject(host, @"Command", [l name]); a = [EcAlarm alarmForManagedObject: managedObject at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmSoftwareProgramError specificProblem: problem perceivedSeverity: EcAlarmSeverityCleared proposedRepairAction: @"none" additionalText: additional]; [a setAudit: YES]; [self alarm: a]; [self update]; } - (void) clear: (EcAlarm*)alarm { EcClientI *c; NSAssert(EcAlarmSeverityCleared == [alarm perceivedSeverity], NSInvalidArgumentException); /* Clear the alarm in the named client (if found). */ c = [self findIn: clients byName: [alarm moInstancedProcess]]; [[[c obj] ecAlarmDestination] alarm: alarm]; } /* Clears all alarms we have raised (or an earlier instance might have raised * and failed to clear due to a crash). 
*/ - (void) clearAll: (NSString*)name addText: (NSString*)additional { NSString *managedObject; NSString *problem; NSString *repair; EcAlarm *a; LaunchInfo *l; NSArray *e; AlarmCode c; ENTER_POOL managedObject = EcMakeManagedObject(host, @"Command", name); /* Get launch info and any known existing alarms for it. The array of * alarms will be nil if we don't know what has been raised. */ l = [LaunchInfo existing: name]; e = [l alarms]; if (nil == e) { for (c = ACLaunchFailed; c <= ACProcessLost; c++) { ACStrings(c, &problem, &repair); repair = @"cleared"; a = [EcAlarm alarmForManagedObject: managedObject at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmSoftwareProgramError specificProblem: problem perceivedSeverity: EcAlarmSeverityCleared proposedRepairAction: repair additionalText: additional]; [l alarm: a]; [self alarm: a]; } } else { NSUInteger count = [e count]; /* Clear any raised alarms */ while (count-- > 0) { a = [[e objectAtIndex: count] clear]; [l alarm: a]; [self alarm: a]; } } LEAVE_POOL } /* Clear an alarm for a named process. 
*/ - (void) clearCode: (AlarmCode)ac procName: (NSString*)name addText: (NSString*)additional { NSString *managedObject; NSString *problem; NSString *repair; EcAlarm *a; ENTER_POOL managedObject = EcMakeManagedObject(host, @"Command", name); ACStrings(ac, &problem, &repair); a = [EcAlarm alarmForManagedObject: managedObject at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmSoftwareProgramError specificProblem: problem perceivedSeverity: EcAlarmSeverityCleared proposedRepairAction: repair additionalText: additional]; [[LaunchInfo existing: name] alarm: a]; // Update launch info [self alarm: a]; LEAVE_POOL } - (EcClientI*) alive: (NSString*)name { EcClientI *found = nil; found = [self findIn: clients byName: name]; if (nil == found) { CREATE_AUTORELEASE_POOL(pool); id proxy = nil; NS_DURING { NSConnection *c; c = [NSConnection connectionWithRegisteredName: name host: @"" usingNameServer: [NSSocketPortNameServer sharedInstance]]; NS_DURING { /* Do not hang waiting for the other end to respond. 
*/ [c setRequestTimeout: 120.0]; [c setReplyTimeout: 120.0]; proxy = (id)[c rootProxy]; [c setRequestTimeout: 0.0]; [c setReplyTimeout: 0.0]; } NS_HANDLER { [c setRequestTimeout: 0.0]; [c setReplyTimeout: 0.0]; } NS_ENDHANDLER if (nil != proxy) { [proxy ecReconnect]; [[self logFile] printf: @"%@ requested reconnect %@\n", [NSDate date], name]; } } NS_HANDLER { NSLog(@"Problem %@", localException); proxy = nil; } NS_ENDHANDLER if (nil != proxy) { NSDate *when = [NSDate dateWithTimeIntervalSinceNow: 1.0]; while (nil == (found = [self findIn: clients byName: name]) && [when timeIntervalSinceNow] > 0.0) { NSDate *next = [NSDate dateWithTimeIntervalSinceNow: 0.1]; [[NSRunLoop currentRunLoop] runMode: NSDefaultRunLoopMode beforeDate: next]; } } DESTROY(pool); } return found; } - (void) cmdDefaultsChanged: (NSNotification*)n { [self cmdUpdated]; } - (NSString*) cmdUpdated { NSUserDefaults *defs = [self cmdDefaults]; NSInteger i; i = [defs integerForKey: @"CompressDebugAfter"]; if (i < 1) { i = 7; } debCompressAfter = i; i = [defs integerForKey: @"DeleteDebugAfter"]; if (i < 1) { i = 90; } debDeleteAfter = i; if (debDeleteAfter < debCompressAfter) { debDeleteAfter = debCompressAfter; } i = [defs integerForKey: @"CompressLogsAfter"]; if (i < 1) { i = 7; } logCompressAfter = i; i = [defs integerForKey: @"DeleteLogsAfter"]; if (i < 1) { i = 180; } logDeleteAfter = i; if (logDeleteAfter < logCompressAfter) { logDeleteAfter = logCompressAfter; } return nil; } - (void) disableLaunching { launchEnabled = NO; } - (oneway void) domanage: (in bycopy NSString*)managedObject { NS_DURING { [control domanage: managedObject]; } NS_HANDLER { NSLog(@"Exception sending domanage: to Control: %@", localException); } NS_ENDHANDLER } - (void) ecAwaken { [self contactControl]; [super ecAwaken]; [[self ecAlarmDestination] setCoalesce: NO]; if (NO == [[self cmdDefaults] boolForKey: @"LaunchStartSuspended"]) { [self enableLaunching]; } /* Start housekeeping timer. 
*/ [self _housekeeping: nil]; } - (void) enableLaunching { launchEnabled = YES; [LaunchInfo processQueue]; } - (oneway void) unmanage: (in bycopy NSString*)managedObject { NS_DURING { [control unmanage: managedObject]; } NS_HANDLER { NSLog(@"Exception sending unmanage: to Control: %@", localException); } NS_ENDHANDLER } - (NSFileHandle*) openLog: (NSString*)lname { NSFileManager *mgr = [NSFileManager defaultManager]; NSFileHandle *lf; if ([mgr isWritableFileAtPath: lname] == NO) { if ([mgr createFileAtPath: lname contents: nil attributes: nil] == NO) { NSLog(@"Log file '%@' is not writable and can't be created", lname); return nil; } } lf = [NSFileHandle fileHandleForUpdatingAtPath: lname]; if (lf == nil) { NSLog(@"Unable to log to %@", lname); return nil; } [lf seekToEndOfFile]; return lf; } - (void) newConfig: (NSMutableDictionary*)newConfig { NSString *diskCache; NSData *data; diskCache = [[self cmdDataDirectory] stringByAppendingPathComponent: @"CommandConfig.cache"]; if (NO == [newConfig isKindOfClass: [NSMutableDictionary class]] || 0 == [newConfig count]) { /* If we are called with a nil argument, we must obtain the config * from local disk cache (if available). 
*/ if (nil != (data = [NSData dataWithContentsOfFile: diskCache])) { newConfig = [NSPropertyListSerialization propertyListWithData: data options: NSPropertyListMutableContainers format: 0 error: 0]; } if (NO == [newConfig isKindOfClass: [NSMutableDictionary class]] || 0 == [newConfig count]) { return; } } else { data = nil; } if (nil == config || [config isEqual: newConfig] == NO) { NSDictionary *d; NSArray *a; unsigned i; ASSIGN(config, newConfig); d = [config objectForKey: [self cmdName]]; launchLimit = 0; if ([d isKindOfClass: [NSDictionary class]] == YES) { NSMutableArray *missing; NSDictionary *conf; NSEnumerator *e; id o; NSInteger i; NSMutableArray *newOrder; NSString *k; NSString *err = nil; NSTimeInterval ti; NS_DURING { NSMutableDictionary *m = AUTORELEASE([d mutableCopy]); [self cmdUpdate: m]; d = m; } NS_HANDLER { NSLog(@"Problem before updating config (in cmdUpdate:) %@", localException); err = @"the -cmdUpdate: method raised an exception"; } NS_ENDHANDLER if (nil == err) { NS_DURING err = [self cmdUpdated]; NS_HANDLER NSLog(@"Problem after updating config (in cmdUpdated) %@", localException); err = @"the -cmdUpdated method raised an exception"; NS_ENDHANDLER } if ([err length] > 0) { EcAlarm *a; /* Truncate additional text to fit if necessary. 
*/ err = [err stringByTrimmingSpaces]; if ([err length] > 255) { err = [err substringToIndex: 255]; while (255 < strlen([err UTF8String])) { err = [err substringToIndex: [err length] - 1]; } } a = [EcAlarm alarmForManagedObject: nil at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmConfigurationOrCustomizationError specificProblem: @"configuration error" perceivedSeverity: EcAlarmSeverityMajor proposedRepairAction: _(@"Correct config or software (check log for details).") additionalText: err]; [self alarm: a]; } else { EcAlarm *a; a = [EcAlarm alarmForManagedObject: nil at: nil withEventType: EcAlarmEventTypeProcessingError probableCause: EcAlarmConfigurationOrCustomizationError specificProblem: @"configuration error" perceivedSeverity: EcAlarmSeverityCleared proposedRepairAction: nil additionalText: nil]; [self alarm: a]; } debug = [[d objectForKey: @"CommandDebug"] boolValue]; /* The time allowed for a process to respond to pings defaults * to 240 seconds but may be configured in the range from 10 to 600 */ ti = [[d objectForKey: @"CommandPingTime"] doubleValue]; if (ti == ti && ti > 0.0) { if (ti < 10.0) ti = 10.0; if (ti > 600.0) ti = 600.0; pingTime = ti; } else { pingTime = 120.0; } /* The time allowed for a process to shut down cleanly defaults * to 120 seconds but may be configured in the range from 10 to 600 */ ti = [[d objectForKey: @"CommandQuitTime"] doubleValue]; if (ti == ti && ti > 0.0) { if (ti < 10.0) ti = 10.0; if (ti > 600.0) ti = 600.0; quitTime = ti; } else { quitTime = 120.0; } /* We may not have more than this number of tasks launching at * any one time. Once the launch limit is reached we should * launch new tasks as and when launching tasks complete their * startup and register with this process. 
*/ i = [[[d objectForKey: @"LaunchLimit"] description] intValue]; if (i <= 0) { launchLimit = 20; } else { launchLimit = (NSUInteger)i; } missing = AUTORELEASE([[LaunchInfo names] mutableCopy]); o = [d objectForKey: @"LaunchOrder"]; if ([o isKindOfClass: [NSArray class]]) { newOrder = AUTORELEASE([o mutableCopy]); } else { if (nil != o) { NSLog(@"bad 'LaunchOrder' config (not an array) ignored"); } newOrder = nil; } conf = [d objectForKey: @"Launch"]; if ([conf isKindOfClass: [NSDictionary class]] == NO) { NSLog(@"No 'Launch' information in latest config update"); newOrder = nil; } else { NSMutableDictionary *md = [NSMutableDictionary dictionary]; NSUInteger entryCount = 0; e = [conf keyEnumerator]; while ((k = [e nextObject]) != nil) { NSMutableDictionary *d; id o = [conf objectForKey: k]; if ([o isKindOfClass: [NSDictionary class]] == NO) { NSLog(@"bad 'Launch' information for %@", k); continue; } d = AUTORELEASE([o mutableCopy]); o = [d objectForKey: @"Auto"]; if (o != nil && [o isKindOfClass: [NSString class]] == NO) { NSLog(@"bad 'Launch' Auto for %@", k); continue; } o = [d objectForKey: @"Time"]; if (o != nil && ([o isKindOfClass: [NSString class]] == NO || [o intValue] < 1 || [o intValue] > 600)) { NSLog(@"bad 'Launch' Time for %@", k); continue; } o = [d objectForKey: @"Disabled"]; if (o != nil && [o isKindOfClass: [NSString class]] == NO) { NSLog(@"bad 'Launch' Disabled for %@", k); continue; } o = [d objectForKey: @"Args"]; if (o != nil && [o isKindOfClass: [NSArray class]] == NO) { NSLog(@"bad 'Launch' Args for %@", k); continue; } o = [d objectForKey: @"Home"]; if (o != nil && [o isKindOfClass: [NSString class]] == NO) { NSLog(@"bad 'Launch' Home for %@", k); continue; } o = [d objectForKey: @"Prog"]; if (o == nil || [o isKindOfClass: [NSString class]] == NO) { NSLog(@"bad 'Launch' Prog for %@", k); continue; } o = [d objectForKey: @"AddE"]; if (o != nil && [o isKindOfClass: [NSDictionary class]] == NO) { NSLog(@"bad 'Launch' AddE for %@", k); 
continue; } o = [d objectForKey: @"SetE"]; if (o != nil && [o isKindOfClass: [NSDictionary class]] == NO) { NSLog(@"bad 'Launch' SetE for %@", k); continue; } o = [d objectForKey: @"Deps"]; if (o != nil) { if ([o isKindOfClass: [NSArray class]] == NO) { NSLog(@"bad 'Launch' Deps for %@ (not an array)", k); continue; } o = AUTORELEASE([o mutableCopy]); [d setObject: o forKey: @"Deps"]; } [md setObject: d forKey: k]; } while (entryCount != [md count]) { entryCount = [md count]; e = [[md allKeys] objectEnumerator]; while (nil != (k = [e nextObject])) { NSDictionary *d = [md objectForKey: k]; NSArray *a = [d objectForKey: @"Deps"]; NSUInteger c = [a count]; while (c-- > 0) { NSString *name = [a objectAtIndex: c]; if ([name isEqual: k]) { NSLog(@"bad 'Launch' Deps for %@" @" (depends on self)", k); [md removeObjectForKey: k]; } if (nil == [md objectForKey: name]) { NSLog(@"bad 'Launch' Deps for %@" @" (depends on %@)", k, name); [md removeObjectForKey: k]; } } } } conf = md; /* Validate the LaunchOrder array. */ if (newOrder != nil) { NSUInteger c; c = [newOrder count]; while (c-- > 0) { o = [newOrder objectAtIndex: c]; if (NO == [o isKindOfClass: [NSString class]]) { NSLog(@"bad 'LaunchOrder' item ('%@' at %u) ignored" @" (not a server name)", o, (unsigned)c); [newOrder removeObjectAtIndex: c]; } else if ([newOrder indexOfObject: o] != c) { NSLog(@"bad 'LaunchOrder' item ('%@' at %u) ignored" @" (repeat of earlier item)", o, (unsigned)c); [newOrder removeObjectAtIndex: c]; } else if (nil == [conf objectForKey: o]) { NSLog(@"bad 'LaunchOrder' item ('%@' at %u) ignored" @" (not in 'Launch' dictionary)", o, (unsigned)c); [newOrder removeObjectAtIndex: c]; } } } /* Now that we have validated the config, we update * (creating if necessary) the LaunchInfo object. 
*/ e = [conf keyEnumerator]; while (nil != (k = [e nextObject])) { LaunchInfo *l; if ((l = [LaunchInfo launchInfo: k]) != nil) { if (nil == [l client]) { EcClientI *c; /* Due to the config change, we may have a new * LaunchInfo object. There is also a possibility * that the process has already been launched * manually (or the config for the process was * removed and then restored). We must therefore * check and associate any existing registration * with the new launch info. */ c = [self findIn: clients byName: [l name]]; if (c != nil) { [l setClient: c]; } } [l setConfiguration: [conf objectForKey: k]]; [l progress]; [missing removeObject: k]; } } } /* Now any process names which have no configuration must be * removed from the list of launchable processes and have any * alarms cleared. */ e = [missing objectEnumerator]; while (nil != (k = [e nextObject])) { [self clearAll: k addText: @"process removed from config"]; [LaunchInfo remove: k]; } if ([newOrder count] == 0) { /* The default launch order is alphabetical by server name. */ o = [[LaunchInfo names] sortedArrayUsingSelector: @selector(compare:)]; ASSIGN(launchOrder, o); } else { NSEnumerator *e; NSString *k; /* Any missing servers are launched after others * they are in lexicographic order. 
*/ o = [[LaunchInfo names] sortedArrayUsingSelector: @selector(compare:)]; e = [o objectEnumerator]; while (nil != (k = [e nextObject])) { if (NO == [newOrder containsObject: k]) { [newOrder addObject: k]; } } ASSIGNCOPY(launchOrder, newOrder); } o = [d objectForKey: @"SetE"]; if (o != nil && NO == [o isKindOfClass: [NSDictionary class]]) { NSLog(@"Bad global 'SetE' information in latest config update"); o = nil; } if (nil == o) { o = [[NSProcessInfo processInfo] environment]; } NSMutableDictionary *env = [o mutableCopy]; o = [d objectForKey: @"AddE"]; if (o != nil && [o isKindOfClass: [NSDictionary class]] == NO) { NSLog(@"Bad global 'AddE' information in latest config update"); o = nil; } if (o) { [env addEntriesFromDictionary: o]; } ASSIGN(environment, env); k = [d objectForKey: @"NodesFree"]; if (YES == [k isKindOfClass: [NSString class]]) { nodesFree = [k floatValue]; nodesFree /= 100.0; } else { nodesFree = 0.0; } if (nodesFree < 0.02 || nodesFree > 0.9) { NSLog(@"bad or missing minimum disk 'NodesFree' ... using 10%%"); nodesFree = 0.1; } k = [d objectForKey: @"SpaceFree"]; if (YES == [k isKindOfClass: [NSString class]]) { spaceFree = [k floatValue]; spaceFree /= 100.0; } else { spaceFree = 0.0; } if (spaceFree < 0.02 || spaceFree > 0.9) { NSLog(@"bad or missing minimum disk 'SpaceFree' ... 
using 10%%"); spaceFree = 0.1; } } else { NSLog(@"No '%@' information in latest config update", [self cmdName]); } a = [NSArray arrayWithArray: clients]; i = [a count]; while (i-- > 0) { EcClientI *c = [a objectAtIndex: i]; if ([clients indexOfObjectIdenticalTo: c] != NSNotFound) { NS_DURING { NSData *d = [self configurationFor: [c name]]; if (nil != d) { [c setConfig: d]; [[c obj] updateConfig: d]; } } NS_HANDLER { NSLog(@"Setting config for client: %@", localException); } NS_ENDHANDLER } } if (nil == data) { /* Need to update on-disk cache */ data = [NSPropertyListSerialization dataFromPropertyList: newConfig format: NSPropertyListBinaryFormat_v1_0 errorDescription: 0]; [data writeToFile: diskCache atomically: YES]; } } } - (void) pingControl { if (control == nil) { return; } if (fwdSequence == revSequence) { outstanding = RETAIN([NSDate date]); NS_DURING { [control cmdPing: self sequence: ++fwdSequence extra: nil]; } NS_HANDLER { NSLog(@"Ping to control server - %@", localException); } NS_ENDHANDLER } else { NSLog(@"Ping to control server when one is already in progress."); } } - (oneway void) cmdGnip: (id )from sequence: (unsigned)num extra: (NSData*)data { if (from == control) { if (num != revSequence + 1 && revSequence != 0) { NSLog(@"Gnip from control server seq: %u when expecting %u", num, revSequence); if (num == 0) { fwdSequence = 0; // Reset } } revSequence = num; if (revSequence == fwdSequence) { DESTROY(outstanding); } } else { EcClientI *r; /* See if we have a fitting client - and update records. * A client is considered to be stable one it has been up for * at least three pings. The -setStable: method sets the client * as being stable and the -stable method returns a boolean to let * us know if a client is already stable. */ r = [self findIn: clients byObject: (id)from]; [r gnip: num]; if (r != nil) { NSString *n = [r name]; LaunchInfo *l = [LaunchInfo existing: n]; [l setPing]; // Record the fact that we have a ping response. 
if ([l hungDate] > 0.0) { /* Had a ping response, so the process is no longer hung. */ [l clearHung]; } if (num > 2) { /* This was a successful launch so we don't need to impose * a delay between launch attempts. */ [l resetDelay]; } if (NO == [l stable] && [l mayBecomeStable] > 0.0 && [l mayBecomeStable] <= [NSDate timeIntervalSinceReferenceDate]) { /* After the client has been responding to pings for a while, * we assume that client has completed startup and is running OK. * We can therefore clear any loss of client alarm, any * alarm for being unable to register, and launch failure * or fatal configuration alarms. */ [l setStable: YES]; [self clearAll: [l name] addText: @"process is now stable"]; } } } } - (BOOL) cmdIsClient { return NO; // Not a client of the Command server. } - (oneway void) cmdPing: (id )from sequence: (unsigned)num extra: (NSData*)data { /* Send back a response to let the other party know we are alive. */ [from cmdGnip: self sequence: num extra: nil]; } - (oneway void) cmdQuit: (NSInteger)sig { [[NSNotificationCenter defaultCenter] removeObserver: self name: NSConnectionDidDieNotification object: nil]; [[NSNotificationCenter defaultCenter] removeObserver: self name: NSTaskDidTerminateNotification object: nil]; if (sig == tStatus && control != nil) { NS_DURING { [control unregister: self]; } NS_HANDLER { NSLog(@"Exception unregistering from Control: %@", localException); } NS_ENDHANDLER } exit(sig); } - (void) command: (NSData*)dat to: (NSString*)t from: (NSString*)f { NSMutableArray *cmd = [NSPropertyListSerialization propertyListWithData: dat options: NSPropertyListMutableContainers format: 0 error: 0]; if (cmd == nil || [cmd count] == 0) { [self information: cmdLogFormat(LT_ERROR, @"bad command array") from: nil to: f type: LT_ERROR]; } else if (t == nil) { NSArray *allow = [self ecCommands: f]; NSString *m = @""; NSString *wd = cmdWord(cmd, 0); if ([wd length] == 0) { /* Quietly ignore. 
*/ } else if (matchCmd(wd, @"alarms", allow)) { NSMutableArray *a = [NSMutableArray array]; NSEnumerator *e = [launchInfo objectEnumerator]; LaunchInfo *l; while (nil != (l = [e nextObject])) { [a addObjectsFromArray: [l alarms]]; } if (0 == [a count]) { m = @"No alarms currently active.\n"; } else { NSMutableString *ms = [NSMutableString string]; int i; m = ms; [a sortUsingSelector: @selector(compare:)]; [ms appendFormat: @"Current alarms -\n"]; for (i = 0; i < [a count]; i++) { EcAlarm *alarm = [a objectAtIndex: i]; [ms appendFormat: @"%@\n", [alarm description]]; } } } else if (matchCmd(wd, @"archive", allow)) { m = [NSString stringWithFormat: @"\n%@\n", [self ecArchive: nil]]; } else if (matchCmd(wd, @"clear", allow)) { NSMutableArray *a = [NSMutableArray array]; NSEnumerator *e = [launchInfo objectEnumerator]; NSUInteger count = [cmd count]; LaunchInfo *l; while (nil != (l = [e nextObject])) { [a addObjectsFromArray: [l alarms]]; } if (count < 2) { m = @"The 'clear' command requires an alarm" @" address or the word all\n"; } else { NSMutableString *ms = [NSMutableString string]; NSUInteger alarmCount = [a count]; EcAlarm *alarm; EcAlarm *clear; NSUInteger index; m = ms; for (index = 1; index < count; index++) { NSUInteger addr; NSString *arg = [cmd objectAtIndex: index]; if ([arg caseInsensitiveCompare: _(@"all")] == NSOrderedSame) { NSUInteger i; for (i = 0; i < alarmCount; i++) { alarm = [a objectAtIndex: i]; clear = [alarm clear]; [ms appendFormat: @"Clearing %@\n", alarm]; l = [LaunchInfo existing: [alarm moComponent]]; [l alarm: clear]; [self alarm: clear]; } } else if (1 == sscanf([arg UTF8String], "%" PRIxPTR, &addr)) { NSUInteger i; alarm = nil; for (i = 0; i < alarmCount; i++) { alarm = [a objectAtIndex: i]; if ((NSUInteger)alarm == addr) { break; } alarm = nil; } if (nil == alarm) { [ms appendFormat: @"No alarm found with the address '%@'\n", arg]; } else { [ms appendFormat: @"Clearing %@\n", alarm]; clear = [alarm clear]; l = [LaunchInfo existing: 
[alarm moComponent]]; [l alarm: clear]; [self alarm: clear]; } } else { [ms appendFormat: @"Not a hexadecimal address: '%@'\n", arg]; } } } } else if (matchCmd(wd, @"help", allow)) { wd = cmdWord(cmd, 1); if ([wd length] == 0) { m = @"Commands are -\n" @"Help\tAlarms\tArchive\tClear\tControl\tLaunch\tList\tMemory\t" @"Quit\tRestart\tStatus\tTell\n\n" @"Type 'help' followed by a command word for details.\n" @"A command line consists of a sequence of words, " @"the first of which is the command to be executed. " @"A word can be a simple sequence of non-space characters, " @"or it can be a 'quoted string'. " @"Simple words are converted to lower case before " @"matching them against commands and their parameters. " @"Text in a 'quoted string' is NOT converted to lower case " @"but a '\\' character is treated in a special manner -\n" @" \\b is replaced by a backspace\n" @" \\f is replaced by a formfeed\n" @" \\n is replaced by a linefeed\n" @" \\r is replaced by a carriage-return\n" @" \\t is replaced by a tab\n" @" \\0 followed by up to 3 octal digits is replaced" @" by the octal value\n" @" \\x followed by up to 2 hex digits is replaced" @" by the hex value\n" @" \\ followed by any other character is replaced by" @" the second character.\n" @" This permits use of quotes and backslashes inside" @" a quoted string.\n"; } else { if (comp(wd, @"Alarms") >= 0) { m = @"Alarms\nLists all the alarms currently raised by this " @"Command process. Typically alarms about a client " @"process hanging or failing to launch properly.\n" @"These clear automatically when processes are stable.\n"; } else if (comp(wd, @"Archive") >= 0) { m = @"Archive\nArchives the log file. 
The archived log " @"file is stored in a subdirectory whose name is of " @"the form YYYYMMDDhhmmss being the date and time at " @"which the archive was created.\n"; } else if (comp(wd, @"Clear") >= 0) { m = @"Clear \nClears all alarms or the " @"alarms whose addresses are in the space separated " @"list.\nShould never be needed since these alarms " @"are cleared automatically when processes restart.\n"; } else if (comp(wd, @"Control") >= 0) { m = @"Control ...\nPasses the command to the Control " @"process. You may disconnect from this host by " @"typing 'control host'\n"; } else if (comp(wd, @"Launch") >= 0) { m = @"Launch \nAdds the named program to the list " @"of programs to be launched as soon as possible.\n" @"Launch all\nAdds all unlaunched programs which have " @"autolaunch enabled.\n"; } else if (comp(wd, @"List") >= 0) { m = @"List\nLists all the connected clients.\n" @"List launches\nLists the programs we can launch.\n" @"List limit\nReports concurrent launch attempt limit.\n" @"List order\nReports launch attempt order.\n" @"List process name\nReports detail on named process.\n"; } else if (comp(wd, @"Memory") >= 0) { m = @"Memory\nDisplays recent memory allocation stats.\n" @"Memory all\nDisplays all memory allocation stats.\n"; } else if (comp(wd, @"Quit") >= 0) { m = @"Quit 'name'\n" @"Shuts down the named client process(es).\n" @"Quit all\n" @"Shuts down all client processes.\n" @"Quit self\n" @"Shuts down the Command server for this host.\n"; } else if (comp(wd, @"Restart") >= 0) { m = @"Restart 'name'\n" @"Shuts down and starts the named client process(es).\n" @"Restart all\n" @"Shuts down and starts all client processes.\n" @"Restart self\n" @"Shuts down and starts Command server for this host.\n"; } else if (comp(wd, @"Resume") >= 0) { m = @"Resumes the launching/relaunching of tasks.\n" @"Has no effect if launching has not been suspended.\n"; } else if (comp(wd, @"Status") >= 0) { m = @"Status\nReports the status of the Command server.\n" 
@"Status name\nReports launch status of the process.\n"; } else if (comp(wd, @"Suspend") >= 0) { m = @"Suspends the launching/relaunching of tasks.\n" @"Has no effect if this has already been suspended.\n"; } else if (comp(wd, @"Tell") >= 0) { m = @"Tell 'name' 'command'\n" @"Sends the command to the named client(s).\n" @"You may use 'tell all ...' to send to all clients.\n"; } } } else if (matchCmd(wd, @"launch", allow)) { if (NO == launchEnabled) { m = @"Launching of tasks is suspended.\n" @"Use the Resume command to resume launching.\n"; } else if ([cmd count] > 1) { NSString *nam = [cmd objectAtIndex: 1]; NSMutableString *s = [NSMutableString string]; BOOL all = NO; if ([nam caseInsensitiveCompare: @"all"] == NSOrderedSame) { all = YES; } if ([launchOrder count] > 0) { NSArray *names; NSEnumerator *enumerator; NSString *key; /* Build array of process names matching request. */ if (YES == all) { names = launchOrder; } else { NSMutableArray *a = [NSMutableArray array]; enumerator = [launchOrder objectEnumerator]; while ((key = [enumerator nextObject]) != nil) { if (comp(nam, key) >= 0) { [a addObject: key]; } } names = a; } enumerator = [names objectEnumerator]; while ((key = [enumerator nextObject]) != nil) { LaunchInfo *l = [LaunchInfo existing: key]; if ([l disabled]) { if (NO == all) { [s appendFormat: @" %-32.32s disabled in config\n", [key UTF8String]]; } } else if (NO == [l autolaunch] && YES == all) { [s appendFormat: @" %-32.32s not autolaunchable in config\n", [key UTF8String]]; } else if ([l isActive]) { if (NO == all) { [s appendFormat: @" %-32.32s is already running\n", [key UTF8String]]; } } else if ([l isStarting]) { NSArray *u = [l unfulfilled]; if (NO == all || [u count] > 0) { NSString *r; if ([u count] > 0) { [s appendFormat: @" %-32.32s is queued waiting for %@\n", [key UTF8String], u]; } else if (nil != (r = [l reasonToPreventLaunch])) { [s appendFormat: @" %-32.32s queued: %@\n", [key UTF8String], r]; } else { [s appendFormat: @" %-32.32s is 
already starting\n", [key UTF8String]]; } } } else if ([l isStopping]) { [s appendFormat: @" %-32.32s is stopping (will restart)\n", [key UTF8String]]; [l start: @"Console launch command"]; } else { [s appendFormat: @" %-32.32s will be started\n", [key UTF8String]]; [l resetDelay]; [l start: @"Console launch command"]; } } if ([names count] == 0) { /* May happen if the name given doesn't match * anything in the launch array. */ [s appendString: @"I don't know how to launch that program.\n"]; } else if ([s length] == 0) { /* May happen if we were looking for all the * launchable processes and there weren't any. */ [s appendString: @"Nothing found to start.\n"]; } } else { [s appendString: @"There are no programs we can launch.\n"]; } m = s; } else { m = @"I need the name of a program to launch.\n"; } } else if (matchCmd(wd, @"list", allow)) { wd = cmdWord(cmd, 1); if ([wd length] == 0 || comp(wd, @"clients") >= 0) { if ([clients count] == 0) { m = @"No clients currently connected.\n"; } else { unsigned i; m = @"Current client processes -\n"; for (i = 0; i < [clients count]; i++) { EcClientI *c = [clients objectAtIndex: i]; LaunchInfo *l = [LaunchInfo existing: [c name]]; char *s = ""; if ([l isStopping]) { s = " stopping"; } else if ([l isStarting]) { s = " starting"; } else if ([l hungDate] > 0.0) { s = " hung"; } m = [NSString stringWithFormat: @"%@%2d. 
%-32.32s (pid:%d%s)\n", m, i, [[c name] cString], [c processIdentifier], s]; } } } else if (comp(wd, @"launches") >= 0) { NSArray *names = [LaunchInfo names]; if ([names count] > 0) { NSEnumerator *enumerator; NSString *key; m = @"Programs we can launch -\n"; enumerator = [[names sortedArrayUsingSelector: @selector(compare:)] objectEnumerator]; while ((key = [enumerator nextObject]) != nil) { LaunchInfo *l = [LaunchInfo existing: key]; NSString *status = [l status]; m = [m stringByAppendingFormat: @" %-32.32s ", [key cString]]; if ([l disabled] == YES) { m = [m stringByAppendingString: @"disabled in config\n"]; } else { m = [m stringByAppendingFormat: @"%@\n", status]; } } } else { m = @"There are no programs we can launch.\n"; } if (NO == launchEnabled) { m = [m stringByAppendingString: @"\nLaunching is suspended.\n"]; } } else if (comp(wd, @"limit") >= 0) { m = [NSString stringWithFormat: @"Limit of concurrent launch attempts is: %u\n", (unsigned)launchLimit]; } else if (comp(wd, @"order") >= 0) { m = [NSString stringWithFormat: @"Launch order is: %@\n", launchOrder]; } else if (comp(wd, @"process") >= 0) { NSEnumerator *enumerator; NSString *key; wd = cmdWord(cmd, 2); enumerator = [launchOrder objectEnumerator]; while ((key = [enumerator nextObject]) != nil) { if (comp(wd, key) >= 0) { LaunchInfo *l = [LaunchInfo existing: key]; m = [m stringByAppendingFormat: @"%@\n", l]; } } } } else if (matchCmd(wd, @"memory", allow)) { if (GSDebugAllocationActive(YES) == NO) { m = @"Memory statistics were not being gathered.\n" @"Statistics Will start from NOW.\n"; } else { const char* list; wd = cmdWord(cmd, 1); if ([wd length] > 0 && comp(wd, @"all") >= 0) { list = GSDebugAllocationList(NO); } else { list = GSDebugAllocationList(YES); } m = [NSString stringWithCString: list]; } } else if (matchCmd(wd, @"quit", allow)) { wd = cmdWord(cmd, 1); if ([wd length] > 0) { if (comp(wd, @"self") == 0) { if (terminating == nil) { NS_DURING { [control unregister: self]; } NS_HANDLER { 
NSLog(@"Exception unregistering from Control: %@", localException); } NS_ENDHANDLER exit(0); } else { m = [NSString stringWithFormat: @"Already terminating by %@\n", [terminating fireDate]]; } } else if (comp(wd, @"all") == 0) { [self quitAll]; m = @"All clients have been asked to shut down.\n"; } else { NSArray *a; unsigned i; BOOL found = NO; a = [self findAll: clients byAbbreviation: wd]; for (i = 0; i < [a count]; i++) { EcClientI *c = [a objectAtIndex: i]; NS_DURING { LaunchInfo *l; m = [m stringByAppendingFormat: @"Sending 'quit' to '%@'\n", [c name]]; m = [m stringByAppendingString: @" Please wait for this to be 'removed' before " @"proceeding.\n"]; l = [LaunchInfo existing: [c name]]; if (nil == l) { [[c obj] cmdQuit: 0]; } else { [l setManual: YES]; // autolauch overridden [l stop: @"Console quit command"]; [l checkAbandonedStartup]; } [self clearAll: [c name] addText: @"manually stopped"]; found = YES; } NS_HANDLER { NSLog(@"Caught exception: %@", localException); } NS_ENDHANDLER } if (NO == found && [launchInfo count] > 0) { NSEnumerator *enumerator; NSString *key; enumerator = [launchOrder objectEnumerator]; while ((key = [enumerator nextObject]) != nil) { if (comp(wd, key) >= 0) { LaunchInfo *l; found = YES; l = [LaunchInfo existing: key]; if ([l desired] == Dead) { m = [m stringByAppendingFormat: @"Suspended %@ already\n", key]; } else { [l stop: @"Console quit command"]; m = [m stringByAppendingFormat: @"Suspended %@\n", key]; } [l checkAbandonedStartup]; [self clearAll: [l name] addText: @"manually stopped"]; } } } if (NO == found) { m = [NSString stringWithFormat: @"Nothing to shut down as '%@'\n", wd]; } } } else { m = @"Quit what?.\n"; } } else if (matchCmd(wd, @"restart", allow)) { wd = cmdWord(cmd, 1); if ([wd length] > 0) { NSString *reason = nil; NSArray *a = nil; if (comp(wd, @"self") == 0) { if (terminating == nil) { NS_DURING { [self information: @"Re-starting Command server\n" from: t to: f type: LT_CONSOLE]; [control unregister: self]; } 
NS_HANDLER { NSLog(@"Exception unregistering from Control: %@", localException); } NS_ENDHANDLER exit(-1); // Watcher should restart us } else { m = [NSString stringWithFormat: @"Already terminating by %@\n", [terminating fireDate]]; } } else if (comp(wd, @"all") == 0) { a = clients; reason = [NSString stringWithFormat: @"Console 'restart all' from '%@'", f]; } else { a = [self findAll: clients byAbbreviation: wd]; reason = [NSString stringWithFormat: @"Console 'restart ...' from '%@'", f]; } if (a != nil) { unsigned i; BOOL found = NO; for (i = 0; i < [a count]; i++) { EcClientI *c = [a objectAtIndex: i]; NS_DURING { LaunchInfo *l; l = [LaunchInfo existing: [c name]]; if ([l isActive]) { NSString *when = @"shortly"; if ([l hungDate] > 0.0) { when = [NSString stringWithFormat: @"in about %d seconds", (int)quitTime]; } m = [m stringByAppendingFormat: @" The process '%@' should restart %@.\n", [l name], when]; if ([l hungDate] > 0.0 && NO == [l isStopping]) { [self hungRestart: l]; } else { [l restart: reason]; } found = YES; } } NS_HANDLER { NSLog(@"Caught exception: %@", localException); } NS_ENDHANDLER } if (NO == found) { m = [NSString stringWithFormat: @"Nothing to restart as '%@'\n", wd]; } } } else { m = @"Restart what?.\n"; } } else if (matchCmd(wd, @"resume", allow)) { if (NO == launchEnabled) { [self performSelector: @selector(enableLaunching) withObject: nil afterDelay: 0.01]; m = @"Launching will be resumed.\n"; } else { m = @"Launching was/is not suspended.\n"; } } else if (matchCmd(wd, @"status", allow)) { m = [self description]; if ([(wd = cmdWord(cmd, 1)) length] > 0) { LaunchInfo *l = [LaunchInfo find: wd]; if (nil == l) { m = [m stringByAppendingFormat: @"\nUnable to find '%@' in the launchable processes.\n", wd]; } else { NSString *n = [l name]; if ([self findIn: clients byName: n] != nil) { m = [m stringByAppendingFormat: @"\nProcess '%@' is running.", n]; } else if ([l isStarting]) { m = [m stringByAppendingFormat: @"\nProcess '%@' is starting 
since %@", n, [l startingDate]]; if ([l processIdentifier] > 0) { m = [m stringByAppendingFormat: @" (last launch at %@)", [l launchDate]]; } else { NSArray *u = [l unfulfilled]; if ([u count] > 0) { m = [m stringByAppendingFormat: @", waiting for %@", u]; } } m = [m stringByAppendingString: @"."]; } m = [m stringByAppendingFormat: @"\n%@\n", l]; } } } else if (matchCmd(wd, @"suspend", allow)) { if (NO == launchEnabled) { m = @"Launching was/is already suspended.\n"; } else { [self disableLaunching]; m = @"Launching is now suspended.\n"; } } else if (matchCmd(wd, @"tell", nil)) { wd = cmdWord(cmd, 1); if ([wd length] > 0) { NSString *dest = AUTORELEASE(RETAIN(wd)); [cmd removeObjectAtIndex: 0]; [cmd removeObjectAtIndex: 0]; if (comp(dest, @"all") == 0) { unsigned i; NSArray *a = [[NSArray alloc] initWithArray: clients]; for (i = 0; i < [a count]; i++) { EcClientI* c = [a objectAtIndex: i]; if ([clients indexOfObjectIdenticalTo: c]!=NSNotFound) { NS_DURING { NSData *dat = [NSPropertyListSerialization dataFromPropertyList: cmd format: NSPropertyListBinaryFormat_v1_0 errorDescription: 0]; [[c obj] cmdMesgData: dat from: f]; m = @"Sent message.\n"; } NS_HANDLER { NSLog(@"Caught exception: %@", localException); } NS_ENDHANDLER } } } else { NSArray *a; a = [self findAll: clients byAbbreviation: dest]; if ([a count] == 0) { m = [NSString stringWithFormat: @"No such client as '%@' on '%@'\n", dest, host]; } else { unsigned i; m = nil; for (i = 0; i < [a count]; i++) { EcClientI *c = [a objectAtIndex: i]; NS_DURING { NSData *dat = [NSPropertyListSerialization dataFromPropertyList: cmd format: NSPropertyListBinaryFormat_v1_0 errorDescription: 0]; [[c obj] cmdMesgData: dat from: f]; if (m == nil) { m = [NSString stringWithFormat: @"Sent message to %@", [c name]]; } else { m = [m stringByAppendingFormat: @", %@", [c name]]; } } NS_HANDLER { NSLog(@"Caught exception: %@", localException); if (m == nil) { m = @"Failed to send message!"; } else { m = [m stringByAppendingFormat: 
@", failed to send to %@", [c name]];
                                }
                            }
                          NS_ENDHANDLER
                        }
                      if (m != nil)
                        m = [m stringByAppendingString: @"\n"];
                    }
                }
            }
          else
            {
              m = @"Tell where?.\n";
            }
        }
      else
        {
          m = [NSString stringWithFormat: @"Unknown command - '%@'\n", wd];
        }

      [self reply: m to: f from: ecFullName()];
    }
  else
    {
      EcClientI *client = [self findIn: clients byName: t];

      if (client)
        {
          NS_DURING
            {
              NSData *dat = [NSPropertyListSerialization
                dataFromPropertyList: cmd
                format: NSPropertyListBinaryFormat_v1_0
                errorDescription: 0];
              [[client obj] cmdMesgData: dat from: f];
            }
          NS_HANDLER
            {
              NSLog(@"Caught exception: %@", localException);
            }
          NS_ENDHANDLER
        }
      else
        {
          NSString *m;

          m = [NSString stringWithFormat:
            @"command to unregistered client '%@'", t];
          [self information: cmdLogFormat(LT_ERROR, m)
                       from: nil
                         to: f
                       type: LT_ERROR];
        }
    }
}

/* Builds the configuration data for the process called name.
 *
 * Returns nil when no configuration is held yet or when name is empty.
 * Otherwise returns a binary property list of a dictionary holding
 * whichever of these entries exist in the current configuration:
 *   "*"          the entry stored under "*"
 *   name         the entry for the process itself (see below)
 *   "Operators"  the entry stored under "Operators"
 *
 * A name of the form 'base-NNN' (last hyphen followed only by the ASCII
 * digits 0-9) is treated as an instance of 'base'.  If there is no entry
 * for the full name, the entry for base is used.  If both exist and both
 * are dictionaries, the base entry is copied and the instance specific
 * values are written over it.
 */
- (NSData *) configurationFor: (NSString *)name
{
  NSMutableDictionary   *dict;
  NSString              *base = nil;
  NSRange               r;
  id                    o;

  if (nil == config || 0 == [name length])
    {
      return nil;       // Not available
    }

  r = [name rangeOfString: @"-"
                  options: NSBackwardsSearch | NSLiteralSearch];
  if (r.length > 0)
    {
      NSString          *inst = [name substringFromIndex: NSMaxRange(r)];
      NSUInteger        len = [inst length];

      /* With nothing after the hyphen there is no instance ID,
       * so base stays nil.
       */
      if (len > 0)
        {
          base = [name substringToIndex: r.location];
          while (len-- > 0)
            {
              /* Compare the UTF-16 unit directly: passing a unichar
               * to isdigit() is undefined for values which do not fit
               * in an unsigned char.
               */
              unichar   ch = [inst characterAtIndex: len];

              if (ch < '0' || ch > '9')
                {
                  base = nil;   // not positive integer after hyphen
                  break;
                }
            }
        }
    }

  dict = [NSMutableDictionary dictionaryWithCapacity: 2];
  o = [config objectForKey: @"*"];
  if (o != nil)
    {
      [dict setObject: o forKey: @"*"];
    }

  o = [config objectForKey: name];      // Lookup config
  if (base != nil)
    {
      if (nil == o)
        {
          /* No instance specific config found for process,
           * try using the base process name without instance ID.
           */
          o = [config objectForKey: base];
        }
      else
        {
          id    tmp;

          /* We found instance specific configuration for the process,
           * so we merge by taking values from generic process config
           * (if any) and overwriting them with instance specific values.
           */
          tmp = [config objectForKey: base];
          if ([tmp isKindOfClass: [NSDictionary class]]
            && [o isKindOfClass: [NSDictionary class]])
            {
              tmp = [[tmp mutableCopy] autorelease];
              [tmp addEntriesFromDictionary: o];
              o = tmp;
            }
        }
    }
  if (o != nil)
    {
      [dict setObject: o forKey: name];
    }

  o = [config objectForKey: @"Operators"];
  if (o != nil)
    {
      [dict setObject: o forKey: @"Operators"];
    }

  return [NSPropertyListSerialization
    dataFromPropertyList: dict
    format: NSPropertyListBinaryFormat_v1_0
    errorDescription: 0];
}

/* Connection delegate method: registers the receiver to be told when
 * the new connection dies, makes itself the delegate of that connection
 * and always accepts it.
 */
- (BOOL) connection: (NSConnection*)ancestor
  shouldMakeNewConnection: (NSConnection*)newConn
{
  [[NSNotificationCenter defaultCenter]
    addObserver: self
    selector: @selector(connectionBecameInvalid:)
    name: NSConnectionDidDieNotification
    object: (id)newConn];
  [newConn setDelegate: self];
  return YES;
}

- (id) connectionBecameInvalid: (NSNotification*)notification
{
  id conn = [notification object];

  [[NSNotificationCenter defaultCenter]
    removeObserver: self
    name: NSConnectionDidDieNotification
    object: conn];
  if ([conn isKindOfClass: [NSConnection class]])
    {
      NSMutableArray *c;
      NSMutableString *l = [NSMutableString stringWithCapacity: 20];
      NSMutableString *e = [NSMutableString stringWithCapacity: 20];
      NSMutableString *m = [NSMutableString stringWithCapacity: 20];
      BOOL lostClients = NO;
      NSUInteger i;

      if (control && [(NSDistantObject*)control connectionForProxy] == conn)
        {
          [[self logFile] puts: @"Lost connection to control server.\n"];
          DESTROY(control);
        }

      /* Remove any clients using this connection from the active list.
       * Clients which have not been registered (or which have been
       * unregistered) will not be in the list.
*/ c = AUTORELEASE([clients mutableCopy]); i = [c count]; while (i-- > 0) { EcClientI *o = [c objectAtIndex: i]; if ([(id)[o obj] connectionForProxy] == conn) { LaunchInfo *l = [LaunchInfo existing: [o name]]; BOOL failedToUnregister = NO; /* Unless this is a transient process, it should have * unregistered. */ if (NO == [o unregistered] && NO == [o transient]) { failedToUnregister = YES; } lostClients = YES; [self removeClient: o cleanly: failedToUnregister ? NO : YES]; /* If this client was associated with a LaunchInfo instance, * we need to shut it down. */ if ([l client] == o) { [l clearClient: o cleanly: failedToUnregister ? NO : YES]; [l stopping: nil]; } } } [c removeAllObjects]; if ([l length] > 0) { [[self logFile] puts: l]; } if ([m length] > 0) { [self information: m from: nil to: nil type: LT_ALERT]; } if ([e length] > 0) { [self information: e from: nil to: nil type: LT_ERROR]; } if (lostClients) { [self update]; } } else { [self error: "non-Connection sent invalidation"]; } return self; } - (BOOL) contactControl { static BOOL trying = NO; if (nil == control && NO == trying) { NSUserDefaults *defs; NSString *ctlName; NSString *ctlHost; id c; trying = YES; defs = [self cmdDefaults]; ctlName = [defs stringForKey: @"ControlName"]; if (ctlName == nil) { ctlName = @"Control"; } if (nil != (ctlHost = [NSHost controlWellKnownName])) { /* Map to operating system host name. 
*/ ctlHost = [[NSHost hostWithWellKnownName: ctlHost] name]; } if (nil == ctlHost) { ctlHost = @"*"; } NS_DURING { NSLog(@"Connecting to %@ on %@", ctlName, ctlHost); control = (id)[NSConnection rootProxyForConnectionWithRegisteredName: ctlName host: ctlHost usingNameServer: [NSSocketPortNameServer sharedInstance] ]; if (nil == control) { NSLog(@"Connecting to Control server: faled"); } } NS_HANDLER { NSLog(@"Connecting to control server: %@", localException); control = nil; } NS_ENDHANDLER c = control; if (RETAIN(c) != nil) { /* Re-initialise control server ping */ DESTROY(outstanding); fwdSequence = 0; revSequence = 0; [(NSDistantObject*)c setProtocolForProxy: @protocol(Control)]; c = [(NSDistantObject*)c connectionForProxy]; [c setDelegate: self]; [[NSNotificationCenter defaultCenter] addObserver: self selector: @selector(connectionBecameInvalid:) name: NSConnectionDidDieNotification object: c]; NS_DURING { NSData *dat; dat = [control registerCommand: self name: host]; if (nil == dat) { // Control server not yet ready. DESTROY(control); NSLog(@"Registering %@ with Control server: not ready yet", host); } else { NSMutableDictionary *conf; conf = [NSPropertyListSerialization propertyListWithData: dat options: NSPropertyListMutableContainers format: 0 error: 0]; if ([conf objectForKey: @"rejected"] == nil) { [self updateConfig: dat]; NSLog(@"Registered %@ with Control server", host); } else { NSLog(@"Registering %@ with Control server: %@", host, [conf objectForKey: @"rejected"]); DESTROY(control); } } } NS_HANDLER { NSLog(@"Registering %@ with Control server: %@", host, localException); DESTROY(control); } NS_ENDHANDLER if (control != nil) { [self update]; } } trying = NO; } return (nil == control) ? 
NO : YES;
}

/* Ends logging to the file named by logname, invalidates the timer
 * (if any) and releases the objects held by the receiver.
 */
- (void) dealloc
{
  [self cmdLogEnd: logname];
  if (timer != nil)
    {
      [timer invalidate];
    }
  DESTROY(control);
  RELEASE(host);
  RELEASE(clients);
  RELEASE(launchInfo);
  RELEASE(environment);
  RELEASE(outstanding);
  [super dealloc];
}

/* Returns the superclass description followed by the start time,
 * a note if launching is suspended, the LaunchInfo class description
 * and the compress/delete ages (in days) for debug and normal logs.
 */
- (NSString*) description
{
  NSMutableString *m;

  m = [NSMutableString stringWithFormat: @"%@ running since %@\n",
    [super description], [self ecStarted]];
  if (NO == launchEnabled)
    {
      [m appendString: @" Launching is currently suspended.\n"];
    }
  [m appendFormat: @" %@\n", [LaunchInfo description]];
  [m appendFormat: @" Debug Compress/Delete after %d/%d days.\n",
    (int)debCompressAfter, (int)debDeleteAfter];
  [m appendFormat: @" Log Compress/Delete after %d/%d days.\n",
    (int)logCompressAfter, (int)logDeleteAfter];
  return m;
}

/* Returns the environment dictionary held by the receiver.
 */
- (NSDictionary*) environment
{
  return environment;
}

/* Returns the clients in a which are selected by s.
 *
 * If s starts with an ASCII digit it is treated as an index into a and
 * the result holds the object at that index (or nothing if the index is
 * out of range).  Otherwise the result holds every object whose name has
 * s as a case insensitive prefix (as decided by comp()).
 * A nil s selects nothing.
 */
- (NSArray*) findAll: (NSArray*)a byAbbreviation: (NSString*)s
{
  NSMutableArray        *r = [NSMutableArray arrayWithCapacity: 4];
  NSUInteger            len = [s length];
  unichar               first = 0;
  int                   i;

  if (nil == s)
    {
      /* The old test dereferenced [s cString], which is NULL for nil.
       */
      return r;
    }
  if (len > 0)
    {
      first = [s characterAtIndex: 0];
    }

  /*
   * Special case - a numeric value is used as an index into the array.
   */
  if (first >= '0' && first <= '9')
    {
      i = [s intValue];
      if (i >= 0 && i < (int)[a count])
        {
          [r addObject: [a objectAtIndex: i]];
        }
    }
  else
    {
      EcClientI *o;

      for (i = 0; i < (int)[a count]; i++)
        {
          o = (EcClientI*)[a objectAtIndex: i];
          /* comp() leaves the matched prefix length in comp_len,
           * so the second test accepts a partial (prefix) match.
           */
          if (comp(s, [o name]) == 0 || comp_len == (int)len)
            {
              [r addObject: o];
            }
        }
    }
  return r;
}

/* Returns a single client from a selected by s, or nil if none matches.
 *
 * If s starts with an ASCII digit and its integer value is a valid index
 * into a, the object at that index is returned.  Otherwise an object
 * whose name matches s exactly (ignoring case) is returned if one is
 * found before the end of the array; failing that, the best prefix
 * match recorded while scanning is returned.
 * A nil s returns nil.
 */
- (EcClientI*) findIn: (NSArray*)a byAbbreviation: (NSString*)s
{
  EcClientI     *o;
  unichar       first = 0;
  int           i;
  int           best_pos = -1;
  int           best_len = 0;

  if (nil == s)
    {
      /* The old test dereferenced [s cString], which is NULL for nil.
       */
      return nil;
    }
  if ([s length] > 0)
    {
      first = [s characterAtIndex: 0];
    }

  /*
   * Special case - a numeric value is used as an index into the array.
   */
  if (first >= '0' && first <= '9')
    {
      i = [s intValue];
      if (i >= 0 && i < (int)[a count])
        {
          return (EcClientI*)[a objectAtIndex: i];
        }
    }

  for (i = 0; i < (int)[a count]; i++)
    {
      o = (EcClientI*)[a objectAtIndex: i];
      if (comp(s, [o name]) == 0)
        {
          return o;
        }
      if (comp_len > best_len)
        {
          best_len = comp_len;
          best_pos = i;
        }
    }
  if (best_pos >= 0)
    {
      return (EcClientI*)[a objectAtIndex: best_pos];
    }
  return nil;
}

/* Returns the first object in a whose name is the same as s
 * (as decided by comp(), which ignores case), or nil.
 */
- (EcClientI*) findIn: (NSArray*)a byName: (NSString*)s
{
  EcClientI     *o;
  int           i;

  for (i = 0; i < (int)[a count]; i++)
    {
      o = (EcClientI*)[a objectAtIndex: i];
      if (comp([o name], s) == 0)
        {
          return o;
        }
    }
  return nil;
}

/* Returns the first object in a whose obj is identical to s, or nil.
 */
- (EcClientI*) findIn: (NSArray*)a byObject: (id)s
{
  EcClientI     *o;
  int           i;

  for (i = 0; i < (int)[a count]; i++)
    {
      o = (EcClientI*)[a objectAtIndex: i];
      if ([o obj] == s)
        {
          return o;
        }
    }
  return nil;
}

- (void) flush
{
  /*
   * Flush logs to disk ... dummy method as we don't cache them at present.
   */
}

/* Returns the host name held by the receiver.
 */
- (NSString*) host
{
  return host;
}

/* Convenience form of -information:from:to:type: with no destination.
 */
- (void) information: (NSString*)inf
                from: (NSString*)s
                type: (EcLogType)t
{
  [self information: inf from: s to: nil type: t];
}

/* Passes the information text on to the Control server.
 * Debug messages and nil/empty texts are ignored.  If the Control server
 * cannot be contacted, or the call raises, the details are written
 * using NSLog() instead.
 */
- (void) information: (NSString*)inf
                from: (NSString*)s
                  to: (NSString*)d
                type: (EcLogType)t
{
  if (t != LT_DEBUG && inf != nil && [inf length] > 0)
    {
      if (NO == [self contactControl])
        {
          NSLog(@"Information (from:%@ to:%@ type:%d) with no Control -\n%@",
            s, d, t, inf);
        }
      else
        {
          NS_DURING
            {
              [control information: inf type: t to: d from: s];
            }
          NS_HANDLER
            {
              NSLog(@"Sending %@ from %@ to %@ type %x exception: %@",
                inf, s, d, t, localException);
            }
          NS_ENDHANDLER
        }
    }
}

- (id) initWithDefaults: (NSDictionary*)defs
{
  ecSetLogsSubdirectory(@"Logs");
  if (nil != (self = [super initWithDefaults: defs]))
    {
      [LaunchInfo class];
      debUncompressed = 0.0;
      debUndeleted = 0.0;
      logUncompressed = 0.0;
      logUndeleted = 0.0;
      nodesFree = 0.1;
      spaceFree = 0.1;

      logname = [[self cmdName] stringByAppendingPathExtension: @"log"];
      RETAIN(logname);
      if ([self logFile] == nil)
        {
          exit(0);
        }
      host = RETAIN([[NSHost currentHost] wellKnownName]);
      clients =
[[NSMutableArray alloc] initWithCapacity: 10]; } return self; } - (void) killAll { #ifndef __MINGW__ NSEnumerator *e; EcClientI *c; LaunchInfo *l; /* Kill any known clients which are *not* configured with launch info * This could be transient or manually launched processes. */ e = [[self unconfiguredClients] objectEnumerator]; while (nil != (c = [e nextObject])) { int p = [c processIdentifier]; if (p > 0) { kill(p, SIGKILL); } } /* Now mark all configured clients to be shut down (so we won't restart any) * and kill any running processes we know about. */ e = [launchInfo objectEnumerator]; while (nil != (l = [e nextObject])) { int p = [l processIdentifier]; [l stop: @"killed shutdown/remote"]; if (p > 0) { kill(p, SIGKILL); } } #endif } - (BOOL) launch: (NSString*)name { LaunchInfo *l = [LaunchInfo existing: name]; if (nil == l) { NSString *m; m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE, @"unrecognized name to launch %@"), name]; [self information: m from: nil to: nil type: LT_CONSOLE]; return NO; } else { [l start: @"remote API request"]; } return YES; } - (void) logChange: (NSString*)change for: (NSString*)name { NSString *s; NSLog(@"%@ process with name '%@' on %@", change, name, host); s = [NSString stringWithFormat: @"%@ %@ process with name '%@' on %@\n", [NSDate date], change, name, host]; [[self logFile] puts: s]; [self information: s from: nil to: nil type: LT_CONSOLE]; [self update]; } - (NSFileHandle*) logFile { return [self cmdLogFile: logname]; } - (void) logMessage: (NSString*)msg type: (EcLogType)t for: (id)o { EcClientI *r = [self findIn: clients byObject: o]; NSString *c; if (r == nil) { c = @"unregistered client"; } else { c = [r name]; } [self logMessage: msg type: t name: c]; } - (void) logMessage: (NSString*)msg type: (EcLogType)t name: (NSString*)c { NSString *m; switch (t) { case LT_DEBUG: m = msg; break; case LT_WARNING: m = msg; break; case LT_ERROR: m = msg; break; case LT_AUDIT: m = msg; break; case LT_ALERT: m = msg; break; 
case LT_CONSOLE:
        m = msg;
        break;

      default:
        m = [NSString stringWithFormat: @"%@: Message of unknown type - %@",
          c, msg];
        break;
    }

  [[self logFile] puts: m];
  [self information: m from: c to: nil type: t];
}

/* Asks every process to shut down.
 * Each object in launchInfo is marked as manually stopped, told to stop
 * and has -checkAbandonedStartup called on it; each unconfigured client
 * is sent cmdQuit: directly (exceptions from that call are logged).
 */
- (void) quitAll
{
  NSEnumerator  *e;
  LaunchInfo    *l;
  EcClientI     *c;

  e = [launchInfo objectEnumerator];
  while (nil != (l = [e nextObject]))
    {
      [l setManual: YES];       // manually stopped
      [l stop: @"quit all instruction"];
      [l checkAbandonedStartup];
    }

  e = [[self unconfiguredClients] objectEnumerator];
  while (nil != (c = [e nextObject]))
    {
      NS_DURING
        {
          [[c obj] cmdQuit: 0];
        }
      NS_HANDLER
        {
          NSLog(@"Caught exception: %@", localException);
        }
      NS_ENDHANDLER
    }
}

/*
 * Handle a request for re-config from a client.
 * The configuration already stored for the client (if any) is sent
 * back to it; an exception raised while sending is logged.
 */
- (void) requestConfigFor: (id)c
{
  EcClientI     *info = [self findIn: clients byObject: (id)c];
  NSData        *conf = [info config];

  if (nil != conf)
    {
      NS_DURING
        {
          [[info obj] updateConfig: conf];
        }
      NS_HANDLER
        {
          NSLog(@"Sending config to client: %@", localException);
        }
      NS_ENDHANDLER
    }
}

/* Registers the client proxy c, with process identifier p and name n.
 *
 * Returns
 *  - a serialised dictionary with the key "back-off" if no configuration
 *    is held yet,
 *  - the stored configuration of the existing client if the LaunchInfo
 *    for n already has a client with the same process identifier,
 *  - the configuration of the newly registered client if no client
 *    called n was registered,
 *  - a serialised dictionary with the key "rejected" if another client
 *    called n is already registered.
 * Every outcome is recorded using -logChange:for:.
 */
- (NSData*) registerClient: (id)c
                identifier: (int)p
                      name: (NSString*)n
                 transient: (BOOL)t
{
  LaunchInfo            *l = [LaunchInfo existing: n];
  NSMutableDictionary   *dict;
  EcClientI             *obj;
  EcClientI             *old;

  [(NSDistantObject*)c setProtocolForProxy: @protocol(CmdClient)];

  if (nil == config)
    {
      [self logChange: @"back-off" for: n];
      dict = [NSMutableDictionary dictionaryWithCapacity: 1];
      [dict setObject: @"configuration data not yet available."
               forKey: @"back-off"];
      return [NSPropertyListSerialization
        dataFromPropertyList: dict
        format: NSPropertyListBinaryFormat_v1_0
        errorDescription: 0];
    }

  /* Do we already have this registered?
   */
  if ([l processIdentifier] == p && (obj = [l client]) != nil)
    {
      [self logChange: @"re-registered" for: [l name]];
      if ([l stable] == YES)
        {
          [self clearAll: [l name] addText: @"process re-registered"];
        }
      return [obj config];
    }

  /*
   * Create a new reference for this client.
   */
  obj = [[EcClientI alloc] initFor: c name: n with: self];
  if ((old = [self findIn: clients byName: n]) == nil)
    {
      NSData    *d;

      /* The clients array retains obj, so it stays usable after the
       * release below.
       */
      [clients addObject: obj];
      RELEASE(obj);
      [clients sortUsingSelector: @selector(compare:)];
      [obj setProcessIdentifier: p];
      if (nil == l)
        {
          l = [LaunchInfo launchInfo: n];
        }
      /* Normalise so that any non-zero value counts as transient.
       */
      [obj setTransient: (t ? YES : NO)];
      [l setClient: obj];
      [self logChange: @"registered" for: [l name]];
      d = [self configurationFor: n];
      if (nil != d)
        {
          [obj setConfig: d];
        }
      return [obj config];
    }
  else
    {
      /* Rejecting means the client is not registered (and therefore should
       * not be told to quit when the object is deallocated).
       */
      [obj setUnregistered: YES];
      RELEASE(obj);
      [self logChange: @"rejected" for: n];
      dict = [NSMutableDictionary dictionaryWithCapacity: 1];
      [dict setObject: @"client with that name already registered."
               forKey: @"rejected"];
      return [NSPropertyListSerialization
        dataFromPropertyList: dict
        format: NSPropertyListBinaryFormat_v1_0
        errorDescription: 0];
    }
  /* Not reached: both branches above return.  The call to -update which
   * used to sit here could never run (-logChange:for: already calls
   * -update on each path).
   */
}

/* Sends the reply msg from c to n via the Control server.
 * If the Control server cannot be contacted, or the call raises,
 * the reply is logged with NSLog() and dropped.
 */
- (void) reply: (NSString*) msg to: (NSString*)n from: (NSString*)c
{
  if ([self contactControl])
    {
      NS_DURING
        {
          [control reply: msg to: n from: c];
        }
      NS_HANDLER
        {
          NSLog(@"reply: %@ to: %@ from: %@ - %@",
            msg, n, c, localException);
        }
      NS_ENDHANDLER
    }
  else
    {
      NSLog(@"reply: %@ to: %@ from: %@ - discarded (no connection to Control)",
        msg, n, c);
    }
}

/* Asks the LaunchInfo object called name to restart, giving reason.
 * Returns NO (after reporting the problem as LT_CONSOLE information)
 * if there is no LaunchInfo of that name, YES otherwise.
 */
- (BOOL) restart: (NSString*)name reason: (NSString*)reason
{
  LaunchInfo    *l = [LaunchInfo existing: name];

  if (nil == l)
    {
      NSString  *m;

      m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
        @"unrecognized name to restart %@"), name];
      [self information: m from: nil to: nil type: LT_CONSOLE];
      return NO;
    }
  else
    {
      [l restart: reason];
    }
  return YES;
}

- (NSString*) makeSpace
{
  NSInteger purgeAfter;
  NSTimeInterval latestDeleteAt;
  NSTimeInterval now;
  NSTimeInterval ti;
  NSFileManager *mgr;
  NSCalendarDate *when;
  NSString *logs;
  NSString *file;
  NSString *gone;
  NSAutoreleasePool *arp;

  gone = nil;
  arp = [NSAutoreleasePool
new]; when = [NSCalendarDate date]; now = [when timeIntervalSinceReferenceDate]; logs = [[self ecUserDirectory] stringByAppendingPathComponent: @"DebugLogs"]; /* When trying to make space, we can delete up to the point when we * would start compressing but no further ... we don't want to delete * all debug! */ purgeAfter = debCompressAfter; mgr = [NSFileManager defaultManager]; if (0.0 == debUndeleted) { debUndeleted = now - 365.0 * day; } ti = debUndeleted; latestDeleteAt = now - day * purgeAfter; while (nil == gone && ti < latestDeleteAt) { when = date(ti); file = [[logs stringByAppendingPathComponent: [when descriptionWithCalendarFormat: @"%Y-%m-%d"]] stringByStandardizingPath]; if ([mgr fileExistsAtPath: file]) { [mgr removeFileAtPath: file handler: nil]; gone = [when descriptionWithCalendarFormat: @"%Y-%m-%d"]; } ti += day; } debUndeleted = ti; RETAIN(gone); DESTROY(arp); return AUTORELEASE(gone); } - (void) _sweep: (BOOL)deb at: (NSCalendarDate*)when { NSTimeInterval uncompressed; NSTimeInterval undeleted; NSTimeInterval latestCompressAt; NSTimeInterval latestDeleteAt; NSTimeInterval now; NSTimeInterval ti; NSInteger deleteAfter; NSInteger compressAfter; NSFileManager *mgr; NSString *dir; NSString *file; NSAutoreleasePool *arp; arp = [NSAutoreleasePool new]; now = [when timeIntervalSinceReferenceDate]; /* get number of days after which to do log compression/deletion. 
*/ if (deb) { compressAfter = debCompressAfter; deleteAfter = debDeleteAfter; } else { compressAfter = logCompressAfter; deleteAfter = logDeleteAfter; } mgr = [[NSFileManager new] autorelease]; dir = [self ecUserDirectory]; if (YES == deb) { dir = [dir stringByAppendingPathComponent: @"DebugLogs"]; uncompressed = debUncompressed; undeleted = debUndeleted; } else { dir = [dir stringByAppendingPathComponent: @"Logs"]; uncompressed = logUncompressed; undeleted = logUndeleted; } latestDeleteAt = now - day * deleteAfter; if (undeleted < latestDeleteAt) { NSDirectoryEnumerator *enumerator; NSString *file; NSCalendarDate *when; unsigned limit; when = date(latestDeleteAt); limit = ([when yearOfCommonEra] * 100 + [when monthOfYear]) * 100 + [when dayOfMonth]; enumerator = [mgr enumeratorAtPath: dir]; while ((file = [enumerator nextObject]) != nil) { if ([file length] == 10) { const char *s = [file UTF8String]; unsigned y, m, d; if (sscanf(s, "%04u-%02u-%02u", &y, &m, &d) == 3) { int dayNumber = (y * 100 + m) * 100 + d; if (dayNumber < limit) { file = [dir stringByAppendingPathComponent: file]; if (NO == [mgr removeFileAtPath: file handler: nil]) { NSLog(@"Failed to delete old logs at %@", file); } } } } } undeleted = latestDeleteAt; } if (YES == deb) debUndeleted = undeleted; else logUndeleted = undeleted; if (uncompressed < undeleted) { uncompressed = undeleted; } ti = uncompressed; latestCompressAt = now - day * compressAfter; while (ti < latestCompressAt) { NSAutoreleasePool *pool = [NSAutoreleasePool new]; NSDirectoryEnumerator *enumerator; BOOL isDirectory; NSString *base; when = date(ti); base = [[dir stringByAppendingPathComponent: [when descriptionWithCalendarFormat: @"%Y-%m-%d"]] stringByStandardizingPath]; if ([mgr fileExistsAtPath: base isDirectory: &isDirectory] == NO || NO == isDirectory) { ti += day; [pool release]; continue; // No log directory for this date. 
} enumerator = [mgr enumeratorAtPath: base]; while ((file = [enumerator nextObject]) != nil) { NSString *src; NSString *dst; NSFileHandle *sh; NSFileHandle *dh; NSDictionary *a; NSData *d; if (YES == [[file pathExtension] isEqualToString: @"gz"]) { continue; // Already compressed } a = [enumerator fileAttributes]; if (NSFileTypeRegular != [a fileType]) { continue; // Not a regular file ... can't compress } src = [base stringByAppendingPathComponent: file]; if ([a fileSize] == 0) { [mgr removeFileAtPath: src handler: nil]; continue; // Nothing to compress } if ([a fileSize] >= [[[mgr fileSystemAttributesAtPath: src] objectForKey: NSFileSystemFreeSize] integerValue]) { [mgr removeFileAtPath: src handler: nil]; EcExceptionMajor(nil, @"Unable to compress %@ (too big; deleted)", src); continue; // Not enough space free to compress } dst = [src stringByAppendingPathExtension: @"gz"]; if ([mgr fileExistsAtPath: dst isDirectory: &isDirectory] == YES) { [mgr removeFileAtPath: dst handler: nil]; } [mgr createFileAtPath: dst contents: nil attributes: nil]; dh = [NSFileHandle fileHandleForWritingAtPath: dst]; if (NO == [dh useCompression]) { [dh closeFile]; [mgr removeFileAtPath: dst handler: nil]; EcExceptionMajor(nil, @"Unable to compress %@ to %@", src, dst); continue; } sh = nil; NS_DURING { NSAutoreleasePool *inner; sh = [NSFileHandle fileHandleForReadingAtPath: src]; inner = [NSAutoreleasePool new]; while ([(d = [sh readDataOfLength: 1000000]) length] > 0) { [dh writeData: d]; [inner release]; inner = [NSAutoreleasePool new]; } [inner release]; [sh closeFile]; [dh closeFile]; [mgr removeFileAtPath: src handler: nil]; } NS_HANDLER { [mgr removeFileAtPath: dst handler: nil]; [sh closeFile]; [dh closeFile]; } NS_ENDHANDLER } ti += day; [pool release]; } if (YES == deb) debUncompressed = ti; else logUncompressed = ti; DESTROY(arp); } /* Perform this one in another thread. 
* The sweep operation may compress really large logfiles and could be
 * very slow, so it's performed in a separate thread to avoid blocking
 * normal operations.
 * Sweeps the debug logs and then the normal logs for the supplied date
 * (the current date when none is supplied), then clears the 'sweeping'
 * flag so that another sweep may be started.
 */
- (void) sweep: (NSCalendarDate*)when
{
  if (nil == when)
    {
      when = [NSCalendarDate date];
    }
  [self _sweep: YES at: when];
  [self _sweep: NO at: when];
  sweeping = NO;
}

/* Starts a log sweep for the supplied date in a newly detached thread.
 * If the 'sweeping' flag shows that an earlier sweep has not finished,
 * the request is logged and ignored.
 */
- (void) ecNewHour: (NSCalendarDate*)when
{
  if (sweeping == YES)
    {
      NSLog(@"Argh - nested hourly sweep attempt");
      return;
    }
  sweeping = YES;
  [NSThread detachNewThreadSelector: @selector(sweep:)
                           toTarget: self
                         withObject: when];
}

/*
 * Tell all our clients to quit, and wait for them to do so.
 * If called while already terminating ... force immediate shutdown.
 *
 * Three cases are handled:
 * - no clients remain and nothing is launching: quit now.
 * - the terminateBy deadline has passed: kill everything and quit.
 * - otherwise: ask all clients to quit and re-run this method one
 *   second after the deadline.
 */
- (void) _terminate: (NSTimer*)t
{
  NSTimeInterval        ti = [terminateBy timeIntervalSinceNow];

  if ([clients count] == 0 && [LaunchInfo launching] == 0)
    {
      [self information: @"Final shutdown."
                   from: nil
                     to: nil
                   type: LT_CONSOLE];
      [terminating invalidate];
      terminating = nil;
      [self cmdQuit: tStatus];
    }
  else if (ti <= 0.0)
    {
      [[self logFile] puts: @"Final shutdown.\n"];
      [terminating invalidate];
      terminating = nil;
      [self killAll];
      [self cmdQuit: tStatus];
    }
  else
    {
      [self quitAll];
      terminating = [NSTimer scheduledTimerWithTimeInterval: ti + 1.0
                                                     target: self
                                                   selector: _cmd
                                                   userInfo: nil
                                                    repeats: NO];
    }
}

/* Begins shutdown, to be completed by the supplied date.
 * A nil date means 30 seconds from now.  A date less than 0.5 seconds
 * or more than 900 seconds away is replaced by one at that limit.
 * If a termination deadline has already been set, the request is only
 * reported and the existing deadline is kept.
 */
- (void) terminate: (NSDate*)by
{
  NSTimeInterval        ti = 30.0;

  if (nil != terminateBy)
    {
      NSString  *msg;

      msg = [NSString stringWithFormat: @"Terminate requested,"
        @" but already terminating by %@", terminateBy];
      [self information: msg
                   from: nil
                     to: nil
                   type: LT_CONSOLE];
      return;
    }
  if (nil != by)
    {
      ti = [by timeIntervalSinceNow];
      if (ti < 0.5)
        {
          ti = 0.5;
          by = nil;     // Out of range: recalculated from ti below
        }
      else if (ti > 900.0)
        {
          ti = 900.0;
          by = nil;     // Out of range: recalculated from ti below
        }
    }
  if (nil == by)
    {
      by = [NSDate dateWithTimeIntervalSinceNow: ti];
    }
  ASSIGN(terminateBy, by);

  [self information: @"Terminate initiated.\n"
               from: nil
                 to: nil
               type: LT_CONSOLE];

  /* The actual work is done from a timer almost immediately.
   */
  terminating = [NSTimer scheduledTimerWithTimeInterval: 0.01
                                                 target: self
                                               selector: @selector(_terminate:)
                                               userInfo: nil
                                                repeats: NO];
}

/* Begins shutdown using the default deadline of -terminate:
 */
- (void) terminate
{
  [self terminate: nil];
}

/* Periodic housekeeping.  In order, this:
 * - flushes the log file,
 * - checks each client for an overdue ping response, marking it hung
 *   (with an alarm) or, if already hung for longer than its configured
 *   HungTime, restarting it,
 * - checks whether the control server has failed to respond,
 * - pings every client, and the control server on every fourth call,
 * - raises or clears alarms for free space and free nodes on the
 *   filesystems holding the DebugLogs and Logs directories,
 * - logs the launch information status at most once every 300 seconds.
 *
 * The static inTimeout flag stops the main body being re-entered.  It is
 * cleared unconditionally on the way out, so a call made while the flag
 * is set does no work other than resetting it.
 */
- (void) housekeeping: (NSTimer*)t
{
  static EcAlarm        *dbgNodes = nil;
  static EcAlarm        *dbgSpace = nil;
  static EcAlarm        *logNodes = nil;
  static EcAlarm        *logSpace = nil;
  static NSTimeInterval nextLog = 0.0;
  static BOOL           inTimeout = NO;
  NSDate                *now = [t fireDate];
  EcAlarm               *alarm;

  if (t == timer)
    {
      timer = nil;
    }
  if (now == nil)
    {
      now = [NSDate date];
    }

  [[self logFile] synchronizeFile];
  if (inTimeout == NO)
    {
      static unsigned   pingControlCount = 0;
      NSFileManager     *mgr;
      NSDictionary      *d;
      NSMutableArray    *a;
      NSString          *s;
      float             f;
      unsigned          count;
      BOOL              lost = NO;

      inTimeout = YES;
      [self contactControl];

      /* Work on a copy of the client list since clients may be removed
       * while we are checking them.
       */
      a = AUTORELEASE([clients mutableCopy]);
      count = [a count];
      while (count-- > 0)
        {
          EcClientI             *r = [a objectAtIndex: count];
          LaunchInfo            *l = [LaunchInfo existing: [r name]];
          NSDate                *sent = [r outstanding];
          NSString              *cfg;
          NSTimeInterval        delay = 0.0;

          if ([clients indexOfObjectIdenticalTo: r] == NSNotFound)
            {
              continue;         // No longer one of our clients
            }
          cfg = [[l configuration] objectForKey: @"PingTime"];
          if ([cfg respondsToSelector: @selector(intValue)])
            {
              delay = (NSTimeInterval)[cfg intValue];
            }
          if (delay <= 0.0)
            {
              delay = pingTime; // Default
            }
          if (sent != nil && [sent timeIntervalSinceDate: now] < -delay)
            {
              if ([l hungDate] > 0.0)
                {
                  id    hungTime;

                  /* We have what looks like a hung process:
                   * See if it is not yet stopping and is configured to
                   * restart after an interval when hung.
                   */
                  if (NO == [l isStopping]
                    && nil != (hungTime
                      = [[l configuration] objectForKey: @"HungTime"])
                    && [hungTime respondsToSelector: @selector(intValue)])
                    {
                      NSTimeInterval    ti;

                      /* See how long the process has been hung for,
                       * and if it exceeds the time allowed, restart.
                       */
                      ti = [now timeIntervalSinceReferenceDate];
                      ti -= [l hungDate];
                      if (ti > (NSTimeInterval)[hungTime intValue])
                        {
                          [self hungRestart: l];
                        }
                    }
                }
              else
                {
                  NSString      *m;

                  [l setHung];
                  m = [NSString stringWithFormat:
                    @"failed to respond for over %d seconds", (int)delay];
                  [self alarmCode: ACProcessHung
                         procName: [r name]
                          addText: m];
                  m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
                    @"Client '%@' failed to respond for over %d seconds"),
                    [r name], (int)delay];
                  [self information: m
                               from: nil
                                 to: nil
                               type: LT_CONSOLE];
                }
            }
        }
      [a removeAllObjects];

      if (control != nil && outstanding != nil
        && [outstanding timeIntervalSinceDate: now] < -pingTime)
        {
          NSString      *m;

          m = [NSString stringWithFormat: cmdLogFormat(LT_CONSOLE,
            @"Control server failed to respond for over %d seconds"),
            (int)pingTime];
          [[(NSDistantObject*)control connectionForProxy] invalidate];
          [self information: m
                       from: nil
                         to: nil
                       type: LT_CONSOLE];
          lost = YES;
        }
      if (lost == YES)
        {
          [self update];
        }

      /* We ping each client in turn.
       */
      a = AUTORELEASE([clients mutableCopy]);
      count = [a count];
      while (count-- > 0)
        {
          EcClientI     *r = [a objectAtIndex: count];

          if ([clients indexOfObjectIdenticalTo: r] != NSNotFound)
            {
              [r ping];
            }
        }

      // Ping the control server too - once every four times.
      pingControlCount++;
      if (pingControlCount >= 4)
        {
          pingControlCount = 0;
        }
      if (pingControlCount == 0)
        {
          [self pingControl];
        }

      /* See if the filesystem containing our logging directory has enough
       * space.
       */
      mgr = [NSFileManager defaultManager];

      s = [[self ecUserDirectory]
        stringByAppendingPathComponent: @"DebugLogs"];
      d = [mgr fileSystemAttributesAtPath: s];
      f = [[d objectForKey: NSFileSystemFreeSize] floatValue]
        / [[d objectForKey: NSFileSystemSize] floatValue];
      if (f <= spaceFree)
        {
          static NSDate *last = nil;

          /* Raise the alarm no more often than once every DLY seconds.
           */
          if (nil == last || [last timeIntervalSinceNow] < -DLY)
            {
              EcAlarmSeverity   severity = EcAlarmSeverityMajor;
              NSString          *m;

              m = [self makeSpace];
              ASSIGN(last, [NSDate date]);
              if ([m length] == 0)
                {
                  m = [NSString stringWithFormat:
                    @"at %02.1f percent", f * 100.0];
                }
              else
                {
                  m = [NSString stringWithFormat:
                    @"at %02.1f percent"
                    @" - deleted debug logs from %@ to make space",
                    f * 100.0, m];
                  severity = EcAlarmSeverityCritical;
                }
              alarm = [EcAlarm alarmForManagedObject: nil
                at: nil
                withEventType: EcAlarmEventTypeProcessingError
                probableCause: EcAlarmStorageCapacityProblem
                specificProblem: @"Debug disk space"
                perceivedSeverity: severity
                proposedRepairAction: @"Make space on disk partition"
                @" and turn off excessive debug"
                additionalText: m];
              ASSIGN(dbgSpace, alarm);
              [self alarm: alarm];
            }
        }
      else if (dbgSpace)
        {
          alarm = [dbgSpace clear];
          DESTROY(dbgSpace);
          [self alarm: alarm];
        }

      f = [[d objectForKey: NSFileSystemFreeNodes] floatValue]
        / [[d objectForKey: NSFileSystemNodes] floatValue];
      if (f <= nodesFree)
        {
          static NSDate *last = nil;

          if (nil == last || [last timeIntervalSinceNow] < -DLY)
            {
              EcAlarmSeverity   severity = EcAlarmSeverityMajor;
              NSString          *m;

              m = [self makeSpace];
              ASSIGN(last, [NSDate date]);
              if ([m length] == 0)
                {
                  m = [NSString stringWithFormat:
                    @"at %02.1f percent", f * 100.0];
                }
              else
                {
                  m = [NSString stringWithFormat:
                    @"at %02.1f percent"
                    @" - deleted debug logs from %@ to make space",
                    f * 100.0, m];
                }
              alarm = [EcAlarm alarmForManagedObject: nil
                at: nil
                withEventType: EcAlarmEventTypeProcessingError
                probableCause: EcAlarmStorageCapacityProblem
                specificProblem: @"Debug disk nodes"
                perceivedSeverity: severity
                proposedRepairAction: @"Make space on disk partition"
                @" and turn off excessive debug"
                additionalText: m];
              ASSIGN(dbgNodes, alarm);
              [self alarm: alarm];
            }
        }
      else if (dbgNodes)
        {
          alarm = [dbgNodes clear];
          DESTROY(dbgNodes);
          [self alarm: alarm];
        }

      s = [[self ecUserDirectory] stringByAppendingPathComponent: @"Logs"];
      d = [mgr fileSystemAttributesAtPath: s];
      f = [[d objectForKey: NSFileSystemFreeSize] floatValue]
        / [[d objectForKey: NSFileSystemSize] floatValue];
      if (f <= spaceFree)
        {
          static NSDate *last = nil;

          if (nil == last || [last timeIntervalSinceNow] < -DLY)
            {
              NSString  *m;

              ASSIGN(last, [NSDate date]);
              m = [NSString stringWithFormat:
                @"Disk space at %02.1f percent", f * 100.0];
              alarm = [EcAlarm alarmForManagedObject: nil
                at: nil
                withEventType: EcAlarmEventTypeProcessingError
                probableCause: EcAlarmStorageCapacityProblem
                specificProblem: @"Log disk space"
                perceivedSeverity: EcAlarmSeverityCritical
                proposedRepairAction: @"Make space on disk partition."
                additionalText: m];
              ASSIGN(logSpace, alarm);
              [self alarm: alarm];
            }
        }
      else if (logSpace)
        {
          alarm = [logSpace clear];
          DESTROY(logSpace);
          [self alarm: alarm];
        }

      f = [[d objectForKey: NSFileSystemFreeNodes] floatValue]
        / [[d objectForKey: NSFileSystemNodes] floatValue];
      if (f <= nodesFree)
        {
          static NSDate *last = nil;

          if (nil == last || [last timeIntervalSinceNow] < -DLY)
            {
              NSString  *m;

              ASSIGN(last, [NSDate date]);
              m = [NSString stringWithFormat:
                @"Disk nodes at %02.1f percent", f * 100.0];
              alarm = [EcAlarm alarmForManagedObject: nil
                at: nil
                withEventType: EcAlarmEventTypeProcessingError
                probableCause: EcAlarmStorageCapacityProblem
                specificProblem: @"Log disk nodes"
                perceivedSeverity: EcAlarmSeverityCritical
                proposedRepairAction: @"Make space on disk partition."
                additionalText: m];
              ASSIGN(logNodes, alarm);
              [self alarm: alarm];
            }
        }
      else if (logNodes)
        {
          alarm = [logNodes clear];
          DESTROY(logNodes);
          [self alarm: alarm];
        }

      /* Periodically log the state of everything we know how to launch.
       */
      if ([now timeIntervalSinceReferenceDate] > nextLog)
        {
          NSMutableString       *ms = [NSMutableString string];
          NSEnumerator          *enumerator;
          NSString              *name;

          nextLog = [now timeIntervalSinceReferenceDate] + 300.0;
          enumerator = [[[LaunchInfo names] sortedArrayUsingSelector:
            @selector(compare:)] objectEnumerator];
          while (nil != (name = [enumerator nextObject]))
            {
              LaunchInfo        *l = [LaunchInfo existing: name];

              if (l)
                {
                  [ms appendString: [l description]];
                }
            }
          NSLog(@"\n#### Launch Information Status Start ####\n"
            @"%@#### Launch Information Status End ####", ms);
        }
    }
  inTimeout = NO;
}

/* Timer callback wrapping -housekeeping: so that an exception raised
 * there is logged rather than propagated.  Afterwards, unless we are
 * quitting or a valid timer already exists, the next housekeeping run
 * is scheduled for five seconds later.
 */
- (void) _housekeeping: (NSTimer*)t
{
  NS_DURING
    [self housekeeping: t];
  NS_HANDLER
    NSLog(@"Problem in timeout: %@", localException);
  NS_ENDHANDLER
  if (NO == [timer isValid] && NO == [self ecIsQuitting])
    {
      timer = [NSTimer scheduledTimerWithTimeInterval: 5.0
                                               target: self
                                             selector: @selector(_housekeeping:)
                                             userInfo: nil
                                              repeats: NO];
    }
}

/** Initiate a restart of a hung process.<br />
 * Before restarting, tries to gather information about the process by
 * running the tool named by the HungTool configuration (with the process
 * name and pid as arguments) or, if no usable tool is configured, gcore
 * (with the pid as argument).  An empty HungTool string disables the
 * information gathering.  The tool runs in the directory given by the
 * Home configuration (relative paths are taken relative to the DebugLogs
 * directory) or in the DebugLogs directory itself.<br />
 * The restart is requested whether or not the tool could be run.
 */
- (void) hungRestart: (LaunchInfo*)l
{
  NSString      *prog;
  NSTask        *task;
  NSArray       *args;
  NSString      *base;
  NSString      *home;
  NSString      *pid;

  base = [[self ecUserDirectory] stringByAppendingPathComponent: @"DebugLogs"];
  home = [[l configuration] objectForKey: @"Home"];
  if (home)
    {
      home = [home stringByStandardizingPath];
      if (![home isAbsolutePath])
        {
          home = [base stringByAppendingPathComponent: home];
        }
    }
  else
    {
      home = base;
    }
  pid = [NSString stringWithFormat: @"%d", [l processIdentifier]];

  task = [NSTask new];
  prog = [[l configuration] objectForKey: @"HungTool"];
  if ([prog isKindOfClass: [NSString class]])
    {
      /* Allow an empty tool name to mean that we should not execute a tool.
       */
      prog = [prog stringByTrimmingSpaces];
      if ([prog length] == 0)
        {
          NSLog(@"Empty HungTool for %@; No info gathering.", [l name]);
        }
      else
        {
          [task setLaunchPath: prog];
          if ([task validatedLaunchPath] == nil)
            {
              NSLog(@"Failed to find HungTool (%@). Fallback to gcore", prog);
              prog = nil;
            }
        }
    }
  else
    {
      /* A configured value which is not a string can't be used as a
       * launch path, so treat it as if no tool had been configured.
       */
      if (nil != prog)
        {
          NSLog(@"Invalid HungTool (%@) for %@. Fallback to gcore",
            prog, [l name]);
        }
      prog = nil;
    }

  if (prog)
    {
      /* Run the supplied program using the process name and pid as args
       */
      args = [NSArray arrayWithObjects: [l name], pid, nil];
    }
  else
    {
      /* Run gcore to get a coredump
       */
      prog = @"gcore";
      args = [NSArray arrayWithObjects: pid, nil];
    }

  [task setArguments: args];
  [task setLaunchPath: prog];
  if (home)
    {
      [task setCurrentDirectoryPath: home];
    }
  if ([task validatedLaunchPath] == nil)
    {
      /* An empty program name means info gathering was turned off,
       * so there is nothing to complain about in that case.
       */
      if ([prog length] > 0)
        {
          NSLog(@"Failed to find gcore to get info for hung process %@", pid);
        }
      RELEASE(task);
    }
  else
    {
      NSString  *msg;

      [task setStandardInput: [NSFileHandle fileHandleWithNullDevice]];
      [[NSNotificationCenter defaultCenter]
        addObserver: self
           selector: @selector(hungToolTerminated:)
               name: NSTaskDidTerminateNotification
             object: task];
      msg = [NSString stringWithFormat:
        @"Gathering hung process info using task %p for %@ (%d).",
        task, [l name], [l processIdentifier]];
      [self information: msg
                   from: nil
                     to: nil
                   type: LT_CONSOLE];

      /* The task is released in -hungToolTerminated: once it finishes.
       * If it can't even be launched we must clean up here, and must
       * not let the failure prevent the restart below.
       */
      NS_DURING
        {
          [task launch];
        }
      NS_HANDLER
        {
          NSLog(@"Failed to launch task %p to get info for hung process %@: %@",
            task, pid, localException);
          [[NSNotificationCenter defaultCenter]
            removeObserver: self
                      name: NSTaskDidTerminateNotification
                    object: task];
          RELEASE(task);
        }
      NS_ENDHANDLER
    }

  [l restart: @"Process appears to be hung"];
}

/* Handles termination of an information gathering task launched by
 * -hungRestart: by reporting its exit status (or the signal which ended
 * it), then discarding the task.
 */
- (void) hungToolTerminated: (NSNotification*)n
{
  NSTask        *task = (NSTask*)[n object];
  NSString      *msg;

  if (NSTaskTerminationReasonUncaughtSignal == [task terminationReason])
    {
      msg = [NSString stringWithFormat:
        @"Gathering hung process info using task %p"
        @" exited with signal %d.",
        task, [task terminationStatus]];
    }
  else
    {
      msg = [NSString stringWithFormat:
        @"Gathering hung process info using task %p"
        @" returned status %d",
        task, [task terminationStatus]];
    }
  [self information: msg
               from: nil
                 to: nil
               type: LT_CONSOLE];

  /* Stop observing before we give up the task, so that no registration
   * is left behind referring to an object which no longer exists.
   */
  [[NSNotificationCenter defaultCenter]
    removeObserver: self
              name: NSTaskDidTerminateNotification
            object: task];
  RELEASE(task);
}

/* Returns the clients for which no LaunchInfo exists, or nil if every
 * client has one.
 * NOTE(review): the array is created with +new and is not autoreleased,
 * so callers appear to be expected to release it - confirm against the
 * callers before changing.
 */
- (NSMutableArray*) unconfiguredClients
{
  NSUInteger            i = [clients count];
  NSMutableArray        *a = nil;

  while (i-- > 0)
    {
      EcClientI         *c = [clients objectAtIndex: i];
      LaunchInfo        *l = [LaunchInfo existing: [c name]];

      if (nil == l)
        {
          if (nil == a)
            {
              a = [NSMutableArray new];
            }
          [a addObject: c];
        }
    }
  return a;
}

/* Marks the client as unregistered, removes it from the list of clients,
 * and logs the change as "unregistered" (if ok is YES) or "lost".
 */
- (void) removeClient: (EcClientI*)o cleanly: (BOOL)ok
{
  /* Keep the name alive in case removing the client destroys it.
   */
  NSString      *name = AUTORELEASE(RETAIN([o name]));
  NSUInteger    i;

  [o setUnregistered: YES];
  i = [clients indexOfObjectIdenticalTo: o];
  if (i != NSNotFound)
    {
      [clients removeObjectAtIndex: i];
    }
  if (ok)
    {
      [self logChange: @"unregistered" for: name];
    }
  else
    {
      [self logChange: @"lost" for: name];
    }
}

/* Handles a request to unregister the client identified by obj, with the
 * supplied exit status, and then sends updated client information to the
 * control server (see -update).  Unknown clients are ignored.
 */
- (void) unregisterByObject: (byref id)obj status: (int)s
{
  EcClientI     *o = [self findIn: clients byObject: obj];

  if (o != nil)
    {
      LaunchInfo        *l = [launchInfo objectForKey: [o name]];

      /* If we did not launch the process, trust the exit status provided
       */
      NSLog(@"Unregister with status %d", s);
      [l setTerminationStatus: s];
      [self removeClient: o cleanly: YES];
      [l clearClient: o cleanly: YES];
      [l stopping: nil];
    }
  [self update];
}

/* Sends the names of all current clients to the control server (if we
 * have one) as a binary property list; any exception doing that is
 * logged.  If we are terminating and no clients remain, performs the
 * final shutdown.
 */
- (void) update
{
  if (control)
    {
      NS_DURING
        {
          NSMutableArray        *a;
          int                   i;

          a = [NSMutableArray arrayWithCapacity: [clients count]];
          for (i = 0; i < (int)[clients count]; i++)
            {
              EcClientI *c;

              c = [clients objectAtIndex: i];
              [a addObject: [c name]];
            }
          [control servers: [NSPropertyListSerialization
            dataFromPropertyList: a
            format: NSPropertyListBinaryFormat_v1_0
            errorDescription: 0] on: self];
        }
      NS_HANDLER
        {
          NSLog(@"Exception sending names to Control: %@", localException);
        }
      NS_ENDHANDLER
    }
  if (terminating != nil && [clients count] == 0)
    {
      [self information: @"Final shutdown."
                   from: nil
                     to: nil
                   type: LT_CONSOLE];
      [terminating invalidate];
      terminating = nil;
      [self cmdQuit: tStatus];
    }
}

/* Builds a new configuration from serialised property list data and
 * passes it to -newConfig:.  The configuration is made from the entries
 * for this host, with entries from the general ("*") section filling in
 * anything which has no host-specific value, plus the "Operators" entry
 * (which is also passed to -ecOperators:).
 * Data which is nil, or does not contain a non-empty mutable dictionary,
 * is ignored.
 */
- (void) updateConfig: (NSData*)data
{
  NSMutableDictionary   *info;
  NSMutableDictionary   *dict;
  NSMutableDictionary   *newConfig;
  NSDictionary          *operators;
  NSEnumerator          *enumerator;
  NSString              *key;

  /* Ignore invalid/empty configuration
   */
  if (nil == data)
    {
      return;
    }
  info = [NSPropertyListSerialization
    propertyListWithData: data
    options: NSPropertyListMutableContainers
    format: 0
    error: 0];
  if (NO == [info isKindOfClass: [NSMutableDictionary class]]
    || 0 == [info count])
    {
      return;
    }

  newConfig = [NSMutableDictionary dictionaryWithCapacity: 32];

  /*
   * Put all values for this host in the config dictionary.
   */
  dict = [info objectForKey: host];
  if (dict)
    {
      [newConfig addEntriesFromDictionary: dict];
    }

  /*
   * Add any default values to the config dictionary where we don't have
   * host specific values.
   */
  dict = [info objectForKey: @"*"];
  if (dict)
    {
      enumerator = [dict keyEnumerator];
      while ((key = [enumerator nextObject]) != nil)
        {
          NSMutableDictionary   *partial = [newConfig objectForKey: key];
          NSMutableDictionary   *general = [dict objectForKey: key];

          if (partial == nil)
            {
              /*
               * No host-specific info for this application -
               * Use the general stuff for the application.
               */
              [newConfig setObject: general forKey: key];
            }
          else
            {
              NSEnumerator      *another = [general keyEnumerator];
              NSString          *sub;

              /*
               * Merge in any values for this application which
               * exist in the general stuff, but not in the host
               * specific area.
               */
              while ((sub = [another nextObject]) != nil)
                {
                  if ([partial objectForKey: sub] == nil)
                    {
                      id        obj = [general objectForKey: sub];

                      [partial setObject: obj forKey: sub];
                    }
                  else
                    {
                      [[self logFile]
                        printf: @"General config for %@/%@ overridden by"
                        @" host-specific version\n", key, sub];
                    }
                }
            }
        }
    }

  /*
   * Add the list of operators to the config.
   */
  operators = [info objectForKey: @"Operators"];
  if (operators != nil)
    {
      [newConfig setObject: operators forKey: @"Operators"];
    }
  [self ecOperators: operators];

  /* Finally, replace old config with new if they differ.
   */
  [self newConfig: newConfig];
}

/* Tells the LaunchInfo for the client identified by obj (if that client
 * is known to us) that the process has been awakened.
 */
- (void) woken: (id)obj
{
  EcClientI     *o = [self findIn: clients byObject: obj];

  if (o != nil)
    {
      LaunchInfo        *l = [launchInfo objectForKey: [o name]];

      [l awakened];
    }
}

@end