restarting services and terminating processes with mom 2005

this particular example is for softgrid.  i thought it might be useful to generalize it for any purpose, though.  you probably already have services that may require a restart every now and then.  that’s pretty easy in mom.  you can do it by issuing a simple net stop && net start command as illustrated in this post.

the general perception is that admins are lazy.  to help perpetuate this obvious lie, i tried to use the simple method above but failed.  it turns out that some services don’t terminate the processes upon stopping, as you would expect.  short of trying some ridiculously long for loop statements inside of the batch response, you have to go with a script.

i really did consider going with batch script but ended up needing a bit more flexibility.  for instance, instead of blindly going through the cycle, i wanted to make sure we were still in the given condition before we went ahead with it.  to do that, we have to check the process utilization state.  anyway, the script does the following:

  • examines process(es) and the processor utilization rate
  • stops the service
  • terminates the running process(es)
  • starts the service
  • creates a log output
  • stamps the alert description with informative data

of course, we need to give it the process and service name we want it to attack.  for that, you’ll need the following parameters when you set up this script in mom.

  • Process (read into sProcess) – process name
  • Threshold (read into iThreshold) – threshold that the average process utilization must be above
  • Service (read into sService) – service name to restart
  • LogName (read into sLogName) – name of log file to generate

a bit more minutiae – the script checks the process utilization 10 times in a row, then divides the total by 10 to get the average.  if the average is above the threshold, it goes through the cycle to reset the thing.  you can change all that crap around in the script, but it is not exposed as a parameter.

we’re in testing with opsmgr, so whenever we go live, i’ll have to convert these scripts.  i’ll post them in opsmgr format as i get them prepared.  for now, here’s the mom 2005 version:

'==========================================================================
' NAME: Service/Process Restart
'
' AUTHOR: Marcus C. Oh
' DATE  : 9/15/2008
'
' COMMENT: Recycles runaway processes and services based on a threshold
'          Logs to %windir%\temp directory
'
' VERSION: 1.0
'==========================================================================

' Event type codes for use with the CreateEvent helper
Const EVENT_TYPE_SUCCESS     = 0
Const EVENT_TYPE_ERROR       = 1
Const EVENT_TYPE_WARNING     = 2
Const EVENT_TYPE_INFORMATION = 4

' Defaults: query the local machine; no restart cycle requested yet
bCycle    = False
sComputer = "."

' Values handed to the script as MOM script parameters
sProcess   = ScriptContext.Parameters.Get("Process")
iThreshold = CInt(ScriptContext.Parameters.Get("Threshold"))
sService   = ScriptContext.Parameters.Get("Service")
sLogName   = ScriptContext.Parameters.Get("LogName")

' Alert object from the script context; its Description is appended to later
Set oAlert = ScriptContext.Alert


' Resolve %WinDir% and create (overwriting any existing) log file under its temp folder
Set oShell = CreateObject("Wscript.Shell")
Set oFS    = CreateObject("Scripting.FileSystemObject")
sWinDir    = oShell.ExpandEnvironmentStrings("%WinDir%")
sLogPath   = sWinDir & "\temp\" & sLogName
Set myLogFile = oFS.CreateTextFile(sLogPath, True)


' Connect to the root\cimv2 WMI namespace on the target computer
Set oWMIService = GetObject("winmgmts:\\" & sComputer & "\root\cimv2")


' Decide whether a restart cycle is needed.
' Every process whose name starts with sProcess is sampled SAMPLE_COUNT times
' (PercentProcessorTime from Win32_PerfFormattedData_PerfProc_Process), pausing
' between samples, and the samples are averaged. bCycle is set to True as soon
' as one process averages above iThreshold; remaining processes are not sampled.

Const SAMPLE_COUNT = 10      ' number of samples taken per process
Const SAMPLE_DELAY = 1000    ' value passed to mySleep between samples

myLog "[Starting process cycling...]"

'Set oPerfData = ScriptContext.Perfdata
myLog VbCrLf & vbTab & "Checking process(es) for: " & sProcess

If Len(Trim(sProcess & "")) = 0 Then
    ' An empty name would turn the LIKE pattern below into '%', which matches
    ' every process on the machine. Leave bCycle False instead.
    myLog vbTab & "No process name supplied; skipping utilization check."
Else
    Set cProcessNames = oWMIService.ExecQuery("Select handle from Win32_Process Where Name like '" & sProcess & "%'")
    For Each oProcName In cProcessNames
        iProcTime = 0
        myLog vbTab & "Examining process handle " & oProcName.handle

        For iLoop = 1 To SAMPLE_COUNT
            ' Re-query on every pass so each sample is a fresh reading
            Set cProcesses = oWMIService.ExecQuery("Select * From Win32_PerfFormattedData_PerfProc_Process Where IDProcess = '" & oProcName.handle & "'")
            For Each oProcess in cProcesses
                iSample = CInt(oProcess.PercentProcessorTime)
                iProcTime = iProcTime + iSample
                myLog vbTab & oProcess.Name & " utilization aggregate - " & iProcTime & " (sample value - " & iSample & ")"
            Next
            mySleep SAMPLE_DELAY
        Next

        myLog vbTab & "Aggregate utilization for process handle " & oProcName.handle & " - " & iProcTime

        ' A pass that returned no perf row contributes 0 to the total
        iAverage = iProcTime / SAMPLE_COUNT
        If iAverage > iThreshold Then
            myLog vbTab & "Process utilization matches criteria."
            myLog vbTab & "Divided by " & SAMPLE_COUNT & " - " & iAverage
            bCycle = True
            Exit For
        Else
            myLog vbTab & "Process utilization at " & iAverage & " does not exceed threshold of " & iThreshold & VbCrLf
        End If
    Next
End If

' Run the stop / terminate / start cycle when the utilization check asked for
' it, then record the outcome in the log and in the alert description.
If bCycle = True Then
    ' Stop the service and give it time to wind down.
    Call CommandService(sService,"Stop")
    mySleep(5000)


    ' Terminate leftover processes only once the service reports Stopped.
    If VerifyService(sService,"Stopped") Then
        myLog VbCrLf & vbTab & sService & " has stopped successfully."
        myLog VbCrLf & vbTab & "Terminating process(es): " & sProcess
        Call TerminateProcess(sProcess)
    Else
        myLog VbCrLf & vbTab & sService & " did not report a Stopped state; skipping process termination."
    End If
    mySleep(5000)


    ' Start the service.
    Call CommandService(sService,"Start")
    mySleep(10000)


    ' Verify the service started. Win32_Service.State reports "Running" for a
    ' started service; "Started" is a separate Boolean property, so comparing
    ' State against "Started" could never succeed.
    If VerifyService(sService,"Running") Then
        myLog vbTab & sService & " has started successfully."
    Else
        myLog vbTab & sService & " has failed to start."
    End If


    ' Append the remediation summary to the original alert description.
    oAlert.Description = oAlert.Description & VbCrLf & VbCrLf & _
        "Remediation script for runaway processes has been executed. " & _
        "Please review the following log for details: " & sWinDir & "\temp\" & sLogName
Else
    ' No sampled process averaged above the threshold.
    myLog vbTab & "Process utilization does not exceed threshold."

    ' Append the outcome to the original alert description.
    oAlert.Description = oAlert.Description & VbCrLf & VbCrLf & _
        "No remediation attempt required."
End If

myLog VbCrLf & "[Stopping process cycling...]"

' Close out the file
myLogFile.Close


' Subs and Functions ------------------------------------------------------

' Start or stop a service through WMI.
'   sService - value matched against Win32_Service.Name
'   sAction  - "Stop" or "Start"; any other value is logged and ignored
' The outcome is written to the log: a non-zero return code from
' StopService/StartService is recorded, as is the case where no service
' matches sService. The sub does not wait for the state change to complete.
Sub CommandService(sService,sAction)
    Dim cServices, oService, iResult, bFound

    bFound = False
    Set cServices = oWMIService.ExecQuery("Select * from Win32_Service where Name='" & sService & "'")
    For Each oService in cServices
        bFound = True
        myLog VbCrLf & vbTab & sAction & " -- " & sService

        iResult = 0
        If sAction = "Stop" Then
            iResult = oService.StopService()
        ElseIf sAction = "Start" Then
            iResult = oService.StartService()
        Else
            myLog vbTab & "Unrecognized action '" & sAction & "'; nothing done."
        End If

        ' 0 means the request was accepted; anything else is a WMI error code
        If iResult <> 0 Then
            myLog vbTab & sAction & " request for " & sService & " returned code " & iResult
        End If
    Next

    If Not bFound Then
        myLog VbCrLf & vbTab & "Service not found: " & sService
    End If
End Sub

' Check whether a service is currently in a given state.
'   sService - value matched against Win32_Service.Name
'   sState   - expected Win32_Service.State string (for example "Stopped" or
'              "Running"); compared without regard to case
' Returns True when a matching service reports that state, otherwise False
' (including when no service matches sService).
Function VerifyService(sService,sState)
    Dim cServices, oService

    ' Always hand back a Boolean rather than leaving the result Empty
    VerifyService = False

    Set cServices = oWMIService.ExecQuery("Select * From Win32_Service Where Name ='" & sService & "'")
    For Each oService in cServices
        If StrComp(oService.State, sState, vbTextCompare) = 0 Then
            VerifyService = True
            Exit For
        End If
    Next
End Function

' Terminate every process whose name starts with sSGProcess.
'   sSGProcess - process name prefix used in a WQL LIKE 'prefix%' match
' An empty prefix is refused, because the pattern would become '%' and match
' every process on the machine. Non-zero return codes from Terminate are
' written to the log.
Sub TerminateProcess(sSGProcess)
    Dim cRunningProcesses, oRunningProcess, iResult

    If Len(Trim(sSGProcess & "")) = 0 Then
        myLog vbTab & "No process name supplied; refusing to terminate processes."
        Exit Sub
    End If

    Set cRunningProcesses = oWMIService.ExecQuery("Select * from Win32_Process Where Name like '" & sSGProcess & "%'")
    For Each oRunningProcess in cRunningProcesses
        iResult = oRunningProcess.Terminate()
        If iResult <> 0 Then
            myLog vbTab & "Terminate failed for " & oRunningProcess.Name & " (handle " & oRunningProcess.Handle & "), return code " & iResult
        End If
    Next
End Sub

' General sleep sub to switch between MOM and cmd line
' Single place where the script pauses: forwards iSleep unchanged to
' ScriptContext.Sleep, so only this sub needs editing to run outside MOM.
'   iSleep - pause length, passed through as-is
' NOTE(review): callers pass 1000, 5000 and 10000, which read as milliseconds;
' confirm the unit ScriptContext.Sleep expects.
Sub mySleep(iSleep)
    ScriptContext.Sleep(iSleep)
End Sub

' Append one line of text to the script's log file.
'   sData - text to write; a line break is added by WriteLine
Sub myLog(sData)
    myLogFile.WriteLine sData
End Sub

' Build a MOM event from the supplied values and submit it.
'   iEventNumber  - assigned to the event's EventNumber
'   iEventType    - assigned to the event's EventType (see the EVENT_TYPE_* constants)
'   sEventSource  - assigned to the event's EventSource
'   sEventMessage - assigned to the event's Message
Sub CreateEvent(iEventNumber,iEventType,sEventSource,sEventMessage)
    Set oEvent = ScriptContext.CreateEvent()
    With oEvent
        .EventNumber = iEventNumber
        .EventType   = iEventType
        .EventSource = sEventSource
        .Message     = sEventMessage
    End With
    ScriptContext.Submit oEvent
End Sub

Comments