OpenVMS Source Code Demos

WATCHDOG

1000	%TITLE "Terminator_xxx.bas"						! listing title
	%IDENT                      "Version_50.1"				! keep in step with k_version below
	declare string constant	k_version = "50.1"			,	! version shown in the startup banner		&
				k_program = "Terminator"			! program name shown in the startup banner
	!
	!0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
	!1         2         3         4         5         6         7         8         9         0         1         2         3
	!=========================================================================================================================
	! Title  : Terminator_xxx.bas (a.k.a. Watchdog_xxx.bas)
	! Model  : Cyberdyne Systems Model 101 (T-800 series)
	! Author : Neil S. Rieck
	! Purpose: 1) log out idle terminals
	!	   2) kill "run away" processes (tell everyone on the grid "TRON LIVES")
	! Target : VMS 4.5 (and up)
	!=========================================================================================================================
	! History:
	!
	! Ver Who When   What
	! --- --- ------ ---------------------------------------------------------------------------------------------------------
	! 30  NSR 900926 1. Total rewrite (optimized & shortened)
	!                2. the process stats array is now dimensioned at run time by the value of MaxProcessCnt which was
	!		    returned by $GetSYI
	!                3. $GetSYI is used to find the processor type so we can adjust the minimum CPU time required when
	!		    considering a process active
	! 31  NSR 901019 1. added code to send messages to the MASTER_PID when dealing with a sub-proc
	!     NSR 910121 2. warning at 15 mins and logout at 20 minutes
	! 32  NSR 921117 1. warning at 25 mins and logout at 30 minutes
	!		 2. changed watchdog messages
	!     NSR 921120 1. warning at 20 + 40 minutes, logout at 60 minutes
	! 33  NSR 921121 1. changed the name of this program to TERMINATOR and made the messages Arnold'esk
	!		 2. now give warnings at 15, 30, + 45 minutes
	! 34  NSR 930424 1. now give warnings at 30 + 60 minutes
	!		 2. now kick them off at 90 minutes
	!     NSR 930706 3. now warn at 75 minutes only (kick at 90)
	!     NSR 931222 4. now don't log out the console (so I can test ARDIS SERVER)
	!     NSR 940104 5. now warn at 30 mins and kick off at 45 mins
	! 35  NSR 940129 1. now can log out the console again
	!		 2. changed the warning message to something less annoying (will print on the 24th line of a terminal)
	!     NSR 940301 3. now warn at 45 mins and kick off at 60 mins
	!     NSR 940308 4. give people in account PCPCSM (P.C. Partners) extra time
	!     NSR 940322 5. added 'PRINT RC' to logfile before LIB$STOP
	!		 6. added a test for SS$_SUSPENDED to GetJpi
	! 36  NSR 940420 1. now warn at 60 mins and kick off at 75 mins
	!		 2. added code to detect users that have consumed more than 1 hour of cpu time
	! 37  NSR 941117 1. changed warn array from byte to long
	!		 2. changed the name 'Watchdog' to 'Terminator' in the messages sent to terminals
	!     NSR 960723 3. modified the logging code that detects when a user has consumed too much CPU time
	!		 4. added code to support DECWindows devices under VMS 5.5-2
	! 38  NSR 960815 1. added code to properly handle DECwindows devices under Motif 1.2-3 (using $GetDvi)
	!	  960826 2. added code to get the image name so we can deal with certain DECwindows tasks
	!	  960906 3. added code to leave DECwindows task alone during the day
	! 39  NSR 980304 1. added code to change the sleep time from system logical: CSMIS$WATCHDOG_SLEEP_MINS
	! 40  AGD 980609 1. Replaced wcsm$src with csmis$inc
	! 41  NSR 990207 1. noticed that some users can evade the watchdog by running WorkDB via spawn from TekWar (this blocks
	!		    our nightly RMS file tuning).
	!		 2. changed MinBIO from 8 to 24 ("$ sho proc/acc" will generate 17 on any size VAX)
	!		 3. changed MinCPU from 4 {40 mS} to 10 {100 mS} ("$ sho proc/acc" will use ~ 12 mS on a VAX-3800)
	! 42  NSR 000208 1. renovated for use with sys$library starlet
	!     NSR 000510 2. now user "SYSTEM" is never bothered (we could have system problems)
	!		 3. now device "OPA0:" is never bothered (we could have system problems)
	!     NSR 000824 4. added some debugging code to find out why certain tasks are being killed by the watch dog
	!     NSR 000825 5. now DECwindows tasks without a terminal are never stopped (Font Server)
	!     NSR 010622 6. alpha renovation
	!     NSR 011001 7. now NEIL, STEVE, and AL are no longer excluded from consideration
	! 43  NSR 081103 1. documentation changes
	! 44  NSR 120302 1. added code to kill runaway Apache processes (happens once every few weeks)
	! 45  NSR 120303 1. mini-cleanup
	!		 2. now terminate users who consume more than 30 minutes of CPU time (previously was 60 minutes)
	!     NSR 120305 3. a few tweaks
	! 46  NSR 120907 1. added code to kill runaway TCPware processes (happens once every few months)
	! 47  NSR 120907 1. renovated to do away with "[.inc]vms_structures.inc"
	!		 2. introduced some optimizations (why request return lengths then never use them?)		bf_47.2
	!		 3. a few more optimizations
	! 48  NSR 130327 1. now need to watchdog some user procs without an attached terminal (like "SSHD 9999A")	bf_48.1
	!		 2. increased maximum limit for Apache child processes						bf_48.2
	!		 3. increased maximum limit for Apache worker processes						bf_48.3
	!     NSR 130328 4. more tweaks to the log file
	! 49  NSR 130329 1. more changes to SSHD section after a good night's sleep :-)					bf_49.1
	!		 2. increased maximum limit for Apache child processes (again)					bf_49.2
	!		 3. restored maximum limit for Apache worker processes						bf_49.3
	!		 4. changes to logging logic
	!		 5. increased MinCPU because idle "SSHD PTD" always seem to consume enough CPU resources
	!		 6. increased MinBIO
	! 50  NSR 130330 1. changes to logging logic
	!=========================================================================================================================
	! Notes:
	!
	! 1. I have employed multiple RETURNS in subroutines so that this program will consume the fewest possible resources on
	!	my over-worked VAX-11/730. It makes the logic a little hard to follow in some places. Sorry.
	! 2. DCL commands used to COMPILE & LINK:
	!	a. BASIC	watchdog_47.bas		(yields: watchdog_47.obj)
	!	b. LINK		watchdog_47.obj		(yields: watchdog_47.exe)
	! 3. DCL commands used to RUN:
	!	a. EDIT  WATCHDOG.COM	then insert the following:
	!		$ run directory:WatchDog_47				-
	!	                /process_name = "Watch_Dog"			-
	!	                /UIC=[1,4]					-
	!	                /noswap						-
	!	                /priv   = (world,oper)				-
	!	                /input  = nl:					-
	!	                /ERROR	= csmis$log:watchdog.err		-
	!	                /output = csmis$log:watchdog.out		-
	!	                /prior  = 2
	!		$ exit
	!	b. @WatchDog
	!	c. place '@ directory:WatchDog.com' in SYS$MANAGER:SYSTARTUP.COM so watch dog is run every boot
	!=========================================================================================================================
	OPTION type = explicit							! cuz tricks are for kids
	OPTION size = (real double)						! force of habit
	on error goto common_trap						! old school trapping
	!
	!	OpenVMS system related stuff
	!
	%include "starlet"      %from %library "sys$library:basic$starlet"	! system services
	%include "$ssdef"       %from %library "sys$library:basic$starlet"	! ss$
	%include "$jpidef"      %from %library "sys$library:basic$starlet"	! jpi$
	%include "$syidef"      %from %library "sys$library:basic$starlet"	! syi$
	%include "$brkdef"      %from %library "sys$library:basic$starlet"      ! brk$
	%include "lib$routines" %from %library "sys$library:basic$starlet"	! lib$
	%include "$dvidef"      %from %library "sys$library:basic$starlet"	! dvi$
	%include "$iledef"      %from %library "sys$library:basic$starlet"      ! ile3$ (Item List Entry 3 structures)
!~~~	%include "$iosbdef"     %from %library "sys$library:basic$starlet"	x iosb$ (iosb structures)
	!
	!	I need the following iosb to get around a limitation found in the BASIC version of starlet
	!
	!	question : How did I know?
	!	answer   : Hacking
	!	reference: https://neilrieck.net/docs/openvms_notes_hacking_starlet.html
	!
	record my_iosb								! 8-byte I/O status block with three overlaid views
	    variant								! each case below maps the same 8 bytes
		case								! vanilla
		    group one							!
			word		iosb$w_status				! status word
			word		iosb$w_bcnt				! count word
			long		iosb$l_dev_depend			! remaining longword
		    end group							!
		case								! used in sys$getqui
		    group two							!
			long		iosb$l_getxxi_status			! status longword
			long		iosb$l_reserved				!
		    end group							!
		case								! used to satisfy the compiler
		    group three							!
			basic$quadword	iosb$quad				! unsigned quad word (the view passed to system calls)
		    end group							!
	    end variant								!
	end record								!
	!
	!
	%include "[.inc]device_controls.inc"					! vt escape sequences
	!
	external string function WCSM_dt_stamp					! date-time stamp (ccyymmddhhmmss)
	!
	external string function WCSM_trnlnm(string, string)			! translate logical name
	!
	declare long		rc					,	! Return Code (system status)	&
				MinCPU					,	! minimum cpu time threshold	&
				MinBIO					,	! minimum BIO threshold		&
				log_level%				,	! logging level now in effect	&
				sleep_minutes%				,	! minutes slept between scans	&
				temp%					,	! scratch integer		&
				web%					,	! CPU-second limit (0 = none)	&
				day%					,	! 1 = hour is 06 to 22, else 0	&
		string		junk$					,	! scratch string		&
				prefix$					,	! set before display_proc_info	&
				snap$						! date-time snap shot
	!
	declare	my_iosb	IosbJpi							! IO Status block (for GetJpiW)
	declare	my_iosb	IosbBrk							! IO Status block (for BrkThru)
	declare	my_iosb	IosbSyi							! IO Status block (for GetSyiW)
	!
	!-----------------------------------------------------------------------
	!	declare new data type called: SyiRec
	!-----------------------------------------------------------------------
	record SyiRec								! structure of SYI Record (item list for GetSYI)
	    ile3	ItemVar(1)						! 0 -> 1 items (two descriptors)
			long list_term						! mark end-of-list
	end record SyiRec							!
	!
	declare	SyiRec	SyiBuf							! Now declare a variable using it
	!
	!	storage for data returned by GetSYI
	!
	declare	long	CPU_Type					,	! CPU Identification number		&
			MaxProcessCnt					,	! Maximum Process Count (from SysGen)	&
			Master_PID						! Master PID of sub process
	!-----------------------------------------------------------------------
	!	Make SyiBuf Entries
	!	(each descriptor = buffer size, item code, buffer address, return-length address)
	!-----------------------------------------------------------------------
	SyiBuf::ItemVar(0)::ile3$w_length	= 4				! long (4 bytes)
	SyiBuf::ItemVar(0)::ile3$w_code		= SYI$_CPU			! CPU type
	SyiBuf::ItemVar(0)::ile3$ps_bufaddr	= LOC(CPU_Type)			! result lands in CPU_Type
	SyiBuf::ItemVar(0)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	SyiBuf::ItemVar(1)::ile3$w_length	= 4				! long
	SyiBuf::ItemVar(1)::ile3$w_code		= SYI$_MaxProcessCnt		! max process count
	SyiBuf::ItemVar(1)::ile3$ps_bufaddr	= LOC(MaxProcessCnt)		! result lands in MaxProcessCnt
	SyiBuf::ItemVar(1)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	SyiBuf::LIST_TERM			= SYI$C_ListEnd			! mark end
	!
	!	/// DVI Setup ///
	!
	!	declare a new data type called: DviRec
	!
	record DviRec								! structure of Dvi Record (single-item list)
	    ile3 ItemVar(0)							! 0 -> 0 items (one descriptor)
	    long list_term							! mark end-of-list
	end record DviRec							!
	!
	! NOTE(review): no DviBuf descriptor fields are filled in within the part of the file reviewed here;
	!		confirm they are set before DviBuf is passed to a system service.
	!
	declare	DviRec	DviBuf							! Now declare a variable using it
	!
	!-----------------------------------------------------------------------
	!	declare new data type called: JpiRec
	!-----------------------------------------------------------------------
	record JpiRec								! structure of JPI Record (item list for GetJPI)
	    ile3 ItemVar( 12 )							! 0 -> 12 items (thirteen descriptors)
	    long list_term							! mark end-of-list
	end record JpiRec							!
	!
	declare	JpiRec	JpiBuf							! Now declare a variable using it
	!
	!    Storage for info returned by GETJPI
	!
	!    Note: variables with an '_RtnLn' suffix are 'Return Length' variables.
	!	   After a call, VMS places a count of the number of bytes returned in each '_RtnLn' variable
	!
	!    NOTE(review): the '_RtnLn' cells are declared long; presumably the service stores only a 16-bit
	!	   count into each one, which works only while the upper half stays zero - confirm.
	!
	Declare	long		PID					,	! process PID			&
				NewCPU					,	! recorded CPU time		&
				NewBIO					,	! recorded I/O count		&
				Subcount				,	! recorded # of subprocesses	&
				PrcIdx						! proc index in VMS table
	!
	!    The string sizes in this MAP must stay equal to the ile3$w_length values placed in JpiBuf
	!
	MAP(Jpi)string	UserName	= 12				,	! user name			&
		long	UserName_RtnLn					,	!	length returned		&
		string	ProcName	= 15				,	! process name			&
		long	ProcName_RtnLn					,	!	length returned		&
		string	TTY_Name	= 10				,	! TTY port name (_LTA9999:)	&
		long	TTY_Name_RtnLn					,	!	length returned		&
		string	Account		= 12				,	! user's account		&
		long	Account_RtnLn					, 	!	length returned		&
		string	ImagName	= 255				,	! Image Name			&
		long	ImagName_RtnLn					,	!	length returned		&
		long	grp_buffer					,	! group (decimal)		&
		long	mem_buffer						! member (decimal)
	!
	!-----------------------------------------------------------------------
	!	Make JpiBuf Entries
	!-----------------------------------------------------------------------
	!	Thirteen descriptors (items 0 -> 12) followed by the terminator.
	!	Each descriptor = buffer size, item code, buffer address, return-length address.
	!	String items record a return length because their text is later trimmed with left$().
	!
	JpiBuf::ItemVar(0)::ile3$w_length	= 4				! long (4 bytes)
	JpiBuf::ItemVar(0)::ile3$w_code		= JPI$_PID			! Process ID #
	JpiBuf::ItemVar(0)::ile3$ps_bufaddr	= LOC( PID		)	! location of PID
	JpiBuf::ItemVar(0)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(1)::ile3$w_length	= 4				! long
	JpiBuf::ItemVar(1)::ile3$w_code		= JPI$_CpuTim			! CPU time used (in 10 mS ticks)
	JpiBuf::ItemVar(1)::ile3$ps_bufaddr	= LOC( NewCPU		)	!
	JpiBuf::ItemVar(1)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(2)::ile3$w_length	= 4				! long
	JpiBuf::ItemVar(2)::ile3$w_code		= JPI$_BufIo			! Buffered IO count
	JpiBuf::ItemVar(2)::ile3$ps_bufaddr	= LOC( NewBIO		)	!
	JpiBuf::ItemVar(2)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(3)::ile3$w_length	= 12				! size of UserName
	JpiBuf::ItemVar(3)::ile3$w_code		= JPI$_UserName			! Username
	JpiBuf::ItemVar(3)::ile3$ps_bufaddr	= LOC( UserName		)	!
	JpiBuf::ItemVar(3)::ile3$ps_retlen_addr	= LOC( UserName_RtnLn	)	!
	!
	JpiBuf::ItemVar(4)::ile3$w_length	= 10				! size of TTY_Name
	JpiBuf::ItemVar(4)::ile3$w_code		= JPI$_Terminal			! terminal name
	JpiBuf::ItemVar(4)::ile3$ps_bufaddr	= LOC( TTY_Name		)	!
	JpiBuf::ItemVar(4)::ile3$ps_retlen_addr	= LOC( TTY_Name_RtnLn	)	!
	!
	JpiBuf::ItemVar(5)::ile3$w_length	= 4				! long
	JpiBuf::ItemVar(5)::ile3$w_code		= JPI$_Proc_Index 		! proc_index
	JpiBuf::ItemVar(5)::ile3$ps_bufaddr	= LOC( PrcIdx		)	!
	JpiBuf::ItemVar(5)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(6)::ile3$w_length	= 4		  		! long
	JpiBuf::ItemVar(6)::ile3$w_code		= JPI$_PrcCnt     		! # of subprocs
	JpiBuf::ItemVar(6)::ile3$ps_bufaddr	= LOC( Subcount)		!
	JpiBuf::ItemVar(6)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(7)::ile3$w_length	= 4		  		! long
	JpiBuf::ItemVar(7)::ile3$w_code		= JPI$_Master_PID		!
	JpiBuf::ItemVar(7)::ile3$ps_bufaddr	= LOC( Master_PID	)	! master PID
	JpiBuf::ItemVar(7)::ile3$ps_retlen_addr	= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(8)::ile3$w_length	= 12		  		! size of Account
	JpiBuf::ItemVar(8)::ile3$w_code		= JPI$_Account			!
	JpiBuf::ItemVar(8)::ile3$ps_bufaddr	= LOC( Account		)	! user's account
	JpiBuf::ItemVar(8)::ile3$ps_retlen_addr	= LOC( Account_RtnLn	)	!
	!
	JpiBuf::ItemVar(9)::ile3$w_length	= 255		  		! size of ImagName
	JpiBuf::ItemVar(9)::ile3$w_code		= JPI$_ImagName			!
	JpiBuf::ItemVar(9)::ile3$ps_bufaddr	= LOC( ImagName		)	! image name
	JpiBuf::ItemVar(9)::ile3$ps_retlen_addr	= LOC( ImagName_RtnLn	)	!
	!
	JpiBuf::ItemVar(10)::ile3$w_length	= 4		  		! long
	JpiBuf::ItemVar(10)::ile3$w_code	= JPI$_Grp			!
	JpiBuf::ItemVar(10)::ile3$ps_bufaddr	= loc(grp_buffer	)	! group
	JpiBuf::ItemVar(10)::ile3$ps_retlen_addr= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(11)::ile3$w_length	= 4		  		! long
	JpiBuf::ItemVar(11)::ile3$w_code	= JPI$_Mem			!
	JpiBuf::ItemVar(11)::ile3$ps_bufaddr	= loc(mem_buffer	)	! member
	JpiBuf::ItemVar(11)::ile3$ps_retlen_addr= 0				! 0=don't record byte count		bf_47.2
	!
	JpiBuf::ItemVar(12)::ile3$w_length	= 15				! size of ProcName
	JpiBuf::ItemVar(12)::ile3$w_code	= JPI$_PrcNam			! Process name
	JpiBuf::ItemVar(12)::ile3$ps_bufaddr	= LOC( ProcName		)	!
	JpiBuf::ItemVar(12)::ile3$ps_retlen_addr= LOC( ProcName_RtnLn	)	!
	!
	JpiBuf::LIST_TERM			= JPI$C_ListEnd			! end of list
	!-----------------------------------------------------------------------
	! Declare a new data type called JpiRec2
	! (for use with sub procs)
	!-----------------------------------------------------------------------
	record JpiRec2								! structure of JPI Record (parent-terminal lookup)
	    ile3 ItemVar							! a single item (not an array)
	    long list_term							! mark end-of-list
	end record JpiRec2							!
	!
	declare	JpiRec2	JpiBuf2							! Now declare a variable using it
	!
	!	This list reuses TTY_Name and TTY_Name_RtnLn, so a lookup through JpiBuf2 overwrites
	!	the terminal name that the last JpiBuf call returned.
	!
	JpiBuf2::ItemVar::ile3$w_length		= 10				! size of TTY_Name
	JpiBuf2::ItemVar::ile3$w_code		= JPI$_Terminal			! terminal name
	JpiBuf2::ItemVar::ile3$ps_bufaddr	= LOC(TTY_Name)			!
	JpiBuf2::ItemVar::ile3$ps_retlen_addr	= LOC(TTY_Name_RtnLn)		!
	!
	JpiBuf2::LIST_TERM			= JPI$C_ListEnd			! end of list
	!-----------------------------------------------------------------------
	!	Misc Declarations
	!-----------------------------------------------------------------------
	Declare String		Message$				,	! string for breakthru	&
				trailer$				,	! etc...		&
		Long		SeedPID					,	! GetJpi wildcard seed	&
				desired_action%					! 3 = give him the boot
	! 2 = give him the boot
	! 1 = only warn him
	! 0 = skip
	! NOTE(review): codes 3 and 2 carry the same description above; confirm the difference
	!		against the code that sets and tests desired_action%
	!=======================================================================
	!	Initialize
	!=======================================================================
	init:
	!
	!	Startup: print the banner, then ask GetSYI for the CPU type and MaxProcessCnt.
	!	MaxProcessCnt sizes the four per-process tables below, which are indexed by PrcIdx.
	!
2000	margin #0, 132								! in case logging is turned on
	print k_program +"_"+ k_version						! banner line
	print string$(len(k_program +"_"+ k_version), asc("="))			! underline the banner
	snap$ = wcsm_dt_stamp							! ccyymmddhhmmss
	print "-i-Starting at: "+ left$(snap$,8) +"."+ right$(snap$,9)		! ccyymmdd.hhmmss
	print "-i-reminder: group + member values are displayed in decimal rather than octal"
	!
	!	Use GetSYI to find out...
	!		1. processor type
	!		2. MaxProcessCnt (from SYSGEN)
	!
	rc = sys$getsyiw (,,, SyiBuf by ref, IosbSyi::iosb$quad by ref,,)	! get system info
	!
	!	VMS convention: a status with the low bit set is a success, so test that bit instead of
	!	comparing against SS$_Normal. When the request was accepted, the final completion status
	!	is the one posted in the IOSB, so pick it up and test it as well (otherwise a failed request
	!	could leave MaxProcessCnt at zero and the tables below would be dimensioned too small).
	!
	rc = IosbSyi::iosb$l_getxxi_status	if (rc and 1) = 1		! accepted, so use the final status
	if (rc and 1) = 0 then							! if the request (or its completion) failed
	    print "-e-GetSyi: "+ str$( rc )					! log the failing status
	    sleep 1								! never go too fast
	    call lib$Stop (rc by value)						! kill self (seems rather extreme)
	end if									!
	!
	!	The per-CPU selection below is disabled, so the larger thresholds are always used
	!
!~	select CPU_type								x
!~	    case pr$_sid_typ730						,	x 0.3 MIPS CPUs	(VAX730)	&
!~		 pr$_sid_typ750						,	x 0.6 MIPS CPUs	(VAX750)	&
!~		 pr$_sid_typ780						 	x 1.0 MIPS CPUs (VAX780)
!~		    MinCpu = 10							x 10 x 10 = 100 mS
!~		    MinBIO = 24							x
!~	    case else								x Larger MIPS System
		MinCpu = 20							! 20 x 10 = 200 mS
		MinBIO = 50							!
!~	end select								x
	!
	DIM  LONG  CpuTim (MaxProcessCnt)					! cpu time buffer
	DIM  LONG  Bufioc (MaxProcessCnt)					! buffered i/o count buffer
	DIM  LONG  PID	  (MaxProcessCnt)					! PID buffer
	DIM  LONG  Warn	  (MaxProcessCnt)					! warning count buffer
	!
	! mat CpuTim	= zer
	! mat BufIoc	= zer
	! mat PID	= zer
	! mat Warn	= zer
	!
	!=======================================================================
	!	MAIN
	!=======================================================================
	main:
	!
	log_level%	= -1							! this will force an entry to the log file
	sleep_minutes%	= 0							! this will force an entry to the log file
3000	while 1									! do this forever
	    snap$ = wcsm_dt_stamp						! get snapshot of current time
	    !-------------------------------------------------------------------
	    !	<<< support logging >>>
	    !
	    !	The level is re-read from logical CSMIS$WATCHDOG_LOG on every pass of the outer loop,
	    !	so it can be changed while the program is running.
	    !
	    ! 0 = most logging off (except program errors + warnings)
	    !     note: all process terminations are warnings
	    ! 1 = more info
	    ! 2 = even more info (including: DECwindows)
	    ! 3 = even more info (including: NEIL, STEVE, DAVE)
	    ! 4 = log everything
	    !
	    !	Values outside 0 -> 4 are clamped. Earlier code capped the value at 3 before clamping,
	    !	which made level 4 (tested further down with ">= 4") impossible to select.
	    !-------------------------------------------------------------------
	    when error in							!
		junk$ = WCSM_TrnLnm ("CSMIS$WATCHDOG_LOG","LNM$SYSTEM_TABLE")	! fetch the requested level
		temp% = integer(junk$)						! traps to USE if not numeric
	    use									!
		print "-e-logical CSMIS$WATCHDOG_LOG not numeric: "+ junk$	!
		temp% = 0							! no logging
	    end when								!
	    select temp%							! clamp to the documented range
		case < 0							!
		    temp% = 0							! lowest level
		case > 4							!
		    temp% = 4							! highest documented level
	    end select								!
	    if log_level% <> temp% then						! if changing (since last pass thru)
		log_level% = temp%						!
		print "-i-Logging level set to "+ str$(log_level%) +" at "+ left$(snap$,8) +"."+ right$(snap$,9)
	    end if								!
	    !-------------------------------------------------------------------
	    !	<<< support sleep >>>
	    !
	    !	The sleep period (in minutes) is re-read from logical CSMIS$WATCHDOG_SLEEP_MINS on every pass.
	    !	Anything outside 1 -> 5, or anything not numeric, falls back to 5 minutes.
	    !
	    !	caveats:
	    !	1) do not change from 5 minutes without checking code below. Search for ">= 18"
	    !	2) run this program interactively then hit <enter> 18 times (once per second) to log everyone out
	    !-------------------------------------------------------------------
	    when error in							!
		junk$ = WCSM_TrnLnm ("CSMIS$WATCHDOG_SLEEP_MINS","LNM$SYSTEM_TABLE")
		temp% = integer(junk$)						! traps to USE if not numeric
		temp% = 5	if (temp% < 1) or (temp% > 5)			! out of range, so default to 5 minutes
	    use									!
		print "-e-logical CSMIS$WATCHDOG_SLEEP_MINS not numeric: "+ junk$
		temp% = 5							! default to 5 minutes
	    end when								!
	    if temp% <> sleep_minutes% then					! only log when the period changes
		print "-i-Sleep_Minutes now set to "+ str$(temp%) +" at "+ left$(snap$,8) +"."+ right$(snap$,9)
		sleep_minutes% = temp%						! remember the new period
	    end if								!
	    !
	    select mid$(snap$,9,2)						! hour digits (hh) of the snapshot	bf_48.1
		case < "06", > "22"						! before 06 or after 22
		    day% = 0							! night
		case else							! hours 06 through 22
		    day% = 1							! day
	    end select								!
	    !===================================================================
	    !	<<< original watchdog code starts here >>>
	    !===================================================================
	    SeedPID	= -1							! Do a wildcard GETJPI
	    rc		=  0							! clear previous status
	    !
	    while rc <> SS$_NoMoreProc						! -------------------------------------------------
		rc = sys$getjpiw(, SeedPID by ref,, JpiBuf by ref, IosbJpi::iosb$quad by ref,,)	! get info on the next process
		if rc <> SS$_Normal then					! SS$_Normal just falls thru to the tests below
		    select rc							!
			case SS$_NoMoreProc, SS$_suspended, SS$_NoPriv, SS$_NonExpr
			    iterate						! nothing to kill (end of scan is caught by the WHILE test)
		    end select							!
		    print "-i-Seed  : "+ str$( SeedPID	)			! print seed	(for debug)
		    print "-e-GetJpi: "+ str$( rc	)			! print rc	(for debug)
		    sleep 1							! never go too fast
		    call LIB$Stop(rc by value)					! unexpected status, so kill self
		end if								!
		!
		!	Optional debug logging of the process just fetched (debugging/hacking/testing only).
		!	At most one of the three displays below is produced per process.
		!
		if log_level% >= 4 then						! level 4: log everything
		    prefix$ = "-i-debug-L4>"					!
		    gosub display_proc_info					!
		    goto scan_continue						!
		end if								!
		if log_level% >= 3 then						! level 3: log selective
		    select edit$(left$(UserName,UserName_RtnLn), 32+2)		! upcase, no white space
			case "NEIL","STEVE","DAVE"				! log developers
			    prefix$ = "-i-debug-L3>"				!
			    gosub display_proc_info				!
			    goto scan_continue					!
		    end select							!
		end if								!
		if (log_level% >= 2) and (pos(left$(ImagName,ImagName_RtnLn),"DECW$",1) > 0) then
		    prefix$ = "-i-debug-L2>"					! level 2 (and up): log DECWindows stuff
		    gosub display_proc_info					!
		end if								!
		scan_continue:							!
		!
		!===============================================================
		! This block of code deals with certain Apache runaway problems (uses too much CPU time)
		! 1) I just tuned Apache so that child processes do not live forever even though keepalives are enabled
		!	KeepAlive On
		!	MaxKeepAliveRequests 99
		!	KeepAliveTimeout 120
		!	MaxRequestsPerChild 999
		!    This means that APACHE$SWS should be killing them gracefully before watchdog does
		! 2) "I think" encrypted connections consume more CPU time than we previously thought
		! 3) Apache worker processes only run once then exit
		! 4) Apache worker processes running Dave's powernode stuff "may" require more CPU time
		! caveat: 60 seconds of CPU time is a really long time
		!
		! web% ends up holding the CPU limit in seconds (0 = not a web process)
		!===============================================================
		web%		= 0						! init to not-web-mode
		junk$ = edit$(left$(ProcName,ProcName_RtnLn),32+2)		! upcase, no white space
		iterate if junk$ = "APACHE$SWS"					! never touch the master Apache process
		!
		web% = 60	if pos(junk$,"APACHE$WW_",1) > 0		! eg. APACHE$WW_62712 (worker process)	bf_49.3
		web% = 999	if pos(junk$,"APACHE$SWS",1) > 0		! eg. APACHE$SWS0009 (child; takes priority)	bf_49.2
		goto check_this_proc	if web% > 0				! a limit was chosen, so go test this process
		!---------------------------------------------------------------
		! This is special code which deals with certain TCPware runaway problems.
		! No warning will be sent to the offender, we will just terminate it.
		!---------------------------------------------------------------
		iterate if junk$ = "SSHDMASTER"					! never touch the master SSHD process
		!---------------------------------------------------------------
		!OpenVMS V8.4  on node KAWC15   27-MAR-2013 11:22:06.82   Uptime  181 13:46:58
		!  Pid    Process Name    State  Pri      I/O       CPU       Page flts  Pages
		!00144060 SSHD 0272       LEF      6    14158   0 00:00:01.17       832    798   <<< SYSTEM (for specified USER)
		!000B3282 SSHD 0272A PTD  LEF      4     3146   0 00:00:00.67      2539    607   <<< USER=AN_DHALLA
		!001136B0 SSHD 0272B PTD  LEF      8     4922   0 00:00:00.78      1144    798   <<< USER=AN_DHALLA
		!0012624F SSHD 0272C PTD  LEF      9     6270   0 00:00:01.08      1683    759   <<< USER=AN_DHALLA
		!---------------------------------------------------------------
		! Only an SSHD process whose name has no "PTD" in it (the SYSTEM proc above) gets a CPU limit here
		!---------------------------------------------------------------
		if (pos(junk$,"SSHD",1) > 0) and (pos(junk$,"PTD",1) = 0) then	! eg. SSHD 0123 (client-related process)
		    web% = 300							! night time: allow easier kill
		    web% = 600	if day% = 1					! day time: harder to kill		bf_48.1
		    goto check_this_proc					!
		end if								!

		!===============================================================
		! 1. if no tty, this could be a detached system task so skip
		! 2. if no tty, this could be a detached DECwindows task (like session manager) so continue
		! 3. if no tty, but master_pid <> pid (this is a subprocess), then send a message to the parent task
		!
		! Processes that do own a terminal skip this whole IF and go straight to Check_User
		!===============================================================
		if TTY_Name_RtnLn=0 then					! if no terminal ----------------------------------
		    !
!~~~		    iterate if master_PID=0					x if a system proc (VMS 4.5)
		    !
		    !	ignore SYSTEM		[1,4] .. [1,10]
		    !	ignore CUSTODIAN	[346,6]
		    !
		    select grp_buffer						! test group
			case    <= 7						! if SYSTEM (group: 1->7)
			    print "-i-skipping tests of SYSTEM process"	if log_level% >= 4
			    iterate						!
			case    ((3 * 64) + (4 * 8) + 6)			! if CUSTODIAN grp: 346 (octal:346 = decimal:230)
			    if mem_buffer = 6 then				! then ignore member 6
				print "-i-skipping tests of CUSTODIAN process" if log_level% >= 4
				iterate						!
			    end if						!
		    end select							!
		    !
		    iterate if pid = master_PID					! if not a sub proc
		    !
		    !	This is a subprocess so do another GetJPI to find out which terminal the parent process is using
		    !
		    !	NOTE(review): this call overwrites rc, which is also the scan loop's WHILE control value,
		    !	and the status it returns is never tested. A failed lookup is detected only because
		    !	TTY_Name_RtnLn is still zero afterwards - presumably it is left untouched on failure; confirm.
		    !
		    rc = sys$getjpiw (	,Master_PID		by ref,,	&
					JpiBuf2			by ref,		&
					IosbJpi::iosb$quad	by ref,,	)
		    iterate if TTY_Name_RtnLn=0					! if still no terminal (then not a user)
		end if								! -------------------------------------------------
		!
		!	We've found a user, so let's see if he's busy
		!	(web and SSHD processes with a CPU limit in web% jump straight to the label below)
		!
		check_this_proc:
		gosub Check_User						! gather stats (etc.)
	    next								! Go back for next process ------------------------
	    !
	    sleep sleep_minutes% * 60						! Hibernate
	    snap$ = wcsm_dt_stamp						! ccyymmddhhmmss
	    print "-i-wake: "+ left$(snap$,8) +"."+ right$(snap$,9)	if log_level% > 0
	next									! Check whole system again ------------------------
	!
	!=======================================================================
	!	Check If User Has Been Active Since Last Pass
	!=======================================================================
4000	CHECK_USER:
	!
	! Program Logic Notes:
	!
	! 1. When this program ran on a small system with DZ11's, we used a small array to record process info for each device.
	!    The device name was used to index the array so that the index for OPA0: was 0, for TTA0: was 1, for TTB0: was 9,
	!    for TTC0: was 17, etc.
	! 2. This method was unusable when we went to terminal servers because the terminal server numbers are dynamic and keep
	!    cycling between LTA1: LTA9999: (even if you don't have 10,000 servers!!) and we didn't want 10,000 element arrays.
	! 3. Now we use JPI$_Proc_Index (from SYS$GetJpiW) to generate a unique number in the range of 1 -> MaxProcessCnt
	!    (sysgen parameter).
	!

	goto transcribe_n_exit2	if PID(PrcIdx) <> PID				! transcribe	only if new user in this slot...
	goto transcribe_n_exit2	if Subcount > 0					! Transcribe	only if sub procs exist...
	!
	!	note: if Apache is started by user=SYSTEM at boot, then we must test CPU usage here because
	!		user=SYSTEM escapes termination tests approximately 40 lines below
	!
	if web% > 0 then							! if a web process...
	    select NewCPU							! time in 10 mS ticks
		case    >= 100 * web%						! remember that web% is in seconds
		    prefix$ = "-w-Web-9>"					!
		    gosub display_proc_info					! display info no matter what the logging level
		    print "-w-CPU time: "+ str$(NewCPU) +" (10 mS ticks)"	! time in 10 mS ticks
		    print "-w-        : "+ str$(web%) +" (secs)"		!
		    rc = sys$delprc(PID by ref,)				! then give him the boot
		    select rc							!
			case    SS$_Normal					!
			    print "-w-web process terminated"			!
			case    SS$_NonExpr					! just logged out ???
			case else						!
			    print "-e-DelPrc: "+ str$(rc)			! could not kill proc
			    sleep 1						! never go too fast
			    call LIB$Stop(rc by value)				! kill self (this is rather extreme)
		    end select							!
		    goto skip_transcribe_exit					! cuz nothing to transcribe
		case    >= 100 * web% / 2					! remember that web% is in seconds
		    if log_level% >= 1 then					!
			prefix$ = "-i-Web-8>"					!
			gosub display_proc_info					!
			print "-i-above half CPU limit"				!
		    end if							!
		case    else							!
		    if log_level% >= 2 then					!
			prefix$ = "-i-Web-7>"					!
			gosub display_proc_info					!
			print "-i-below half CPU limit"				!
		    end if							!
	    end select 								!
	    goto transcribe_n_exit2						!
	end if									!
	!
	!	make sure certain processes never consume more than 59 minutes of CPU time
	!
	select	edit$(left$(UserName,UserName_RtnLn),32+2)			!
	    case "SYSTEM"							!
		if log_level% >= 2 then						!
		    prefix$ = "-i-sys-1>"					!
		    gosub display_proc_info					!
		    print "-i-skipping further tests on this process (grp)"	!
		end if								!
		goto transcribe_n_exit2						! don't ever kill SYSTEM processes (exit now)
	    case "NEIL","STEVE","DAVE"						! don't test max CPU time of these people
		if log_level% >= 1 then						!
		    prefix$ = "-i-usr-1>"					!
		    gosub display_proc_info					!
		    print "-i-skipping max cpu tests on this process (usr)"	!
		end if								!
	    case else								!
		select NewCPU							! let's see how much CPU time was used
		    case    >=  100 * 60 * 59					! if >= 59 minutes of cpu time
			snap$ = wcsm_dt_stamp					!
			junk$ = edit$(left$(TTY_Name,TTY_Name_RtnLn),32+2)	!
			junk$ = "NONE"	if edit$(junk$,4+2) = ""		! could be web process
			print "-e-process: "+ edit$(left$(UserName,UserName_RtnLn),32+2)	+&
			    " on term "+ junk$							+&
			    " has used too much CPU time: "+ left$(snap$,8) +"."+ right$(snap$,9)
			Warn(PrcIdx) = 999					! put peg count over the limit
			goto inactive_user					! blow him away
		    case else							!
			if log_level% >= 1 then					!
			    prefix$ = "-i-usr-9>"				!
			    gosub display_proc_info				!
			    print "-i-max cpu tests passed"			!
			end if							!
		end select							!
	end select								!
	!
	print "-i-bio stats: idx=";PrcIdx;" prev=";Bufioc(PrcIdx);" curr=";NewBIO	if log_level% >= 2
	goto transcribe_n_exit	if ((Bufioc(PrcIdx) + MinBIO) <= NewBIO	)	! if enough BIOs
	!
	print "-i-cpu stats: idx=";PrcIdx;" prev=";CpuTim(PrcIdx);" curr=";NewCpu	if log_level% >= 2
	goto transcribe_n_exit	if ((Cputim(PrcIdx) + MinCpu) <= NewCPU	)	! if enough CPU
	!
	select edit$(left$(TTY_Name,TTY_Name_RtnLn),32+2)			!
	    case    "OPA0:"							! ignore the console device
		print "-i- skipping termination on device OPA0"			!
		goto transcribe_n_exit						!
	end select								!
	!
	if pos(left$(ImagName,ImagName_RtnLn),"DECW$",1%)>0 then		! if this is a DECWindows task...
	    select mid$(wcsm_dt_stamp,9,4)					! get current 24 hour time (2359)
		case    < "0800", > "1700"					! if not busness hours, then test
		    goto inactive_user						!
		case else							! else ignore during busness hours
		    goto transcribe_n_exit					!
	    end select								!
	end if									!
	!-----------------------------------------------------------------------
	!    This User Is Inactive (since the previous pass 5 minutes ago), so...
	!	1. update the warning count (he will get at least 15)
	!	2. warn him if necessary
	!	3. blow him away if we've warned him too often
	!	4. do not transcribe (maybe he will eventually build up enough to trigger a transcribe)
	!-----------------------------------------------------------------------
	inactive_user:								!
	Warn(PrcIdx) = Warn(PrcIdx) + 1 					! increment warning count
	print "-i-warning incremented to "+str$(Warn(PrcIdx))	if log_level% >= 2
	gosub Warn_User								! warn the user (also sets desired_action%)
	if desired_action% >= 2 then						! if desired_action% >= 2 (then terminate him)
	    prefix$ = "-w-debug-U9>"						!
	    gosub display_proc_info	if log_level% >= 1			!
	    rc = sys$delprc(PID by ref,)					! you are terminated
	    select rc								! how did it go?
		case    SS$_Normal						!
		    print "-w-user process terminated"				!
		case    SS$_NonExpr						! just logged out ???
!~~~		case	SS$_NoSuchDev						x just logged out ???
		case else							!
		    print "-e-DelPrc: "+ str$(rc)				!
		    sleep 1							! never go too fast
		    call LIB$Stop(rc by value)					! kill watch dog
	    end select								!
	end if									!
	goto skip_transcribe_exit						! do not transcribe
	!-----------------------------------------------------------------------
	!	<<< Transcribe Statistics >>>
	!
	!	user was active (or new) so transcribe stats, reset warn, then exit
	!-----------------------------------------------------------------------
	transcribe_n_exit:							!
	print "-i-transcribing stats (resetting warn)"	if log_level% >= 2	!
	transcribe_n_exit2:							!
	PID   (PrcIdx)	= PID							! record PID incase of new user
	Cputim(PrcIdx)	= NewCPU						! record cpu time
	Bufioc(PrcIdx)	= NewBIO						! record i/o count
	Warn  (PrcIdx)	= 0							! RESET
	skip_transcribe_exit:							!
	Return									!
	!=======================================================================
	! Build Warning Message and send to user's terminal
	!=======================================================================
5000	Warn_User:
	!
	!	Purpose: choose an action from the idle-pass count in Warn(PrcIdx), build the matching
	!		 "Terminator" message and broadcast it to the terminal named in TTY_Name.
	!	Result : desired_action%  0 = skip, 1 = warn only, 2 = warn & stop (idle), 3 = warn & stop (CPU)
	!		 (the caller tests this flag after the gosub to decide whether to delete the process,
	!		 so the flag is set even when no message can be sent)
	!	Reads  : Warn(), PrcIdx, sleep_minutes%, TTY_Name, TTY_Name_RtnLn, log_level%
	!	Alters : snap$, message$, prefix$, rc, DviBuf item 0, phyDevNam$, phyDevNam_RtnLn, IosbBrk
	!	Caution: an unexpected $BRKTHRU status stops this whole program via LIB$Stop
	!
	!	notes:
	!	1. since many of our green-screen apps have their own built in watch dogs set for 60 + 75 minutes (with a
	!	   1 minute resolution), we will set our watchdog times for 80 + 90 minutes
	!	2. since we run every 'sleep_minutes%' minutes, a value of 16 yields (16 * 'sleep_minutes%') minutes
	!
	select Warn(PrcIdx)							! get warning counts
	    case    >= 999							! too much cpu time was used
		desired_action%	= 3						!	flag = warn & stop
	    case    >= 18							! 18 times (90 minutes / 5 minutes)
		desired_action%	= 2						!	flag = warn & stop
	    case    = 17							!
		desired_action% = 0						!	flag = skip
	    case    = 16							! 16 times (80 minutes / 5 minutes)
		desired_action%	= 1						!	flag = warn only
	    case else								!
		desired_action%	= 0						!	flag = skip
	end select								!
	!
	!	nothing to send when the desired_action% = 0
	!
	goto warn_user_exit	if desired_action% = 0				! skip
	!
	!	common message header (built once for actions 1, 2 and 3)
	!
	snap$ = wcsm_dt_stamp							! ccyymmddhhmmss
	message$ = vt$SaveCursor + vt$Normal + vt$Message + bel +		&
	    "Terminator Time: " + left$( snap$, 8) + "." + right$( snap$, 9)
	!
	!	reason text
	!
	select desired_action%							!
	    case    1, 2							! warn only / warn then kill (idle)
		message$ = message$ + " Note: You have been inactive for " +	&
		    str$(Warn(PrcIdx)*sleep_minutes%) +" minutes"
	    case    3								! too much CPU time
		message$ = message$ + " Note: You have used too much CPU time"
	end select								!
	!
	!	trailer: restore the cursor after a plain warning, announce the kill otherwise
	!
	select desired_action%							!
	    case    1								!
		message$ = message$ + VT$RestoreCursor				!
	    case    2, 3							!
		message$ = message$ + cr + lf + "Process Terminated"+ cr + lf + cr + lf
	end select								!
	!
	!	experimental code for GetDvi (possible DECwindows support) Motif 1.2-3
	!
	map(dvi)	string	phyDevNam$	= 80			,	&
			long	phyDevNam_RtnLn					!
	!
	if left$(TTY_Name,TTY_Name_RtnLn) <> "" then				! if we have a terminal name
	    !
	    DviBuf::ItemVar(0)::ile3$w_length		= 80			! length code
	    DviBuf::ItemVar(0)::ile3$w_code		= Dvi$_TT_phyDevNam	! item
	    DviBuf::ItemVar(0)::ile3$ps_bufaddr		= LOC( phyDevNam$)	! addr of var
	    DviBuf::ItemVar(0)::ile3$ps_retlen_addr	= LOC( phyDevNam_RtnLn)	! addr of var
	    !
	    phyDevNam_RtnLn = 0							! forget the length left by an earlier call
	    rc = sys$getdviw(,,left$(TTY_Name,TTY_Name_RtnLn),DviBuf by ref,,,,)!
	    print "-e-GetDvi rc: "+ str$(rc)	if (rc and 1%) <> 1%		! odd status = success
	    !
	    print								!
	    print "-i-debug-U1> tty_name> "+ left$(TTY_Name,TTY_Name_RtnLn) +	&
		  " dev_name> "+ left$(phyDevNam$,phyDevNam_RtnLn) +		&
		  " time> "+ left$( snap$, 8) +"."+ right$( snap$, 9)		! ccyymmdd.hhmmss
	    prefix$ = "-i-debug-U2>"						!
	    gosub display_proc_info						!
	    !
	    !	only trust the returned length when the lookup itself succeeded; a failed
	    !	lookup must not be mistaken for (or hide) a disconnected terminal
	    !
	    if (rc and 1%) = 1% then						!
		if phyDevNam_RtnLn = 0 then					! no physical device behind this terminal
		    print "-i-disconnected task (not sending warning)"		! don't send messages to disconnected procs
		    goto warn_user_exit						!
		end if								!
	    end if								!
	end if									!
	!
	!	classify the device by the first two characters of its name
	!
	select left$( left$(TTY_Name,TTY_Name_RtnLn), 2)			!
	    case    "TX","TT"							! async controller
	    case    "LT","FT"							! lat + DECterm
	    case    "NT","RT"							! tcp/ip + decnet
	    case    "TW","PY"							! DEC Windows Device (pre VMS 6.0)
		prefix$ = "DECwind>"						!
		gosub display_proc_info	if log_level% >= 2			!
	    case    "MB"							! mail box device
		prefix$ = "MailBox>"						!
		gosub display_proc_info	if log_level% >= 2			!
		goto warn_user_exit						! jump (not a terminal)
	    case    ""								!
		prefix$ = "None   >"						!
		gosub display_proc_info	if log_level% >= 2			!
		goto warn_user_exit						! jump (nothing to send to)
	    case else								!
		prefix$ = "Unknown>"						!
		gosub display_proc_info	if log_level% >= 2			! log it, but still try to send
	end select								!
	!
	!	Send Message to Terminal
	!
	rc = sys$brkthru(				,			&
	    message$				by desc,			&
	    left$(TTY_Name,TTY_Name_RtnLn)	by desc,			&
	    BRK$C_Device			by value,			&
	    IosbBrk::iosb$quad			by ref,,,,,,	)
	select rc								!
	    case    SS$_Normal							!
	    case    SS$_NoSuchDev						! just logged out ???
	    case else								!
		print "-e-BrkThru: "+ str$(rc)					!
		sleep 1								! never go too fast
		call LIB$Stop(rc by value)					! kill watch dog (is this wise?)
	end select								!
	!
	warn_user_exit:								!
	return									!
	!=======================================================================
	!       display process related info (to log file)
	!=======================================================================
	display_proc_info:							!
	!
	!	Purpose: write one log entry describing the process currently being examined.
	!	Output : a blank line, a "-i-time:" stamp line, then two lines of process fields;
	!		 the first field line starts with the caller-supplied tag in prefix$.
	!	Reads  : prefix$, PID, ProcName, username, Account, tty_name, ImagName (each string is cut to
	!		 its companion ..._RtnLn length), grp_buffer, mem_buffer
	!	Alters : snap$ (overwritten with a fresh time stamp)
	!
	print									! blank line separates log entries
	snap$ = wcsm_dt_stamp							! fresh time stamp for this entry
	print "-i-time: "+ left$(snap$,8) +"."+ right$(snap$,9)			! ccyymmdd.hhmmss
	print 		format$(left$(prefix$,12)			,"'LLLLLLLLLLL"		)	+&
	    " PID>"+	format$(PID					,"########"             )	+&
	    " Prc>"+	format$(left$(ProcName ,ProcName_RtnLn	)	,"'LLLLLLLLLLLLLL"      )	+&
	    " User>"+	format$(left$(username ,username_RtnLn )	,"'LLLLLLLLLLLLLL"      )	+&
	    " Acnt>"+	left$(Account  ,Account_RtnLn  )						+&
	    " Term>"+	left$(tty_name ,tty_name_RtnLn )
	print	"             Grp>"+	format$(grp_buffer		,"###"			)       +&
	    " Mem>"+	format$(mem_buffer				,"###"			)       +&
	    " Img>"+	left$(ImagName	,ImagName_RtnLn )			! second line: group, member, image name
	return									! back to the gosub caller
	!=======================================================================
	! Common Trap (BASIC error handler)
	!
	! this will go to sys$output (sys$error)
	!=======================================================================
32000	common_trap:
	!
	!	Purpose: report a BASIC run-time error and shut the program down.
	!	Output : one PRINT holding four CR/LF separated lines: the line number (erl), the error
	!		 number (err), the error text (ert$) and the current time stamp.
	!	Exit   : resumes at label Fini, which ends the program (no retry is attempted).
	!	NOTE(review): the statement that installs this handler is outside this section -- confirm there.
	!
	snap$ = wcsm_dt_stamp							! time of the failure
	print								 &
	    cr + lf + "-i-Line = "+ str$(erl) 				+&
	    cr + lf + "-i-Error= "+ str$(err) 				+&
	    cr + lf + "-i-Text = "+ ert$(err) 				+&
	    cr + lf + "-i-Time = "+ left$(snap$,8) +"."+ right$(snap$,9)	! ccyymmdd.hhmmss
	!
	resume fini								! fix stack + exit
	!
	!=======================================================================
	!	adios
	!=======================================================================
	Fini:									! exit label; target of "resume fini" in common_trap
	end									! end of the main program
	!########################################################################################################################
	!
32100	%include "[.fun]wcsm_dt_stamp.fun"
	! function string WCSM_DT_Stamp
	!
32110	%include "[.fun]wcsm_trnlnm.fun"
	! function string WCSM_Trnlnm
	!