Pages

Wednesday, August 22, 2018

bash and sqlplus by Example: Kill Long Running Jobs

===============================
General
===============================
Known jobs should be running for only few minutes.
However, since they are connected via DB_LINKS to several remote databases, due to networks issues connection can be stuck, and job is hanged.
The workaround - it to kill this job, and start execution again.

Following code is executed from crontab every 15 minutes, and checks for jobs which run longer than 30 minutes.

===============================
Code
===============================

less kill_long_running_jobs.sh

#!/bin/bash
. /etc/profile
. /etc/sh/orash/oracle_login.sh igt

handle_return_code() {
  ret_code=$1
  step_name=$2
  if [[ $ret_code -ne 0 ]];then
    echo $step_name Finished With Error!! Return Code $ret_code
    exit $ret_code
  fi
}

WORK_DIR=/software/oracle/oracle/scripts
LOG_DIR=${WORK_DIR}
RUN_TIME=`date +"%Y%m%d_%H%M%S"`
LOG_FILE=killed_sessions.log
TEMP_FILE=long_running_sessions.tmp

cd ${WORK_DIR}
touch ${LOG_DIR}/${LOG_FILE}
rm -f ${TEMP_FILE}

sqlplus -s /nolog <<EOF
whenever sqlerror exit SQL.SQLCODE
@./kill_long_running_jobs.sql
EOF
handle_return_code $? ./kill_long_running_jobs.sql

if [[ ! -f ${TEMP_FILE} || ! -s ${TEMP_FILE} ]]; then
  echo "$RUN_TIME   No Long Running Sessions Found" >> ${LOG_DIR}/${LOG_FILE}
  exit 0
fi

echo reading file ${TEMP_FILE}

while read line
do
  KILL_CMD=`echo $line | awk 'BEGIN {FS="~"}{print $1}'`
  WHAT=`echo $line | awk 'BEGIN {FS="~"}{print $2}'`

  echo "$RUN_TIME   ${KILL_CMD} ${WHAT}" >> ${LOG_DIR}/${LOG_FILE}
  ${KILL_CMD}

done < ${TEMP_FILE}

less kill_long_running_jobs.sql

@./set_user.sql
connect &&user/&&pass@&&conn_str
PROMPT CONNECTED TO &&user/&&pass@&&conn_str

SET HEADING OFF
SET PAGESIZE 0
SET LINESIZE 200
SET NEWPAGE NONE
SET FEEDBACK OFF
SET VERIFY OFF
SPOOL long_running_sessions.tmp

SELECT 'kill -9 '||V_PROCESS.spid||'~'||DBA_JOBS.what AS LINUX_KILL
FROM   V$SESSION V_SESSION,
       V$PROCESS V_PROCESS,
       DBA_JOBS_RUNNING STUCK_JOBS,
       DBA_JOBS
WHERE  V_PROCESS.addr = V_SESSION.paddr
  AND  V_SESSION.type != 'BACKGROUND'
  AND  V_SESSION.sid = STUCK_JOBS.sid
  AND  DBA_JOBS.job = STUCK_JOBS.job
  AND  logon_time < (SYSDATE - 30/1440)
  AND  UPPER(DBA_JOBS.what) IN ('EXT_MOCO.SCHEDULE;','EXT_OVMD.SCHEDULE;','EXT_IPN.SCHEDULE;','EXT_GLR.SCHEDULE;','EXT_SPARX.SCH
EDULE;')
UNION ALL
SELECT 'kill -9 4545~EXT_SPARX.SCHEDULE;' AS LINUX_KILL
FROM DUAL
WHERE 1=2;

SPOOL OFF

less set_user.sql

DEFINE user=user_name
DEFINE pass=user_pass
DEFINE conn_str=orainst

No comments:

Post a Comment