junit-stats.py | searchcode

/tools/jenkins/junit-stats.py

https://github.com/VoltDB/voltdb
Python | 1041 lines | 990 code | 21 blank | 30 comment | 0 complexity | 1503ee7be6595236f04ed5e26134749c MD5 | raw file

#!/usr/bin/env python

# This file is part of VoltDB.
# Copyright (C) 2008-2022 Volt Active Data Inc.

# A command line tool for getting junit job statistics from Jenkins CI

import logging
import os
import sys
import mysql.connector

from datetime import datetime, timedelta
from jenkinsbot import JenkinsBot
from mysql.connector.errors import Error as MySQLError
from numpy import std, mean
from re import search, sub
from string import whitespace
from traceback import format_exc
from urllib2 import HTTPError, URLError, urlopen

# Constants used in posting messages on Slack. Both values are read from
# environment variables (named 'junit' and 'auto-filed'), and are None when
# those variables are not set.
JUNIT = os.environ.get('junit', None)
AUTO_FILED = os.environ.get('auto-filed', None)
# For now, this should work (?); need a constant for the 'auto-filed' channel.
# NOTE(review): this reuses JUNIT, not AUTO_FILED - confirm that is intended.
SLACK_CHANNEL_FOR_AUTO_FILING = JUNIT

# Set to True if you need to suppress updating the 'qa' database or Jira
DRY_RUN = False

# Default is None (null); but once initialized, it may be reused
JENKINSBOT = None

# All possible failure "types", as detailed below (see the window-size and
# threshold constants that follow)
ALL_FAILURE_TYPES = ['NEW', 'INTERMITTENT', 'FREQUENT', 'CONSISTENT', 'INCONSISTENT', 'OLD']

# Constants used to determine which test failures are considered Consistent,
# Intermittent or New failures.
# Naming convention, for all of the constant pairs below: a *_WINDOW_SIZE is
# the number of most recent builds (up to and including the latest build) in
# which failures are counted; the matching *_NUM_FAILURES_THRESHOLD is the
# number of failures, within that window, that the count is compared against.
NEW_FAILURE_WINDOW_SIZE             = 10
NEW_NUM_FAILURES_THRESHOLD          = 2  # 2 out of 10 failures is 'New' - if not seen recently
INTERMITTENT_FAILURE_WINDOW_SIZE    = 25
INTERMITTENT_NUM_FAILURES_THRESHOLD = 3  # 3 out of 25 failures is 'Intermittent'
CONSISTENT_FAILURE_WINDOW_SIZE      = 3
CONSISTENT_NUM_FAILURES_THRESHOLD   = 3  # 3 out of 3 (consecutive) failures is 'Consistent'

# Constants used to determine when a failure's status should be changed to
# Old, Inconsistent, Intermittent, Frequent or Consistent.
# Note: "Inconsistent" means formerly deemed Consistent, but we're not yet
# certain whether it is fixed or actually Intermittent.
CHANGE_NEW_TO_OLD_WINDOW_SIZE            = 5
CHANGE_NEW_TO_OLD_NUM_FAILURES_THRESHOLD = 1  # if less than 1 in 5 failed, downgrade to 'Old'
CHANGE_INTERMITTENT_TO_OLD_WINDOW_SIZE            = 10
CHANGE_INTERMITTENT_TO_OLD_NUM_FAILURES_THRESHOLD = 1  # if less than 1 in 10 failed, downgrade to 'Old'
CHANGE_INTERMITTENT_TO_CONSISTENT_WINDOW_SIZE            = 5
CHANGE_INTERMITTENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD = 5  # if 5 out of 5 failed, upgrade to 'Consistent'
CHANGE_INTERMITTENT_TO_FREQUENT_WINDOW_SIZE              = 10
CHANGE_INTERMITTENT_TO_FREQUENT_NUM_FAILURES_THRESHOLD   = 8  # if 8 out of 10 failed, upgrade to 'Frequent'
CHANGE_FREQUENT_TO_CONSISTENT_WINDOW_SIZE                = 10
CHANGE_FREQUENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD     = 10 # if 10 out of 10 failed, change to 'Consistent'
CHANGE_FREQUENT_TO_INTERMITTENT_WINDOW_SIZE              = 10
CHANGE_FREQUENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD   = 7  # if less than 7 in 10 failed, downgrade to 'Intermittent'
CHANGE_CONSISTENT_TO_INCONSISTENT_WINDOW_SIZE            = 1
CHANGE_CONSISTENT_TO_INCONSISTENT_NUM_FAILURES_THRESHOLD = 1  # if less than 1 in 1 failed, downgrade to 'Inconsistent'
CHANGE_INCONSISTENT_TO_INTERMITTENT_WINDOW_SIZE            = 1
CHANGE_INCONSISTENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD = 1  # if 1 out of 1 failed, change to 'Intermittent'

# Constants used to determine after how many passing builds a ticket should
# be closed
CLOSE_INCONSISTENT_WINDOW_SIZE            = 5
CLOSE_INCONSISTENT_NUM_FAILURES_THRESHOLD = 1  # if less than 1 in 5 failed, close the ticket
CLOSE_OLD_WINDOW_SIZE            = 10
CLOSE_OLD_NUM_FAILURES_THRESHOLD = 1  # if less than 1 in 10 failed, close the ticket

# Constants used in filing (or modifying) Jira tickets: one priority and one
# label per failure type, plus a label marking every auto-filed ticket.
# Note that only Consistent failures have their own label; every other
# failure type shares the 'junit-intermittent-failure' label.
JIRA_PRIORITY_FOR_CONSISTENT_FAILURES   = 'Critical'
JIRA_PRIORITY_FOR_FREQUENT_FAILURES     = 'Critical'
JIRA_PRIORITY_FOR_INCONSISTENT_FAILURES = 'Major'
JIRA_PRIORITY_FOR_INTERMITTENT_FAILURES = 'Major'
JIRA_PRIORITY_FOR_NEW_FAILURES = 'Minor'
JIRA_PRIORITY_FOR_OLD_FAILURES = 'Trivial'
JIRA_LABEL_FOR_AUTO_FILING     = 'auto-filed'
JIRA_LABEL_FOR_CONSISTENT_FAILURES   = 'junit-consistent-failure'
JIRA_LABEL_FOR_FREQUENT_FAILURES     = 'junit-intermittent-failure'
JIRA_LABEL_FOR_INCONSISTENT_FAILURES = 'junit-intermittent-failure'
JIRA_LABEL_FOR_INTERMITTENT_FAILURES = 'junit-intermittent-failure'
JIRA_LABEL_FOR_NEW_FAILURES          = 'junit-intermittent-failure'
JIRA_LABEL_FOR_OLD_FAILURES          = 'junit-intermittent-failure'
MAX_NUM_ATTACHMENTS_PER_JIRA_TICKET  = 8

# Used to help prevent a Jira ticket from exceeding Jira's maximum
# description size (32,767 characters, total); the second constant is the
# default maximum size of a single piece of a description
MAX_NUM_CHARS_PER_JIRA_DESCRIPTION  = 32767
MAX_NUM_CHARS_PER_DESCRIPTION_PIECE = 2000

# Characters that don't work well in Jira searches
JIRA_SEARCH_PROBLEMATIC_CHARACTERS = '._'

# Used in Jira ticket descriptions: a "header" line that introduces a stack
# trace, and a plain separator line. Each is preceded by one newline and
# followed by two.
DASHES = '-------------------------'
# The backslashes are part of the text (a literal backslash before each of the
# dashes adjacent to 'Stack Trace'); they are written as '\\' because '\-' is
# not a valid Python escape sequence. (Presumably they escape those dashes in
# Jira's markup - TODO confirm.)
STACK_TRACE_LINE = '\n' + DASHES + '\\-Stack Trace\\-' + DASHES + '\n\n'
SEPARATOR_LINE   = '\n' + DASHES + '--------------' + DASHES + '\n\n'
# Maps the name of each known Jenkins (JUnit) job to a dictionary holding a
# short 'nickname' for that job and a job-specific 'label'.
# (The 'label' values are presumably Jira labels, added to tickets filed for
# failures in that job - TODO confirm against where they are used.)
JENKINS_JOBS = {
    'branch-2-community-junit-master'         : {'nickname' : 'community-junit',  'label' : 'junit-community-failure'},
    'branch-2-pro-junit-master'               : {'nickname' : 'pro-junit',        'label' : 'junit-pro-failure'},
    'test-nextrelease-debug-pro'              : {'nickname' : 'debug-pro',        'label' : 'junit-debug-failure'},
    'test-nextrelease-memcheck-pro'           : {'nickname' : 'memcheck-pro',     'label' : 'junit-memcheck-debug-failure'},
    'test-nextrelease-memcheck-nodebug-pro'   : {'nickname' : 'memcheck-nodebug', 'label' : 'junit-memcheck-failure'},
    'test-nextrelease-fulljmemcheck-pro-junit': {'nickname' : 'fulljmemcheck',    'label' : 'junit-fulljmemcheck-failure'},
    'test-nextrelease-nonflaky-pro-junit'     : {'nickname' : 'nonflaky-pro',     'label' : 'junit-nonflaky-failure'},
    'test-nextrelease-pool-community-junit'   : {'nickname' : 'pool-community',   'label' : 'junit-pool-community-failure'},
    'test-nextrelease-pool-pro-junit'         : {'nickname' : 'pool-pro',         'label' : 'junit-pool-pro-failure'},
    }

# Used for getting the preferred URL prefix; we prefer the latter to the former,
# because it works even over the VPN
BAD_URL_PREFIX  = 'ci:8080'
GOOD_URL_PREFIX = 'ci.voltdb.lan:8080'

# Used to count errors and warnings encountered during execution
ERROR_COUNT   = 0
WARNING_COUNT = 0

# Used to modify URLs by changing problematic characters into underscores:
# a translation table mapping each of the characters '[', ']', '-', '<', '>'
# and ' ' (space) to '_'.
# Note: string.maketrans exists only in Python 2.
from string import maketrans
TT = maketrans("[]-<> ", "______")

# Print a log (info) message after every group of this many test cases are processed
# (in each "run" of a build, e.g. junit_other_p4 vs. junit_regression_h2)
LOG_MESSAGE_EVERY_NUM_TEST_CASES = 200

# TODO: possibly obsolete?? :
# set threshold (greater than or equal to) of failures in a row to be significant
FAIL_THRESHOLD = 2

# TODO: probably obsolete:
# SQL queries against the `junit-test-failures` and `junit-builds` tables.
# Each takes named ("pyformat" style) parameters; between them they use
# 'job', 'name', 'stamp' and 'build'.

# QUERY1: counts the failures (status FAILED or REGRESSION) recorded for one
# test (name) of one job, in the 30 days before 'stamp', up to and including
# build number 'build'.
QUERY1 = """
    SELECT count(*) AS fails
    FROM `junit-test-failures` m
    WHERE m.job = %(job)s
        AND m.name = %(name)s
        AND m.status in ('FAILED', 'REGRESSION')
        AND m.stamp > %(stamp)s - INTERVAL 30 DAY
        AND m.build <= %(build)s
"""

# QUERY2: like QUERY1, but counts records whose status is FIXED; it returns
# no row at all when that count is zero (because of the HAVING clause).
QUERY2 = """
    SELECT count(*) AS fixes
    FROM `junit-test-failures` m
    WHERE m.job = %(job)s
        AND m.name = %(name)s
        AND m.status in ('FIXED')
        AND m.stamp > %(stamp)s - INTERVAL 30 DAY
        AND m.build <= %(build)s
    HAVING fixes > 0
    LIMIT 1
"""

# QUERY3: counts the builds of one job (matched on the `junit-builds` table's
# 'name' column) recorded in the 30 days up to and including 'stamp'.
QUERY3 = """
    SELECT count(*) as runs
    FROM `junit-builds` m
    WHERE m.name = %(job)s
        AND m.stamp > %(stamp)s - INTERVAL 30 DAY
        AND m.stamp <= %(stamp)s
"""

# QUERY4: numbers (in build order) the records for one test of one job, in
# the same 30-day / build-number window used above, and returns one row per
# FIXED record. In each row, 'runs' is the number of records lying between
# that FIXED record and the previous FIXED one (or the start of the window);
# 'current' is the number of records that follow the last FIXED record.
# NOTE(review): 'runs' and 'current' presumably represent lengths of failure
# streaks - confirm against the code that consumes these rows.
QUERY4 = """
    SELECT job, build, name, ord-1-COALESCE(pre, 0) AS runs, current
    FROM
        (SELECT job, build, name, status, ord, stamp,
                LAG(ord) OVER w2 AS pre,
                LEAD(ord) OVER w2 AS post,
                (SELECT last-MAX(ord)
                FROM
                    (SELECT job, name, status, stamp,
                            ROW_NUMBER() OVER w1 AS ord,
                            (SELECT count(*)
                            FROM `junit-test-failures` n
                            WHERE n.job=%(job)s
                                AND n.name=%(name)s
                                AND n.stamp > %(stamp)s - INTERVAL 30 DAY
                                AND n.build <= %(build)s
                            ) last
                    FROM `junit-test-failures` n
                    WHERE n.job=%(job)s
                        AND n.name=%(name)s
                        AND n.stamp > %(stamp)s - INTERVAL 30 DAY
                        AND n.build <= %(build)s
                    WINDOW w1 AS (ORDER BY build)
                    ) q1
                WHERE q1.status in ('FIXED')
                LIMIT 1
                ) current
        FROM
            (SELECT job, build, name, status, stamp,
                    ROW_NUMBER() OVER w1 AS ord
            FROM `junit-test-failures` n
            WHERE n.job=%(job)s
                AND n.name=%(name)s
                AND n.stamp > %(stamp)s - INTERVAL 30 DAY
                AND n.build <= %(build)s
            WINDOW w1 AS (ORDER BY build)
            ) q2
        WHERE q2.status in ('FIXED')
        WINDOW w2 AS (ORDER BY ord)
        ) q3;
"""

class Stats(object):
    def __init__(self):
        self.jhost = 'http://ci.voltdb.lan'
        self.dbhost = 'junitstatsdb.voltdb.lan'
        self.dbuser = os.environ.get('dbuser', None)
        self.dbpass = os.environ.get('dbpass', None)
        self.dbname = os.environ.get('dbname', 'qa')
        self.cmdhelp = """
        usage: junit-stats <job> <build_range>
        ex: junit-stats branch-2-pro-junit-master 800-990
        ex: junit-stats branch-2-community-junit-master 550-550
        You can also specify 'job' and 'build_range' environment variables
        """
        log_format = '%(asctime)s %(module)14s:%(lineno)-6d %(levelname)-8s [%(threadName)-10s] %(message)s'
        # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        # file = logging.FileHandler("junit-stats.log", mode='w')
        # file.setLevel(logging.INFO)
        # formatter = logging.Formatter(log_format)
        # file.setFormatter(formatter)
        # logging.getLogger('').handlers = []
        # logging.getLogger('').addHandler(file)
        loglevel = logging.INFO
        console_loglevel = loglevel
        logfile = "junit-stats.log"
        logger = logging.getLogger()
        logger.setLevel(logging.NOTSET)
        logger.propogate = True
        file = logging.FileHandler(logfile, mode='a')
        console = logging.StreamHandler()
        file.setLevel(loglevel)
        console.setLevel(console_loglevel)
        formatter = logging.Formatter(log_format)
        file.setFormatter(formatter)
        console.setFormatter(formatter)
        logging.getLogger('').handlers = []
        logging.getLogger('').addHandler(file)
        logging.getLogger('').addHandler(console)
        logging.info("starting... %s" % sys.argv)


    def error(self, message='', caused_by=None):
        """Count an error, and log it at ERROR level.

        :param message: the text of the error message
        :param caused_by: (optional) whatever caused the error, typically an
            Exception; when given (and truthy), its string form is appended
            to the message, following a 'Caused by:' line
        """
        global ERROR_COUNT
        ERROR_COUNT += 1
        full_message = (message + '\nCaused by:\n' + str(caused_by)
                        if caused_by else message)
        logging.error(full_message)


    def warn(self, message='', caused_by=None):
        """Count a warning, and log it at WARNING level.

        :param message: the text of the warning message
        :param caused_by: (optional) whatever caused the warning, typically
            an Exception; when given (and truthy), its string form is
            appended to the message, following a 'Caused by:' line
        """
        global WARNING_COUNT
        WARNING_COUNT = WARNING_COUNT + 1
        if caused_by:
            message += '\nCaused by:\n' + str(caused_by)
        # logging.warning is the documented function; logging.warn is merely
        # a deprecated alias of it
        logging.warning(message)


    def fix_url(self, url):
        """Change a URL to use the preferred host-and-port prefix.

        :param url: url to download data from
        :return: the url, with every occurrence of BAD_URL_PREFIX replaced
            by GOOD_URL_PREFIX; or None, if the url is None or empty
        """
        return url.replace(BAD_URL_PREFIX, GOOD_URL_PREFIX) if url else None


    def read_url(self, url, ignore404=False):
        """Download the data found at a URL, and evaluate it as a Python
        expression.

        :param url: url to download data from; it is first changed to use the
            preferred URL prefix (see fix_url)
        :param ignore404: if True, an HTTP 404 ('Not Found') response is
            merely logged, at debug level; otherwise, like any other failure,
            it is counted and logged as an error
        :return: the object that results from evaluating the downloaded text
            (presumably a dictionary, as returned by Jenkins' Python API -
            TODO confirm against the callers); or None, if the url is None or
            empty, or if the data could not be downloaded or evaluated
        """
        logging.debug('In read_url:')
        logging.debug('    url: %s', url)

        url = self.fix_url(url)
        logging.debug('    url: %s', url)

        # A missing URL used to raise a TypeError (from the debug logging);
        # report it like any other URL that cannot be read
        if not url:
            self.error('No URL was specified, so no data can be read from it.')
            return None

        data = None
        try:
            response = urlopen(url)
            try:
                # SECURITY NOTE(review): eval executes whatever text the
                # server returns, so this is safe only for trusted (internal)
                # servers; if the data always consists of Python literals,
                # ast.literal_eval would be a safer choice.
                data = eval(response.read())
            finally:
                # Always release the connection, even if eval fails
                response.close()
        except Exception as e:
            if ignore404 and isinstance(e, HTTPError) and e.code == 404:
                logging.debug('Ignoring HTTPError (%s) at URL:\n    %s' % (str(e), str(url)))
            else:
                self.error('Exception trying to open data from URL:\n    %s'
                           '\n    The URL may not be formed correctly.'
                           % str(url), e )
        return data


    def get_number_of_jenkins_failures(self, cursor, testName, jenkins_job,
                                       last_build, num_builds):
        """Count how often a test failed in the most recent builds of a
        Jenkins job, according to the `junit-test-failures` table.

        :param cursor: database cursor on which to execute the query
        :param testName: name of the test whose failures are counted
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build to include
        :param num_builds: size of the window, i.e., only builds numbered
            greater than (last_build - num_builds), and no greater than
            last_build, are included
        :return: the number of records with status FAILED or REGRESSION, for
            that test and job, in that window of builds; as a float
        """
        logging.debug('In get_number_of_jenkins_failures:')
        logging.debug('    cursor      : %s', cursor)
        logging.debug('    testName    : %s', testName)
        logging.debug('    jenkins_job : %s', jenkins_job)
        logging.debug('    last_build  : %s', last_build)
        logging.debug('    num_builds  : %s', num_builds)

        # The values are passed to execute() separately, as parameters,
        # rather than being formatted into the SQL text: that way they are
        # quoted and escaped by the database driver, so a test name that
        # contains a quote cannot break (or subvert) the query
        query = """SELECT count(*) as numfails
                        FROM `junit-test-failures` f
                        WHERE f.name = %s
                          AND f.job  = %s
                          AND f.status in ('FAILED', 'REGRESSION')
                          AND f.build <= %s
                          AND f.build  > %s
                     """
        params = (testName, jenkins_job, last_build,
                  (last_build - num_builds))
        logging.debug('    query       :\n    %s', query)
        logging.debug('    params      : %s', params)

        cursor.execute(query, params)
        # Returned as a float, so that callers computing a percentage from
        # it do not use integer division
        num_failures = float(cursor.fetchone()[0])

        logging.debug('    num_failures: %s', num_failures)

        return num_failures


    def get_intermittent_failure_percent(self, cursor=None, testName=None,
                                         jenkins_job=None, last_build=None,
                                         num_failures=None):
        """Compute a test's failure percent, over the window of builds used
        for Intermittent failures (INTERMITTENT_FAILURE_WINDOW_SIZE).

        :param cursor: database cursor; needed only if num_failures is None
        :param testName: name of the test; needed only if num_failures is None
        :param jenkins_job: name of the Jenkins job; needed only if
            num_failures is None
        :param last_build: number of the last (most recent) build; needed
            only if num_failures is None
        :param num_failures: the number of failures within the window, if
            already known; when None (the default), it is retrieved from the
            database
        :return: the percentage (a float, normally from 0.0 to 100.0) of
            builds in the window in which the test failed
        """
        logging.debug('In get_intermittent_failure_percent...')

        # Compare with None explicitly: a count of 0 is a valid, known value,
        # which must not trigger a database query
        if num_failures is None:
            num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                            jenkins_job, last_build, INTERMITTENT_FAILURE_WINDOW_SIZE)

        return (100.0 * num_failures
                / INTERMITTENT_FAILURE_WINDOW_SIZE)


    def qualifies_as_new_failure(self, cursor, testName,
                                 jenkins_job, last_build, status):
        """Determine whether a test failure qualifies as a 'New' failure,
        i.e., whether the test failed at least NEW_NUM_FAILURES_THRESHOLD
        times in the last NEW_FAILURE_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param status: the test's status in the last build, e.g. 'REGRESSION'
        :return: 0 if the failure does not qualify; otherwise, the test's
            failure percent, computed as if for an Intermittent failure
        """
        logging.debug('In qualifies_as_new_failure...')

        # Possible shortcut to skip querying the 'qa' database,
        # if we're just checking the most recent build.
        # (Values are compared using '==': 'is' tests object identity, which
        # is not reliable for strings or numbers.)
        if (status == 'REGRESSION' and
                NEW_FAILURE_WINDOW_SIZE == 1 and
                NEW_NUM_FAILURES_THRESHOLD == 1):
            logging.debug('...qualifies as new, via shortcut (assuming 4% failure percent)')
            return 4.0    # assume 1 failure out of the last 25 builds

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, NEW_FAILURE_WINDOW_SIZE)

        if num_failures >= NEW_NUM_FAILURES_THRESHOLD:
            # recompute failure percent, as if an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        else:
            return 0  # does not qualify


    def qualifies_as_intermittent_failure(self, cursor, testName,
                                          jenkins_job, last_build):
        """Determine whether a test failure qualifies as an 'Intermittent'
        failure, i.e., whether the test failed at least
        INTERMITTENT_NUM_FAILURES_THRESHOLD times in the last
        INTERMITTENT_FAILURE_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure does not qualify; otherwise, the test's
            failure percent over that window of builds
        """
        logging.debug('In qualifies_as_intermittent_failure...')

        failure_count = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build,
            INTERMITTENT_FAILURE_WINDOW_SIZE)

        qualifies = failure_count >= INTERMITTENT_NUM_FAILURES_THRESHOLD
        if not qualifies:
            return 0

        # The count is already known, so the percent is computed from it
        # directly, without querying the database again
        return self.get_intermittent_failure_percent(num_failures=failure_count)


    def qualifies_as_consistent_failure(self, cursor, testName,
                                        jenkins_job, last_build):
        """Determine whether a test failure qualifies as a 'Consistent'
        failure, i.e., whether the test failed at least
        CONSISTENT_NUM_FAILURES_THRESHOLD times in the last
        CONSISTENT_FAILURE_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure does not qualify; otherwise, the test's
            failure percent over that window of builds
        """
        logging.debug('In qualifies_as_consistent_failure...')

        failure_count = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build,
            CONSISTENT_FAILURE_WINDOW_SIZE)

        if failure_count >= CONSISTENT_NUM_FAILURES_THRESHOLD:
            return 100.0 * failure_count / CONSISTENT_FAILURE_WINDOW_SIZE
        return 0


    def change_intermittent_failure_to_frequent(self, cursor, testName,
                                                jenkins_job, last_build):
        """Determine whether an 'Intermittent' failure should be upgraded to
        'Frequent', i.e., whether the test failed at least
        CHANGE_INTERMITTENT_TO_FREQUENT_NUM_FAILURES_THRESHOLD times in the
        last CHANGE_INTERMITTENT_TO_FREQUENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent over that window of builds
        """
        logging.debug('In change_intermittent_failure_to_frequent...')

        window_size = CHANGE_INTERMITTENT_TO_FREQUENT_WINDOW_SIZE
        failure_count = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build, window_size)

        should_change = (failure_count
                         >= CHANGE_INTERMITTENT_TO_FREQUENT_NUM_FAILURES_THRESHOLD)
        return (100.0 * failure_count / window_size) if should_change else 0


    def change_intermittent_failure_to_consistent(self, cursor, testName,
                                                  jenkins_job, last_build):
        """Determine whether an 'Intermittent' failure should be upgraded to
        'Consistent', i.e., whether the test failed at least
        CHANGE_INTERMITTENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD times in the
        last CHANGE_INTERMITTENT_TO_CONSISTENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent over that window of builds
        """
        logging.debug('In change_intermittent_failure_to_consistent_...')

        window_size = CHANGE_INTERMITTENT_TO_CONSISTENT_WINDOW_SIZE
        failure_count = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build, window_size)

        should_change = (failure_count
                         >= CHANGE_INTERMITTENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD)
        if not should_change:
            return 0
        return 100.0 * failure_count / window_size


    def change_new_failure_to_old(self, cursor, testName,
                                  jenkins_job, last_build):
        """Determine whether a 'New' failure should be downgraded to 'Old',
        i.e., whether the test failed fewer than
        CHANGE_NEW_TO_OLD_NUM_FAILURES_THRESHOLD times in the last
        CHANGE_NEW_TO_OLD_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as if for an Intermittent
            failure
        """
        logging.debug('In change_new_failure_to_old...')

        recent_failures = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build,
            CHANGE_NEW_TO_OLD_WINDOW_SIZE)

        should_change = recent_failures < CHANGE_NEW_TO_OLD_NUM_FAILURES_THRESHOLD
        if not should_change:
            return 0

        # The percent is recomputed over the (larger) window of builds used
        # for Intermittent failures
        return self.get_intermittent_failure_percent(cursor, testName,
                                                     jenkins_job, last_build)


    def change_intermittent_failure_to_old(self, cursor, testName,
                                           jenkins_job, last_build):
        """Determine whether an 'Intermittent' failure should be downgraded
        to 'Old', i.e., whether the test failed fewer than
        CHANGE_INTERMITTENT_TO_OLD_NUM_FAILURES_THRESHOLD times in the last
        CHANGE_INTERMITTENT_TO_OLD_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as if it were still an
            Intermittent failure
        """
        logging.debug('In change_intermittent_failure_to_old...')

        recent_failures = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build,
            CHANGE_INTERMITTENT_TO_OLD_WINDOW_SIZE)

        should_change = (recent_failures
                         < CHANGE_INTERMITTENT_TO_OLD_NUM_FAILURES_THRESHOLD)
        if should_change:
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        return 0


    def change_consistent_failure_to_inconsistent(self, cursor, testName,
                                                  jenkins_job, last_build, status):
        """Determine whether a 'Consistent' failure should be downgraded to
        'Inconsistent', i.e., whether the test failed fewer than
        CHANGE_CONSISTENT_TO_INCONSISTENT_NUM_FAILURES_THRESHOLD times in the
        last CHANGE_CONSISTENT_TO_INCONSISTENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param status: the test's status in the last build, e.g. 'FIXED'
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as if for an Intermittent
            failure
        """
        logging.debug('In change_consistent_failure_to_inconsistent...')

        # Possible shortcut to skip querying the 'qa' database,
        # if we're just checking the most recent build.
        # (Values are compared using '==': 'is' tests object identity, which
        # is not reliable for strings or numbers.)
        if (status == 'FIXED' and
                CHANGE_CONSISTENT_TO_INCONSISTENT_WINDOW_SIZE == 1 and
                CHANGE_CONSISTENT_TO_INCONSISTENT_NUM_FAILURES_THRESHOLD == 1):
            logging.debug('...do change to inconsistent, via shortcut (recompute failure percent)')
            # recompute failure percent, as if an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, CHANGE_CONSISTENT_TO_INCONSISTENT_WINDOW_SIZE)

        if num_failures < CHANGE_CONSISTENT_TO_INCONSISTENT_NUM_FAILURES_THRESHOLD:
            # recompute failure percent, as if an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        else:
            return 0  # do not change


    def change_inconsistent_failure_to_intermittent(self, cursor, testName,
                                                    jenkins_job, last_build, status):
        """Determine whether an 'Inconsistent' failure should be changed to
        'Intermittent', i.e., whether the test failed at least
        CHANGE_INCONSISTENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD times in
        the last CHANGE_INCONSISTENT_TO_INTERMITTENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param status: the test's status in the last build, e.g. 'REGRESSION'
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as for an Intermittent failure
        """
        logging.debug('In change_inconsistent_failure_to_intermittent...')

        # Possible shortcut to skip querying the 'qa' database,
        # if we're just checking the most recent build.
        # (Values are compared using '==': 'is' tests object identity, which
        # is not reliable for strings or numbers.)
        if (status == 'REGRESSION' and
                CHANGE_INCONSISTENT_TO_INTERMITTENT_WINDOW_SIZE == 1 and
                CHANGE_INCONSISTENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD == 1):
            logging.debug('...do change to intermittent, via shortcut (recompute failure percent)')
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, CHANGE_INCONSISTENT_TO_INTERMITTENT_WINDOW_SIZE)

        if num_failures >= CHANGE_INCONSISTENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD:
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        else:
            return 0  # do not change


    def change_frequent_failure_to_consistent(self, cursor, testName,
                                              jenkins_job, last_build, status):
        """Determine whether a 'Frequent' failure should be changed to
        'Consistent', i.e., whether the test failed at least
        CHANGE_FREQUENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD times in the
        last CHANGE_FREQUENT_TO_CONSISTENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param status: the test's status in the last build, e.g. 'REGRESSION'
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as for an Intermittent failure
        """
        logging.debug('In change_frequent_failure_to_consistent...')

        # Possible shortcut to skip querying the 'qa' database,
        # if we're just checking the most recent build.
        # (Values are compared using '==': 'is' tests object identity, which
        # is not reliable for strings or numbers.)
        if (status == 'REGRESSION' and
                CHANGE_FREQUENT_TO_CONSISTENT_WINDOW_SIZE == 1 and
                CHANGE_FREQUENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD == 1):
            logging.debug('...do change to consistent, via shortcut (recompute failure percent)')
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, CHANGE_FREQUENT_TO_CONSISTENT_WINDOW_SIZE)

        if num_failures >= CHANGE_FREQUENT_TO_CONSISTENT_NUM_FAILURES_THRESHOLD:
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        else:
            return 0  # do not change


    def change_frequent_failure_to_intermittent(self, cursor, testName,
                                                jenkins_job, last_build, status):
        """Determine whether a 'Frequent' failure should be downgraded to
        'Intermittent', i.e., whether the test failed fewer than
        CHANGE_FREQUENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD times in the
        last CHANGE_FREQUENT_TO_INTERMITTENT_WINDOW_SIZE builds.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param status: the test's status in the last build, e.g. 'REGRESSION'
        :return: 0 if the failure type should not change; otherwise, the
            test's failure percent, computed as for an Intermittent failure
        """
        logging.debug('In change_frequent_failure_to_intermittent...')

        # Possible shortcut to skip querying the 'qa' database,
        # if we're just checking the most recent build.
        # (Values are compared using '==': 'is' tests object identity, which
        # is not reliable for strings or numbers.)
        # NOTE(review): the check below changes the failure type when there
        # are FEWER failures than the threshold, yet this shortcut fires on a
        # 'REGRESSION' (a failure); a passing status such as 'FIXED' looks
        # like what was intended. It is unreachable with the current
        # constants (10 and 7) - confirm before relying on it.
        if (status == 'REGRESSION' and
                CHANGE_FREQUENT_TO_INTERMITTENT_WINDOW_SIZE == 1 and
                CHANGE_FREQUENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD == 1):
            logging.debug('...do change to intermittent, via shortcut (recompute failure percent)')
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, CHANGE_FREQUENT_TO_INTERMITTENT_WINDOW_SIZE)

        if num_failures < CHANGE_FREQUENT_TO_INTERMITTENT_NUM_FAILURES_THRESHOLD:
            # recompute failure percent, as an intermittent failure
            return self.get_intermittent_failure_percent(cursor, testName,
                                                         jenkins_job, last_build)
        else:
            return 0  # do not change


    def should_close_inconsistent_failure(self, cursor, testName,
                                          jenkins_job, last_build,
                                          ticket_description=''):
        """Determine whether the ticket for an 'Inconsistent' failure should
        be closed: that is, whether the test failed fewer than
        CLOSE_INCONSISTENT_NUM_FAILURES_THRESHOLD times in the last
        CLOSE_INCONSISTENT_WINDOW_SIZE builds, and the ticket's description
        mentions this Jenkins job.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param ticket_description: the text of the ticket's description
        :return: True if the ticket should be closed; False otherwise
        """
        # (This message used to name a different, non-existent function)
        logging.debug('In should_close_inconsistent_failure...')

        num_failures = self.get_number_of_jenkins_failures(cursor, testName,
                        jenkins_job, last_build, CLOSE_INCONSISTENT_WINDOW_SIZE)

        # Only close a ticket whose description mentions this Jenkins job
        return (num_failures < CLOSE_INCONSISTENT_NUM_FAILURES_THRESHOLD
                and jenkins_job in ticket_description)


    def should_close_old_failure(self, cursor, testName,
                                 jenkins_job, last_build,
                                 ticket_description=''):
        """Determine whether the ticket for an 'Old' failure should be
        closed: that is, whether the test failed fewer than
        CLOSE_OLD_NUM_FAILURES_THRESHOLD times in the last
        CLOSE_OLD_WINDOW_SIZE builds, and the ticket's description mentions
        this Jenkins job.

        :param cursor: database cursor used to count the failures
        :param testName: name of the test
        :param jenkins_job: name of the Jenkins job
        :param last_build: number of the last (most recent) build
        :param ticket_description: the text of the ticket's description
        :return: True if the ticket should be closed; False otherwise
        """
        logging.debug('In should_close_old_failure...')

        recent_failures = self.get_number_of_jenkins_failures(
            cursor, testName, jenkins_job, last_build, CLOSE_OLD_WINDOW_SIZE)

        if (recent_failures < CLOSE_OLD_NUM_FAILURES_THRESHOLD
                and jenkins_job in ticket_description):
            return True
        return False


    def truncate_if_needed(self, text, truncate_in_middle=True,
                           insert_text='\n...[truncated]...\n',
                           max_num_chars=MAX_NUM_CHARS_PER_DESCRIPTION_PIECE):
        """Takes a text string (typically part of a Jira ticket description),
           and makes sure that it does not exceed a specified maximum number of
           characters, and truncates it if it does. By default: the max_num_chars
           is the constant MAX_NUM_CHARS_PER_DESCRIPTION_PIECE; it 'truncates'
           the text in the middle, taking half the available characters from the
           beginning and half from the end of the original text; and it adds
           '\n...[truncated]...\n' in the middle. But by specifying the optional
           parameters, you may change it to instead truncate the end of the text
           (when truncate_in_middle=False); or to insert a different piece of
           text to indicate the truncation; or to use a different maximum number
           of characters.
        """
        if len(text) <= max_num_chars:
            return text

        logging.debug('  In junit-stats.truncate_if_needed:')
        new_text = ''
        insert_length = len(insert_text)

        # "Truncate" the text in the middle
        if truncate_in_middle:
            half_length = int( (max_num_chars - insert_length) / 2 )
            new_text = text[:half_length] + insert_text \
                     + text[-half_length:]
            logging.warn('Jira description piece of length %d characters '
                         'truncated to %d characters:\n    %s'
                         % (len(text), len(new_text), new_text) )

        # Truncate the text at the end
        else:
            last_char_index = max_num_chars - insert_length
            new_text = text[:last_char_index] + insert_text
            logging.warn('Updated Jira description truncated to:\n    %s' % new_text)
            logging.warn('This part was left out of the truncated Jira description:\n    %s'
                         % text[last_char_index:] )

        logging.debug('    new (truncated) text:\n    %s' % str(new_text))
        return new_text


    def combine_since_build_messages(self, description, new_since_build,
                                     info_messages, jenkins_job_name=None,
                                     build_type=None):
        """If the current (Jira ticket) description includes 'since-build'
           messages related to the current Jenkins job, compress them and the
           new_since_build message into one, and return the resulting (Jira
           ticket) description.
        """
        updated_description = False
        if jenkins_job_name:
            # Older formats of "since-build" messages:
            format1_message = '\nFailing consistently since '+jenkins_job_name+' build #'
            format2_message = '\nFailing intermittently since '+jenkins_job_name+' build #'
            # Current format of "since-build" messages (omitting beginning):
            format3_message = 'failure in '+jenkins_job_name+' since build #'
            # Set initial values
            found_old_version_of_same_message = False
            old_build_number = sys.maxint  # an initial, very large value

            for msg in [format1_message, format2_message, format3_message]:
                count = 0
                while msg in description and count < 10:
                    count += 1  # just in case the text replacement does not work
                    found_old_version_of_same_message = True

                    # Get the index of the message's start - from the
                    # beginning of the line
                    message_start = description.index(msg)
                    if description[message_start:message_start+1] != '\n':
                        message_start = description.rfind('\n', 0, message_start)
                    # Get the index of the message's end - to the beginning
                    # of the next line (or of the entire description)
                    message_end = description.find('\n', message_start+1)
                    if message_end < 0:
                        message_end = len(description)

                    matching_message = description[message_start:message_end]
                    msg_build_start  = matching_message.rfind('#') + 1
                    msg_build_end    = len(matching_message)
                    match = search('\D', matching_message[msg_build_start:])
                    if match:
                        msg_build_end = msg_build_start + match.start()

                    logging.debug('  msg             : '+str(msg))
                    logging.debug('  message_start   : '+str(message_start))
                    logging.debug('  message_end     : '+str(message_end))
                    logging.debug('  matching_message: '+matching_message)
                    logging.debug('  msg_build_start : '+str(msg_build_start))
                    logging.debug('  msg_build_end   : '+str(msg_build_end))
                    logging.debug('  matching_message[msg_build_start:msg_build_end]: '
                                  + matching_message[msg_build_start:msg_build_end])

                    try:
                        msg_build_number = int(matching_message[msg_build_start:msg_build_end])
                        old_build_number = min(old_build_number, msg_build_number)
                    except ValueError as e:
                        self.error('While trying to get build number from old description '
                                   '(index %d-%d):\n    %s\n    Found Exception:\n    %s'
                                   % (msg_build_start, msg_build_end, matching_message, str(e)) )
                        break
                    description = description.replace(matching_message,'')
                    updated_description = True

            # For a Consistent failure, keep the build number in the new message,
            # which is presumably the build at which the current Jenkins job started
            # failing consistently; for others (Intermittent, Inconsistent, etc.),
            # change it to the oldest build in which we've seen this failure
            if found_old_version_of_same_message and build_type is not 'CONSISTENT':
                msg_build_index = new_since_build.rfind('#') + 1
                try:
                    new_build_number = int(new_since_build[msg_build_index:])
                except ValueError as e:
                    self.error("While trying to get build number from new 'since-build' "
                               "message (index %d):\n    %s\n    Found Exception:\n    %s"
                               % (msg_build_index, new_since_build, str(e)) )
                    new_build_number = sys.maxint
                if old_build_number < new_build_number:
                    new_since_build = new_since_build.replace(str(new_build_number),
                                                              str(old_build_number))

        if updated_description:
            info_messages.append('since-build message updated to: %s' % new_since_build)
        else:
            info_messages.append('added since-build message: %s' % new_since_build)

        return description + new_since_build


    def get_modified_description(self, old_description, new_description_pieces,
                                 jenkins_job_name=None, build_type=None,
                                 issue_key=''):
        """Merges the old description (if any) of a Jira ticket with the new
           pieces of text that we want that description to contain, and
           returns the combined description.

           An Auto-filer ticket description is treated as a sequence of
           sections, generally separated by a line of dashes, of 4 usual kinds
           (plus 'other'): the full name of the failing test (including the
           package name, which is omitted from the Summary); a link to the
           failure history of this test (in a particular Jenkins job); a stack
           trace; and a brief 'since-build' message saying how often (in a
           Jenkins job) this test has failed, and since which build. Over
           time, several versions of each kind may accumulate in one ticket,
           e.g. one failure history link per Jenkins job in which the test
           has failed, or several non-identical stack traces.

           The old sections are regrouped by kind and written out in a fixed
           order (failing test, failure history, stack trace, other,
           since-build); each new piece is appended after the old sections of
           its kind, but only if its text is not already present (carriage
           returns are ignored in that check, and digits are also ignored for
           stack traces). A new since-build message that is not already
           present is merged in by combine_since_build_messages.

           Arguments:
             old_description: the current text of the ticket description.
             new_description_pieces: a dict whose optional keys 'failingtest',
               'failurehistory', 'stacktrace', 'other' and 'sincebuild' hold
               the new text of each kind; a missing key is treated as ''.
             jenkins_job_name, build_type: passed through, unchanged, to
               combine_since_build_messages.
             issue_key: only used in the log message reporting a modification.

           Returns the combined description, after passing it through
           truncate_if_needed with MAX_NUM_CHARS_PER_JIRA_DESCRIPTION.
        """
        logging.debug('In junit-stats.get_modified_description:')
        for template, value in (
                ('  old_description:\n  %s', old_description),
                ('  new_description_pieces:\n  %s', new_description_pieces),
                ('  jenkins_job_name: %s', jenkins_job_name),
                ('  build_type      : %s', build_type),
                ('  issue_key       : %s', issue_key) ):
            logging.debug(template % str(value))

        # The sections of the old description, grouped by kind
        old_desc_pieces = {'failingtest': [], 'failurehistory': [],
                           'stacktrace': [], 'sincebuild': [], 'other': []}

        # Characters skipped over after a line of dashes, and the prefixes
        # that identify a since-build section
        skippable = whitespace + '-'
        since_build_prefixes = ('Failing',) + tuple(ALL_FAILURE_TYPES)

        # Walk through the old description, one section at a time
        description_length = len(old_description)
        position = 0
        while position < description_length:
            is_stack_trace = False
            # First guess: the section runs from here to the next dashes
            section_start = position
            section_end   = old_description.find(DASHES, section_start)

            logging.debug('  old_desc_index    : %d' % position)
            logging.debug('  next_section_start: %d' % section_start)
            logging.debug('  next_section_end  : %d' % section_end)

            if section_start == section_end:
                # We are sitting on a line of dashes: move past that line, and
                # past any white space or dashes that come after it
                section_start = old_description.find('\n', section_start)
                if section_start < 0:
                    break  # the last line starts with dashes: ignore it
                while (section_start < description_length
                        and old_description[section_start] in skippable):
                    section_start += 1
                section_end = old_description.find(DASHES, section_start)

                # A Stack Trace, unlike the other kinds, keeps its dashes, so
                # that multiple Stack Traces remain separated from each other
                if 'Stack Trace' in old_description[position:section_start]:
                    is_stack_trace = True
                    section_start = position

            # No further dashes means that this is the final section, which
            # runs to the end of the old description
            if section_end < 0:
                section_end = description_length

            logging.debug('  stack_trace       : %s' % str(is_stack_trace))
            logging.debug('  next_section_start: %d' % section_start)
            logging.debug('  next_section_end  : %d' % section_end)

            if section_start >= section_end:
                self.warn('In get_modified_description, next_section_start (%d) '
                          '>= next_section_end (%d): this should not normally '
                          'happen; for comparison, old_description has length %d.'
                          % (section_start, section_end, description_length) )
                break

            position = section_end
            section_text = self.truncate_if_needed(
                old_description[section_start:section_end] )

            logging.debug('  next_section:\n  %s' % str(section_text))

            # Decide which kind of section this is
            if is_stack_trace:
                kind, note = 'stacktrace', '  - added as Stack Trace'
            elif section_text.startswith('Failure history'):
                kind, note = 'failurehistory', '  - added as Failure history'
            elif section_text.startswith('Failing Test:'):
                kind, note = 'failingtest', '  - added as Failing Test'
            elif section_text.startswith(since_build_prefixes):
                kind, note = 'sincebuild', '  - added as Since build'
            else:
                kind, note = 'other', "  - added as 'other'"
            old_desc_pieces[kind].append(section_text)
            logging.debug(note)

        logging.debug('  old_desc_pieces:\n  %s' % str(old_desc_pieces))

        # The old text of each kind, with its sections joined together
        old_failing_test    = '\n'.join(old_desc_pieces['failingtest'])
        old_failure_history = '\n'.join(old_desc_pieces['failurehistory'])
        old_stack_trace     = '\n'.join(old_desc_pieces['stacktrace'])
        old_other           = '\n'.join(old_desc_pieces['other'])
        old_since_build     = '\n'.join(old_desc_pieces['sincebuild'])

        def new_piece(key):
            # The (possibly truncated) new text of one kind; '' if not given
            return self.truncate_if_needed(new_description_pieces.get(key, ''))

        new_failing_test    = new_piece('failingtest')
        new_failure_history = new_piece('failurehistory')
        new_stack_trace     = new_piece('stacktrace')
        new_other           = new_piece('other')
        new_since_build     = new_piece('sincebuild')

        # Log every piece, with its line endings made visible
        for template, text in (
                ('  old_failing_test:   \n  %s', old_failing_test),
                ('  old_failure_history:\n  %s', old_failure_history),
                ('  old_stack_trace:    \n  %s', old_stack_trace),
                ('  old_other:          \n  %s', old_other),
                ('  old_since_build:    \n  %s', old_since_build),
                ('  new_failing_test:   \n  %s', new_failing_test),
                ('  new_failure_history:\n  %s', new_failure_history),
                ('  new_stack_trace:    \n  %s', new_stack_trace),
                ('  new_other:          \n  %s', new_other),
                ('  new_since_build:    \n  %s', new_since_build) ):
            logging.debug(template % str(text.replace('\r', 'CR').replace('\n', 'LF\n')) )

        def without_cr(text):
            # Jira tends to add carriage returns, so comparisons ignore them
            return text.replace('\r', '')

        # Assemble the result in the 'proper' order, whatever order the old
        # sections were in; a new piece is only appended when its text is not
        # already somewhere in the description built so far
        info_messages = []
        new_description = old_failing_test
        if without_cr(new_failing_test.strip()) not in without_cr(new_description):
            new_description += new_failing_test
            info_messages.append('failing test')

        new_description += SEPARATOR_LINE + old_failure_history
        if without_cr(new_failure_history.strip()) not in without_cr(new_description):
            new_description += new_failure_history
            info_messages.append('failure history')

        new_description += old_stack_trace
        # A new Stack Trace that differs from an old one only in its line
        # numbers (or other digits) is not worth adding
        masked_new_trace   = sub(r'\d', 'x', without_cr(new_stack_trace).strip())
        masked_description = sub(r'\d', 'x', without_cr(new_description))
        if masked_new_trace not in masked_description:
            new_description += new_stack_trace
            info_messages.append('stack trace')

        if old_other or new_other:
            new_description += SEPARATOR_LINE + old_other
            if without_cr(new_other.strip()) not in without_cr(new_description):
                new_description += new_other
                info_messages.append('other description')

        new_description += SEPARATOR_LINE + old_since_build
        if without_cr(new_since_build.strip()) not in without_cr(new_description):
            new_description = self.combine_since_build_messages(
                new_description, new_since_build, info_messages,
                jenkins_job_name, build_type)

        # Collapse overly long runs of new line (line feed) characters; the
        # number of passes is capped at 10
        for _ in range(10):
            flattened = without_cr(new_description)
            if '\n\n\n\n\n\n' not in flattened:
                break
            new_description = flattened.replace('\n\n\n\n\n\n', '\n\n\n')

        logging.debug('  new_description:\n  %s' % str(new_description))
        if info_messages and old_description:
            logging.info('Description of ticket %s modified, including: %s'
                         % (issue_key, '; '.join(info_messages)) )

        return self.truncate_if_needed(new_description, False, '\n[Truncated]',
                                       MAX_NUM_CHARS_PER_JIRA_DESCRIPTION)


    def get_modified_labels(self, old_labels, new_labels, description,
                            jenkins_job_name=None):
        """Returns a new list of Jira labels: the old_labels, followed by any
           of the new_labels that were not already included. Neither argument
           list is modified.

           A Jira ticket should not normally be labeled as both Consistent and
           Intermittent, so when one of those two labels appears in new_labels,
           the other one (if present) is removed from the result. If a
           jenkins_job_name is given, and JENKINS_JOBS defines a 'label' for
           that job, then that label is also added, if not already included.

           The description argument is not currently used.
        """
        # Each pair is (incoming label, label that it displaces)
        exclusive_pairs = (
            (JIRA_LABEL_FOR_CONSISTENT_FAILURES, JIRA_LABEL_FOR_INTERMITTENT_FAILURES),
            (JIRA_LABEL_FOR_INTERMITTENT_FAILURES, JIRA_LABEL_FOR_CONSISTENT_FAILURES),
        )

        merged = list(old_labels)
        for candidate in new_labels:
            if candidate not in merged:
                merged.append(candidate)
            # At most one displacement per incoming label
            for incoming, displaced in exclusive_pairs:
                if candidate == incoming and displaced in merged:
                    merged.remove(displaced)
                    break

        if jenkins_job_name:
            job_label = JENKINS_JOBS.get(jenkins_job_name, {}).get('label')
            if job_label and job_label not in merged:
                merged.append(job_label)

        logging.debug('In get_modified_labels:')
        for template, labels in (
                ('  old_labels:\n  %s', old_labels),
                ('  new_labels:\n  %s', new_labels),
                ('  modified_labels:\n  %s', merged) ):
            logging.debug(template % str(labels))

        return merged


    def get_short_test_name(self, testName):
        """Returns the testName, after translating it using the translation
           table TT, and then cutting off '_localCluster' and everything that
           follows it, if any. For example, these test names:
               TestSqlUpdateSuite.testUpdate_localCluster_1_1_JNI
               TestSqlUpdateSuite.testUpdate_localCluster_2_3_JNI
               TestSqlUpdateSuite.testUpdate_localCluster_1_1_VALGRIND_IPC
           all refer to failures of one and the same test, for which a single
           Jira ticket should be filed, rather than three.
        """
        translated = testName.translate(TT)
        prefix, marker, _ = translated.partition('_localCluster')
        # Only shorten when '_localCluster' was found, and something precedes
        # it; a name that starts with '_localCluster' is returned in full
        if marker and prefix:
            return prefix
        return translated


    def get_summary_keys(self, className, testName):
        """Given a className and a testName, returns a list containing the
           'keys' to be searched for in a Jira summary.  Normally, these keys
           consist simply of two items, the className and the (possibly
           shortened) testName; however, if either one contains any of the
           JIRA_SEARCH_PROBLEMATIC_CHARACTERS (e.g. '.' or '_'), then it is
           replaced by several keys:
             - only the text before its first underscore, and the text after
               its last underscore, are kept (anything in between is dropped);
             - each of those pieces is then split at every dot.
           For example, 'TestFoo.testBar_abc_xyz' becomes the three keys
           'TestFoo', 'testBar' and 'xyz'. Empty substrings (e.g. from a
           leading underscore, or from two adjacent dots) are kept as keys.
        """
        summary_keys = []

        # Shorten the testName to omit any suffix beginning with '_localCluster'
        for name in [className, self.get_short_test_name(testName)]:
            if all(char not in name for char in JIRA_SEARCH_PROBLEMATIC_CHARACTERS):
                summary_keys.append(name)
                continue

            # Handle any underscore ('_') characters: use only the substrings
            # before the first and after the last underscore
            first_underscore_index = name.find('_')
            if first_underscore_index < 0:
                pieces = [name]
            else:
                last_underscore_index = name.rfind('_')
                pieces = [name[:first_underscore_index], name[last_underscore_index+1:]]

            # Handle any dot ('.') characters: use each substring before and
            # after any dots. (The previous hand-written index loop never
            # advanced its start index, so every key began at the start of
            # the piece: 'a.b.c' produced 'a', 'a.b', 'a.b.c'.)
            for piece in pieces:
                summary_keys.extend(piece.split('.'))

        return summary_keys


    def file_jira_issue(self, issue, DRY_RUN=False, failing_consistently=False):
        global JENKINSBOT
        if not JENKINSBOT:
            JENKINSBOT = JenkinsBot()
        error_url  = issue['url']
        error_report = self.read_url(error_url + '/api/python')
        if error_report is None:
            return None

        fullTestName = issue['packageName']+'.'+issue['className']+'.'+issue['testName']
        summary_keys = [issue['className'], issue['testName']]
#         summary_keys = self.get_summary_keys(issue['className'], issue['testName'])
        channel      = issue['channel']
        labels       = issue['labels']
        priority     = issue['priority']
        build_number = issue['build']
        jenkins_job  = issue['job']
        jenkins_job_nickname = JENKINS_JOBS.get(jenkins_job, {}).get('nickname', jenkins_job)
        existing_ticket = issue['existing_ticket']

        logging.debug('In file_jira_issue:')
        logging.debug('  issue        : '+str(issue))
        logging.debug('  fullTestName : '