diff options
author | Magnus Granberg <zorry@gentoo.org> | 2022-07-13 21:49:23 +0200 |
---|---|---|
committer | Magnus Granberg <zorry@gentoo.org> | 2022-07-13 21:49:23 +0200 |
commit | 1250899a4c17be77c99048576090ae1b32e6d227 (patch) | |
tree | 50a5d71ef0ab9218e02979aeb04c24edfdcb5e34 | |
parent | Add support log docker worker (diff) | |
download | tinderbox-cluster-1250899a4c17be77c99048576090ae1b32e6d227.tar.gz tinderbox-cluster-1250899a4c17be77c99048576090ae1b32e6d227.tar.bz2 tinderbox-cluster-1250899a4c17be77c99048576090ae1b32e6d227.zip |
Use log docker for log parser
Signed-off-by: Magnus Granberg <zorry@gentoo.org>
-rw-r--r-- | bin/ci_log_parser | 20 | ||||
-rw-r--r-- | buildbot_gentoo_ci/logs/log_parser.py | 180 | ||||
-rw-r--r-- | buildbot_gentoo_ci/steps/logs.py | 88 | ||||
-rw-r--r-- | py/log_parser.py | 159 |
4 files changed, 212 insertions, 235 deletions
diff --git a/bin/ci_log_parser b/bin/ci_log_parser deleted file mode 100644 index 6401a49..0000000 --- a/bin/ci_log_parser +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2021 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -import argparse -import sys -from buildbot_gentoo_ci.logs.log_parser import runLogParser - -def main(): -# get filename, project_uuid default_project_uuid - parser = argparse.ArgumentParser() - parser.add_argument("-f", "--file", required=True) - parser.add_argument("-u", "--uuid", required=True) - args = parser.parse_args() - runLogParser(args) - sys.exit() - -if __name__ == "__main__": - main() diff --git a/buildbot_gentoo_ci/logs/log_parser.py b/buildbot_gentoo_ci/logs/log_parser.py deleted file mode 100644 index b890c12..0000000 --- a/buildbot_gentoo_ci/logs/log_parser.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright 2021 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -import sys -from multiprocessing import Pool, cpu_count -import re -import io -import gzip -import json -import os -from sqlalchemy.ext.declarative import declarative_base -import sqlalchemy as sa - -Base = declarative_base() - -class ProjectsPattern(Base): - __tablename__ = "projects_pattern" - id = sa.Column(sa.Integer, primary_key=True) - project_uuid = sa.Column(sa.String(36), nullable=False) - search = sa.Column(sa.String(50), nullable=False) - start = sa.Column(sa.Integer, default=0) - end = sa.Column(sa.Integer, default=0) - status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info') - type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'pretend', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info') - search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in') - -def getDBSession(config): - #FIXME: Read the user/pass from file - engine = sa.create_engine(config['database']) - Session = sa.orm.sessionmaker(bind = engine) - return Session() - -def getMultiprocessingPool(config): - return Pool(processes = int(config['core'])) - -def addPatternToList(Session, pattern_list, uuid): - for project_pattern in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all(): - # check if the search pattern is vaild - project_pattern_search = project_pattern.search - try: - re.compile(project_pattern_search) - except re.error: - print("Non valid regex pattern") - print(project_pattern.search) - print(project_pattern.id) - else: - patten_dict = {} - patten_dict['id'] = project_pattern.id - patten_dict['project_uuid'] = project_pattern.project_uuid - patten_dict['search'] = project_pattern_search - patten_dict['start'] = project_pattern.start - patten_dict['end'] = project_pattern.end - patten_dict['status'] = project_pattern.status - patten_dict['type'] = project_pattern.type - patten_dict['search_type'] = project_pattern.search_type - pattern_list.append(patten_dict) - return pattern_list - -def get_log_search_pattern(Session, uuid, default_uuid): - # get pattern from the projects - # add that to log_search_pattern_list - log_search_pattern_list = [] - log_search_pattern_list = addPatternToList(Session, log_search_pattern_list, uuid) - log_search_pattern_list = addPatternToList(Session, log_search_pattern_list, default_uuid) - return log_search_pattern_list - -def search_buildlog(log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines): - # get text line to search - text_line = logfile_text_dict[tmp_index] - summery_dict = {} - # loop true the pattern list for match - for search_pattern in log_search_pattern_list: - search_hit = False - ignore_line = False - # check if should ignore the line - #FIXME take the ignore line pattern from db - if re.search('^>>> /', text_line): - ignore_line = True - #if else re.search('./\w+/'): - # pass - else: - # search for match - if search_pattern['search_type'] == 'in': - if search_pattern['search'] in text_line: - search_hit = True - if search_pattern['search_type'] == 'startswith': - if text_line.startswith(search_pattern['search']): - search_hit = True - if search_pattern['search_type'] == 'endswith': - if text_line.endswith(search_pattern['search']): - search_hit = True - if search_pattern['search_type'] == 'search': - if re.search(search_pattern['search'], text_line): - search_hit = True - # add the line if the pattern match - if search_hit: - summery_dict[tmp_index] = {} - summery_dict[tmp_index]['text'] = text_line - summery_dict[tmp_index]['type'] = search_pattern['type'] - summery_dict[tmp_index]['status'] = search_pattern['status'] - summery_dict[tmp_index]['id'] = search_pattern['id'] - summery_dict[tmp_index]['search_pattern'] = search_pattern['search'] - # add upper text lines if requested - # max 5 - if search_pattern['start'] != 0: - i = tmp_index - search_pattern['start'] - 1 - match = True - while match: - i = i + 1 - if i < (tmp_index - 9) or i == tmp_index: - match = False - else: - if not i in summery_dict: - summery_dict[i] = {} - summery_dict[i]['text'] = logfile_text_dict[i] - summery_dict[i]['type'] = 'info' - summery_dict[i]['status'] = 'info' - summery_dict[i]['id'] = 0 - summery_dict[i]['search_pattern'] = 'auto' - # add lower text lines if requested - # max 5 - if search_pattern['end'] != 0: - i = tmp_index - end = tmp_index + search_pattern['end'] - match = True - while match: - i = i + 1 - if i > max_text_lines or i > end: - match = False - else: - if not i in summery_dict: - summery_dict[i] = {} - summery_dict[i]['text'] = logfile_text_dict[i] - summery_dict[i]['type'] = 'info' - summery_dict[i]['status'] = 'info' - summery_dict[i]['id'] = 0 - summery_dict[i]['search_pattern'] = 'auto' - if not ignore_line or not search_hit: - # we add all line that start with ' * ' as info - # we add all line that start with '>>>' as info - if text_line.startswith(' * ') or text_line.startswith('>>>'): - if not tmp_index in summery_dict: - summery_dict[tmp_index] = {} - summery_dict[tmp_index]['text'] = text_line - summery_dict[tmp_index]['type'] = 'info' - summery_dict[tmp_index]['status'] = 'info' - summery_dict[tmp_index]['id'] = 0 - summery_dict[tmp_index]['search_pattern'] = 'auto' - if summery_dict == {}: - return False - return summery_dict - -def getConfigSettings(): - configpath = os.getcwd().split('workers/')[0] - with open(configpath + 'logparser.json') as f: - config = json.load(f) - return config - -def runLogParser(args): - index = 1 - max_text_lines = 0 - logfile_text_dict = {} - config = getConfigSettings() - Session = getDBSession(config) - mp_pool = getMultiprocessingPool(config) - #NOTE: The patten is from https://github.com/toralf/tinderbox/tree/master/data files. - # Is stored in a db instead of files. - log_search_pattern_list = get_log_search_pattern(Session, args.uuid, config['default_uuid']) - Session.close() - for text_line in io.TextIOWrapper(io.BufferedReader(gzip.open(args.file)), encoding='utf8', errors='ignore'): - logfile_text_dict[index] = text_line.strip('\n') - index = index + 1 - max_text_lines = index - # run the parse patten on the line - for tmp_index, text in logfile_text_dict.items(): - res = mp_pool.apply_async(search_buildlog, (log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines,)) - if res.get(): - print(json.dumps(res.get())) - mp_pool.close() - mp_pool.join() diff --git a/buildbot_gentoo_ci/steps/logs.py b/buildbot_gentoo_ci/steps/logs.py index 6bd3279..e139cf9 100644 --- a/buildbot_gentoo_ci/steps/logs.py +++ b/buildbot_gentoo_ci/steps/logs.py @@ -86,6 +86,12 @@ class SetupPropertys(BuildStep): self.setProperty("default_project_data", default_project_data, 'default_project_data') self.setProperty("version_data", version_data, 'version_data') self.setProperty("status", 'completed', 'status') + if self.getProperty('faild_cpv'): + log_cpv = self.getProperty('faild_cpv') + else: + log_cpv = self.getProperty('cpv') + self.setProperty("log_cpv", log_cpv, 'log_cpv') + self.descriptionDone = 'Runing log checker on ' + log_cpv return SUCCESS class SetupParserBuildLoger(BuildStep): @@ -102,24 +108,42 @@ class SetupParserBuildLoger(BuildStep): @defer.inlineCallbacks def run(self): + self.aftersteps_list = [] workdir = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id'])) - if self.getProperty('faild_cpv'): - log_cpv = self.getProperty('log_build_data')[self.getProperty('faild_cpv')] - else: - log_cpv = self.getProperty('log_build_data')[self.getProperty('cpv')] + log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')] + mastersrc_log = yield os.path.join(workdir, log_cpv['full_logname']) + log_py = 'log_parser.py' + config_log_py = 'logparser.json' + mastersrc_py = yield os.path.join(self.master.basedir, log_py) + mastersrc_config = yield os.path.join(self.master.basedir, config_log_py) + # Upload logfile to worker + self.aftersteps_list.append(steps.FileDownload( + mastersrc=mastersrc_log, + workerdest=log_cpv['full_logname'] + )) + # Upload log parser py code + self.aftersteps_list.append(steps.FileDownload( + mastersrc=mastersrc_py, + workerdest=log_py + )) + # Upload log parser py config + self.aftersteps_list.append(steps.FileDownload( + mastersrc=mastersrc_config, + workerdest=config_log_py + )) + # Run the log parser code command = [] - command.append('ci_log_parser') + command.append('python3') + command.append(log_py) command.append('-f') command.append(log_cpv['full_logname']) command.append('-u') command.append(self.getProperty('project_data')['uuid']) - self.aftersteps_list = [] - self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand( + self.aftersteps_list.append(steps.SetPropertyFromCommand( name = 'RunBuildLogParser', haltOnFailure = True, flunkOnFailure = True, command=command, - workdir=workdir, strip=False, extract_fn=PersOutputOfLogParser )) @@ -296,19 +320,15 @@ class MakeIssue(BuildStep): separator1 = '\n' separator2 = ' ' log = yield self.addLog('issue') - if self.getProperty('faild_cpv'): - cpv = self.getProperty('faild_cpv') - else: - cpv = self.getProperty('cpv') - self.error_dict['cpv'] = cpv + self.error_dict['cpv'] = self.getProperty('log_cpv') yield log.addStdout('Title:' + '\n') - yield log.addStdout(separator2.join([cpv, '-', self.error_dict['title']]) + separator1) + yield log.addStdout(separator2.join([self.getProperty('log_cpv'), '-', self.error_dict['title']]) + separator1) yield log.addStdout('Summary:' + '\n') for line in self.summary_log_list: yield log.addStdout(line + '\n') yield log.addStdout('Attachments:' + '\n') yield log.addStdout('emerge_info.log' + '\n') - log_cpv = self.getProperty('log_build_data')[cpv] + log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')] yield log.addStdout(log_cpv['full_logname'] + '\n') yield log.addStdout('world.log' + '\n') @@ -389,9 +409,9 @@ class setBuildbotLog(BuildStep): yield log.addStdout(line + '\n') return SUCCESS -class SetupParserEmergeInfoLog(BuildStep): +class ReadEmergeInfoLog(BuildStep): - name = 'SetupParserEmergeInfoLog' + name = 'ReadEmergeInfoLog' description = 'Running' descriptionDone = 'Ran' descriptionSuffix = None @@ -404,21 +424,22 @@ class SetupParserEmergeInfoLog(BuildStep): @defer.inlineCallbacks def run(self): + emerge_info_output = {} + emerge_info_list = [] + emerge_package_info = [] + # Read the file and add it to a property workdir = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id'])) - command = [] - command.append('cat') - command.append('emerge_info.txt') - self.aftersteps_list = [] - self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand( - name = 'RunEmergeInfoLogParser', - haltOnFailure = True, - flunkOnFailure = True, - command=command, - workdir=workdir, - strip=False, - extract_fn=PersOutputOfEmergeInfo - )) - yield self.build.addStepsAfterCurrentStep(self.aftersteps_list) + with open(os.path.join(workdir, 'emerge_info.txt'), encoding='utf-8') as source: + emerge_info = source.read() + # set emerge_info_output Property + for line in emerge_info.split('\n'): + if line.startswith('['): + emerge_package_info.append(line) + else: + emerge_info_list.append(line) + emerge_info_output['emerge_info'] = emerge_info_list + emerge_info_output['emerge_package_info'] = emerge_package_info + self.setProperty("emerge_info_output", emerge_info_output, 'emerge_info_output') return SUCCESS class setEmergeInfoLog(BuildStep): @@ -483,10 +504,7 @@ class Upload(BuildStep): @defer.inlineCallbacks def run(self): - if self.getProperty('faild_cpv'): - log_cpv = self.getProperty('log_build_data')[self.getProperty('faild_cpv')] - else: - log_cpv = self.getProperty('log_build_data')[self.getProperty('cpv')] + log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')] bucket = self.getProperty('project_data')['uuid'] + '-' + 'logs' file_path = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id']) ,log_cpv['full_logname']) aftersteps_list = [] diff --git a/py/log_parser.py b/py/log_parser.py new file mode 100644 index 0000000..dd48295 --- /dev/null +++ b/py/log_parser.py @@ -0,0 +1,159 @@ +# Copyright 2022 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +import sys +from multiprocessing import Pool, cpu_count +import re +import io +import gzip +import json +import os +from sqlalchemy.ext.declarative import declarative_base +import sqlalchemy as sa +import argparse + +Base = declarative_base() + +class ProjectsPattern(Base): + __tablename__ = "projects_pattern" + id = sa.Column(sa.Integer, primary_key=True) + project_uuid = sa.Column(sa.String(36), nullable=False) + search = sa.Column(sa.String(50), nullable=False) + start = sa.Column(sa.Integer, default=0) + end = sa.Column(sa.Integer, default=0) + status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info') + type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'pretend', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info') + search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in') + +def get_pattern_dict(project_pattern): + patten_dict = {} + patten_dict['id'] = project_pattern.id + patten_dict['project_uuid'] = project_pattern.project_uuid + patten_dict['search'] = project_pattern.search + patten_dict['status'] = project_pattern.status + patten_dict['type'] = project_pattern.type + return patten_dict + +def addPatternToList(Session, log_search_pattern, uuid): + for project_pattern in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all(): + # check if the search pattern is vaild + project_pattern_search = project_pattern.search + try: + re.compile(project_pattern_search) + except re.error: + print("Non valid regex pattern") + print(project_pattern.search) + print(project_pattern.id) + else: + if project_pattern.type == 'ignore': + log_search_pattern['ignore'].append(get_pattern_dict(project_pattern)) + if project_pattern.type == 'test': + log_search_pattern['test'].append(get_pattern_dict(project_pattern)) + else: + log_search_pattern['default'].append(get_pattern_dict(project_pattern)) + return log_search_pattern + +def get_log_search_pattern(Session, uuid, default_uuid): + # get pattern from the projects and add that to log_search_pattern + log_search_pattern = {} + log_search_pattern['ignore'] = [] + log_search_pattern['default'] = [] + log_search_pattern['test'] = [] + log_search_pattern = addPatternToList(Session, log_search_pattern, uuid) + log_search_pattern = addPatternToList(Session, log_search_pattern, default_uuid) + return log_search_pattern + +def get_search_pattern_match(log_search_pattern, text_line): + for search_pattern in log_search_pattern: + if re.search(search_pattern['search'], text_line): + return search_pattern + return False + +def search_buildlog(log_search_pattern, text_line, index): + summary = {} + #FIXME: add check for test + # don't log ignore lines + if get_search_pattern_match(log_search_pattern['ignore'], text_line): + return False + # search default pattern + search_pattern_match = get_search_pattern_match(log_search_pattern['default'], text_line) + if search_pattern_match: + summary[index] = dict( + text = text_line, + type = search_pattern_match['type'], + status = search_pattern_match['status'], + id = search_pattern_match['id'], + search_pattern = search_pattern_match['search'] + ) + return summary + # we add all line that start with ' * ' or '>>>' as info + if text_line.startswith(' * ') or text_line.startswith('>>>'): + summary[index] = dict( + text = text_line, + type = 'info', + status = 'info', + id = 0, + search_pattern = 'auto' + ) + return summary + return False + +def getConfigSettings(): + #configpath = os.getcwd() + with open('logparser.json') as f: + config = json.load(f) + return config + +def getDBSession(config): + engine = sa.create_engine(config['database']) + Session = sa.orm.sessionmaker(bind = engine) + return Session() + +def getMultiprocessingPool(config): + return Pool(processes = int(config['core'])) + +def getJsonResult(results): + for r in results: + try: + value = r.get() + except Exception as e: + print(f'Failed with: {e}') + else: + if value: + print(json.dumps(value), flush=True) + +def runLogParser(args): + index = 1 + logfile_text_dict = {} + config = getConfigSettings() + Session = getDBSession(config) + #mp_pool = getMultiprocessingPool(config) + summary = {} + #NOTE: The patten is from https://github.com/toralf/tinderbox/tree/master/data files. + # Is stored in a db instead of files. + log_search_pattern = get_log_search_pattern(Session, args.uuid, config['default_uuid']) + Session.close() + # read the log file to dict + for text_line in io.TextIOWrapper(io.BufferedReader(gzip.open(args.file)), encoding='utf8', errors='ignore'): + logfile_text_dict[index] = text_line.strip('\n') + index = index + 1 + # run the search parse pattern on the text lines + #params = [(log_search_pattern, text, line_index,) for line_index, text in logfile_text_dict.items()] + with getMultiprocessingPool(config) as pool: + results = list(pool.apply_async(search_buildlog, args=(log_search_pattern, text, line_index,)) for line_index, text in logfile_text_dict.items()) + #results = pool.starmap(search_buildlog, params) + getJsonResult(results) + pool.close() + pool.join() + +def main(): +# get filename, project_uuid default_project_uuid + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--file", required=True) + parser.add_argument("-u", "--uuid", required=True) + args = parser.parse_args() + runLogParser(args) + sys.exit() + +if __name__ == "__main__": + main() |