Skip to content

Commit

Permalink
master: Add an option to janitor to delete logs for specific builders.
Browse files Browse the repository at this point in the history
  • Loading branch information
rufinio committed Feb 18, 2022
1 parent b6f5a50 commit fa26fff
Show file tree
Hide file tree
Showing 11 changed files with 358 additions and 56 deletions.
1 change: 1 addition & 0 deletions common/code_spelling_ignore_words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,7 @@ thursday
tid
timedelta
timestamp
timestamps
timezone
timezones
tld
Expand Down
47 changes: 38 additions & 9 deletions master/buildbot/configurators/janitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from twisted.internet import defer

from buildbot import config as bbconfig
from buildbot.config import BuilderConfig
from buildbot.configurators import ConfiguratorBase
from buildbot.process.buildstep import BuildStep
Expand All @@ -39,43 +40,67 @@ class LogChunksJanitor(BuildStep):
name = 'LogChunksJanitor'
renderables = ["logHorizon"]

def __init__(self, logHorizon=None, horizonPerBuilder=None):
    """Create a step that deletes old log chunks.

    Both arguments are optional so that the step can be used either with a
    single global horizon or with per-builder horizons.

    :param logHorizon: age threshold; it is subtracted from ``now()`` in
        ``run``, so it is presumably a ``datetime.timedelta`` (it is also
        listed in ``renderables``).
    :param horizonPerBuilder: mapping passed through unchanged to
        ``db.logs.deleteOldLogChunks``, which indexes it as
        ``horizonPerBuilder[builderName]["logHorizon"]``.
    """
    super().__init__()
    self.logHorizon = logHorizon
    self.horizonPerBuilder = horizonPerBuilder

@defer.inlineCallbacks
def run(self):
    """Delete old log chunks and report how many were removed.

    Runs the global pass when ``logHorizon`` is set and the per-builder
    pass when ``horizonPerBuilder`` is set.  Each pass is skipped when its
    setting is ``None``, so ``now() - None`` is never evaluated.

    :returns: ``SUCCESS`` (via the inlineCallbacks Deferred).
    """
    # Running total so the description covers every pass that ran, not
    # only the last one.
    deleted_total = 0

    if self.logHorizon is not None:
        older_than_timestamp = datetime2epoch(now() - self.logHorizon)
        deleted = yield self.master.db.logs.deleteOldLogChunks(
            older_than_timestamp=older_than_timestamp)
        deleted_total += deleted
        self.descriptionDone = ["deleted", str(deleted_total), "logchunks"]

    if self.horizonPerBuilder is not None:
        # The DB layer derives one timestamp per builder from this mapping.
        deleted = yield self.master.db.logs.deleteOldLogChunks(
            horizonPerBuilder=self.horizonPerBuilder)
        deleted_total += deleted
        self.descriptionDone = ["deleted", str(deleted_total), "logchunks"]

    return SUCCESS


class BuildDataJanitor(BuildStep):
    """Build step that deletes old build data key-value pairs.

    Supports a single global horizon (``build_data_horizon``) and/or a
    per-builder mapping (``horizonPerBuilder``); a pass is only executed
    for the settings that are not ``None``.
    """

    name = 'BuildDataJanitor'
    renderables = ["build_data_horizon"]

    def __init__(self, build_data_horizon=None, horizonPerBuilder=None):
        """Store the deletion horizons.

        :param build_data_horizon: age threshold; it is subtracted from
            ``now()`` in ``run``, so it is presumably a
            ``datetime.timedelta``.
        :param horizonPerBuilder: mapping passed through unchanged to
            ``db.build_data.deleteOldBuildData``, which indexes it as
            ``horizonPerBuilder[builderName]["buildDataHorizon"]``.
        """
        super().__init__()
        self.build_data_horizon = build_data_horizon
        self.horizonPerBuilder = horizonPerBuilder

    @defer.inlineCallbacks
    def run(self):
        """Delete old build data and report how many pairs were removed.

        :returns: ``SUCCESS`` (via the inlineCallbacks Deferred).
        """
        # Running total so the description covers every pass that ran,
        # not only the last one.
        deleted_total = 0

        if self.build_data_horizon is not None:
            older_than_timestamp = datetime2epoch(now() - self.build_data_horizon)
            deleted = yield self.master.db.build_data.deleteOldBuildData(
                older_than_timestamp=older_than_timestamp)
            deleted_total += deleted
            self.descriptionDone = ["deleted", str(deleted_total),
                                    "build data key-value pairs"]

        if self.horizonPerBuilder is not None:
            # The DB layer derives one timestamp per builder from this mapping.
            deleted = yield self.master.db.build_data.deleteOldBuildData(
                horizonPerBuilder=self.horizonPerBuilder)
            deleted_total += deleted
            self.descriptionDone = ["deleted", str(deleted_total),
                                    "build data key-value pairs"]

        return SUCCESS


class JanitorConfigurator(ConfiguratorBase):
""" Janitor is a configurator that creates a Janitor Builder with all needed Janitor steps"""

def __init__(self, logHorizon=None, hour=0, build_data_horizon=None, horizonPerBuilder=None,
             **kwargs):
    """Store the janitor settings and reject conflicting combinations.

    :param logHorizon: global horizon for log chunk deletion, or ``None``.
    :param hour: stored as ``self.hour``; presumably the hour at which the
        janitor is scheduled -- confirm against ``configure``.
    :param build_data_horizon: global horizon for build data deletion, or
        ``None``.
    :param horizonPerBuilder: per-builder horizons, or ``None``.  Mutually
        exclusive with ``logHorizon`` and ``build_data_horizon``.
    :param kwargs: extra keyword arguments, stored as ``self.kwargs``.
    """
    super().__init__()
    self.logHorizon = logHorizon
    self.build_data_horizon = build_data_horizon
    self.horizonPerBuilder = horizonPerBuilder
    self.hour = hour
    self.kwargs = kwargs

    # Per-builder horizons cannot be combined with either global horizon.
    has_global_horizon = (self.logHorizon is not None
                          or self.build_data_horizon is not None)
    if has_global_horizon and self.horizonPerBuilder is not None:
        bbconfig.error("JanitorConfigurator: horizonPerBuilder only "
                       "possible without logHorizon and build_data_horizon set.")

def configure(self, config_dict):
steps = []
Expand All @@ -84,6 +109,10 @@ def configure(self, config_dict):
if self.build_data_horizon is not None:
steps.append(BuildDataJanitor(build_data_horizon=self.build_data_horizon))

if self.horizonPerBuilder is not None:
steps.append(LogChunksJanitor(horizonPerBuilder=self.horizonPerBuilder))
steps.append(BuildDataJanitor(horizonPerBuilder=self.horizonPerBuilder))

if not steps:
return

Expand Down
63 changes: 47 additions & 16 deletions master/buildbot/db/build_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
#
# Copyright Buildbot Team Members

from datetime import datetime

import sqlalchemy as sa

from twisted.internet import defer

from buildbot.db import NULL
from buildbot.db import base
from buildbot.util import datetime2epoch


class BuildDataDict(dict):
Expand Down Expand Up @@ -123,9 +126,10 @@ def thd(conn):
return res

@defer.inlineCallbacks
def deleteOldBuildData(self, older_than_timestamp):
def deleteOldBuildData(self, older_than_timestamp=None, horizonPerBuilder=None):
build_data = self.db.model.build_data
builds = self.db.model.builds
builders = self.db.model.builders

def count_build_datum(conn):
res = conn.execute(sa.select([sa.func.count(build_data.c.id)]))
Expand All @@ -136,22 +140,49 @@ def count_build_datum(conn):
def thd(conn):
count_before = count_build_datum(conn)

if self.db._engine.dialect.name == 'sqlite':
# sqlite does not support delete with a join, so for this case we use a subquery,
# which is much slower

q = sa.select([builds.c.id])
q = q.where((builds.c.complete_at >= older_than_timestamp) |
(builds.c.complete_at == NULL))

q = build_data.delete().where(build_data.c.buildid.notin_(q))
if horizonPerBuilder is not None:
for builderName in horizonPerBuilder:
older_than_timestamp_ = datetime2epoch(datetime.now() -
horizonPerBuilder[builderName]["buildDataHorizon"])
if self.db._engine.dialect.name == 'sqlite':
# sqlite does not support delete with a join,
# so for this case we use a subquery,
# which is much slower

q = sa.select([builds.c.id])
q = q.where(sa.and_((builds.c.complete_at < older_than_timestamp_),
builds.c.builderid == builders.c.id,
builders.c.name.like(builderName)))

q = build_data.delete().where(build_data.c.buildid.in_(q))

else:
q = build_data.delete()
q = q.where(sa.and_(builds.c.id == build_data.c.buildid,
builds.c.builderid == builders.c.id,
builds.c.complete_at < older_than_timestamp_,
builders.c.name.like(builderName)))

res = conn.execute(q)
res.close()
else:
q = build_data.delete()
q = q.where(builds.c.id == build_data.c.buildid)
q = q.where((builds.c.complete_at >= older_than_timestamp) |
(builds.c.complete_at == NULL))
res = conn.execute(q)
res.close()
if self.db._engine.dialect.name == 'sqlite':
# sqlite does not support delete with a join,
# so for this case we use a subquery,
# which is much slower

q = sa.select([builds.c.id])
q = q.where((builds.c.complete_at >= older_than_timestamp) |
(builds.c.complete_at == NULL))

q = build_data.delete().where(build_data.c.buildid.notin_(q))
else:
q = build_data.delete()
q = q.where(builds.c.id == build_data.c.buildid)
q = q.where((builds.c.complete_at >= older_than_timestamp) |
(builds.c.complete_at == NULL))
res = conn.execute(q)
res.close()

count_after = count_build_datum(conn)
return count_before - count_after
Expand Down
78 changes: 56 additions & 22 deletions master/buildbot/db/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@

import bz2
import zlib
from datetime import datetime

import sqlalchemy as sa

from twisted.internet import defer
from twisted.python import log

from buildbot.db import base
from buildbot.util import datetime2epoch

try:
# lz4 > 0.9.0
Expand Down Expand Up @@ -345,16 +347,17 @@ def thdcompressLog(conn):
return saved

# returns a Deferred that returns a value
def deleteOldLogChunks(self, older_than_timestamp):
def thddeleteOldLogs(conn):
model = self.db.model
def deleteOldLogChunks(self, older_than_timestamp=None, horizonPerBuilder=None):
model = self.db.model

def countLogchunks(conn):
res = conn.execute(sa.select([sa.func.count(model.logchunks.c.logid)]))
count1 = res.fetchone()[0]
count = res.fetchone()[0]
res.close()
return count

# update log types older than timestamps
# we do it first to avoid having UI discrepancy

# find the steps.id at the upper bound of steps
def getStepidMax(conn, older_than_timestamp):
# N.B.: we utilize the fact that steps.id is auto-increment, thus steps.started_at
# times are effectively sorted and we only need to find the steps.id at the upper
# bound of steps to update.
Expand All @@ -372,18 +375,10 @@ def thddeleteOldLogs(conn):
if res_list:
stepid_max = res_list[0]
res.close()
return stepid_max

# UPDATE logs SET logs.type = 'd' WHERE logs.stepid <= stepid_max AND type != 'd';
if stepid_max:
res = conn.execute(
model.logs.update()
.where(sa.and_(model.logs.c.stepid <= stepid_max,
model.logs.c.type != 'd'))
.values(type='d')
)
res.close()

# query all logs with type 'd' and delete their chunks.
# query all logs with type 'd' and delete their chunks.
def deleteLogsWithTypeD(conn):
if self.db._engine.dialect.name == 'sqlite':
# sqlite does not support delete with a join, so for this case we use a subquery,
# which is much slower
Expand All @@ -400,10 +395,49 @@ def thddeleteOldLogs(conn):

res = conn.execute(q)
res.close()
res = conn.execute(sa.select([sa.func.count(model.logchunks.c.logid)]))
count2 = res.fetchone()[0]
res.close()
return count1 - count2

def thddeleteOldLogs(conn):
count_before = countLogchunks(conn)

# update log types older than timestamps
# we do it first to avoid having UI discrepancy

if horizonPerBuilder:
for builderName in horizonPerBuilder:
older_than_timestamp_ = datetime2epoch(datetime.now() -
horizonPerBuilder[builderName]["logHorizon"])
stepid_max = getStepidMax(conn, older_than_timestamp_)
if stepid_max:
subquery = sa.select([model.steps.c.id]).where(
sa.and_(model.steps.c.buildid == model.builds.c.id,
model.builds.c.builderid == model.builders.c.id,
model.builders.c.name.like(builderName)))
res = conn.execute(
model.logs.update()
.where(sa.and_(model.logs.c.stepid.in_(subquery),
model.logs.c.stepid <= stepid_max,
model.logs.c.type != 'd'))
.values(type='d')
)
res.close()
else:
stepid_max = getStepidMax(conn, older_than_timestamp)
if stepid_max:
# UPDATE logs SET logs.type = 'd'
# WHERE logs.stepid <= stepid_max AND type != 'd';
res = conn.execute(
model.logs.update()
.where(sa.and_(model.logs.c.stepid <= stepid_max,
model.logs.c.type != 'd'))
.values(type='d')
)
res.close()

deleteLogsWithTypeD(conn)

count_after = countLogchunks(conn)

return count_before - count_after
return self.db.pool.do(thddeleteOldLogs)

def _logdictFromRow(self, row):
Expand Down
2 changes: 1 addition & 1 deletion master/buildbot/test/fakedb/build_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def getAllBuildDataNoValues(self, buildid):
return defer.succeed(ret)

# returns a Deferred
def deleteOldBuildData(self, older_than_timestamp):
def deleteOldBuildData(self, older_than_timestamp=None, horizonPerBuilder=None):
buildids_to_keep = []
for build_dict in self.db.builds.builds.values():
if build_dict['complete_at'] is None or \
Expand Down
2 changes: 1 addition & 1 deletion master/buildbot/test/fakedb/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def finishLog(self, logid):
def compressLog(self, logid, force=False):
return defer.succeed(None)

def deleteOldLogChunks(self, older_than_timestamp):
def deleteOldLogChunks(self, older_than_timestamp=None, horizonPerBuilder=None):
# not implemented
self._deleted = older_than_timestamp
return defer.succeed(1)

0 comments on commit fa26fff

Please sign in to comment.