sharder: make gap and overlap warning logs shorter

Previously, when the audit process detected gaps and/or overlaps in a
DB's shard ranges, it would log a warning that included a list of all
impacted shard ranges. The log message can grow very long when gaps or
overlaps involve many shard ranges -- so long that syslog might raise
an OSError (Message too long).

This patch shortens these warning messages to include only a count of
the gaps and/or overlaps, plus a hint to use the
swift-manage-shard-ranges repair command. The overlap warning also no
longer names the shard range state. The count may still be useful for
observing how a problem has developed over time. The detailed
information is better accessed using the swift-manage-shard-ranges
repair command.

Change-Id: I055c40395807708de60882f53652d9533a495d09
Signed-off-by: Alistair Coles <alistairncoles@gmail.com>
This commit is contained in:
Alistair Coles
2025-07-23 14:56:17 +01:00
parent e75e93f11c
commit fd342b9190
2 changed files with 13 additions and 16 deletions

View File

@@ -1296,9 +1296,8 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
paths_with_gaps = find_paths_with_gaps(shard_ranges)
if paths_with_gaps:
warnings.append(
'missing range(s): %s' %
' '.join(['%s-%s' % (gap.lower, gap.upper)
for (_, gap, _) in paths_with_gaps]))
'missing range(s): %s (use swift-manage-shard-ranges '
'repair)' % len(paths_with_gaps))
for state in ShardRange.STATES:
if state == ShardRange.SHRINKING:
@@ -1322,13 +1321,9 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
self._increment_stat('audit_root', 'has_overlap')
self._update_stat('audit_root', 'num_overlap',
step=len(overlaps))
all_overlaps = ', '.join(
[' '.join(['%s-%s' % (sr.lower, sr.upper)
for sr in overlapping_ranges])
for overlapping_ranges in sorted(list(overlaps))])
warnings.append(
'overlapping ranges in state %r: %s' %
(ShardRange.STATES[state], all_overlaps))
'overlapping ranges: %s (use swift-manage-shard-ranges '
'repair)' % len(overlaps))
# We've seen a case in production where the roots own_shard_range
# epoch is reset to None, and state set to ACTIVE (like re-defaulted)

View File

@@ -6110,13 +6110,13 @@ class TestSharder(BaseTestSharder):
self.assertFalse(sharder.logger.get_lines_for_level('error'))
mocked.assert_not_called()
def assert_overlap_warning(line, state_text):
def assert_overlap_warning(line):
self.assertIn('Audit failed for root', line)
self.assertIn(broker.db_file, line)
self.assertIn(broker.path, line)
self.assertIn(
'overlapping ranges in state %r: k-t s-y, y-z y-z'
% state_text, line)
'overlapping ranges: 2 (use swift-manage-shard-ranges repair)',
line)
# check for no duplicates in reversed order
self.assertNotIn('s-z k-t', line)
@@ -6124,7 +6124,7 @@ class TestSharder(BaseTestSharder):
'has_overlap': 1, 'num_overlap': 2}
shard_bounds = (('a', 'j'), ('k', 't'), ('s', 'y'),
('y', 'z'), ('y', 'z'))
for state, state_text in ShardRange.STATES.items():
for state in ShardRange.STATES:
if state in (ShardRange.SHRINKING,
ShardRange.SHARDED,
ShardRange.SHRUNK):
@@ -6137,7 +6137,7 @@ class TestSharder(BaseTestSharder):
sharder, '_audit_shard_container') as mocked:
sharder._audit_container(broker)
lines = sharder.logger.get_lines_for_level('warning')
assert_overlap_warning(lines[0], state_text)
assert_overlap_warning(lines[0])
self.assertFalse(lines[1:])
self.assertFalse(sharder.logger.get_lines_for_level('error'))
self._assert_stats(expected_stats, sharder, 'audit_root')
@@ -6183,7 +6183,9 @@ class TestSharder(BaseTestSharder):
def assert_missing_warning(line):
self.assertIn('Audit failed for root', line)
self.assertIn('missing range(s): -a j-k z-', line)
self.assertIn(
'missing range(s): 3 (use swift-manage-shard-ranges repair)',
line)
self.assertIn('path: %s, db: %s' % (broker.path, broker.db_file),
line)
@@ -6200,7 +6202,7 @@ class TestSharder(BaseTestSharder):
sharder._audit_container(broker)
lines = sharder.logger.get_lines_for_level('warning')
assert_missing_warning(lines[0])
assert_overlap_warning(lines[0], 'active')
assert_overlap_warning(lines[0])
self.assertFalse(lines[1:])
self.assertFalse(sharder.logger.get_lines_for_level('error'))
self._assert_stats(expected_stats, sharder, 'audit_root')