Merge "Reproduce GET a_c slowness bug/2126751"
This commit is contained in:
@@ -13,6 +13,7 @@
|
|||||||
import collections
|
import collections
|
||||||
import copy
|
import copy
|
||||||
import itertools
|
import itertools
|
||||||
|
import time
|
||||||
|
|
||||||
import os_traits
|
import os_traits
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
@@ -782,6 +783,9 @@ def _merge_candidates(candidates, rw_ctx):
|
|||||||
all_suffixes = set(candidates)
|
all_suffixes = set(candidates)
|
||||||
num_granular_groups = len(all_suffixes - set(['']))
|
num_granular_groups = len(all_suffixes - set(['']))
|
||||||
max_a_c = rw_ctx.config.placement.max_allocation_candidates
|
max_a_c = rw_ctx.config.placement.max_allocation_candidates
|
||||||
|
|
||||||
|
dropped = 0
|
||||||
|
start = time.monotonic()
|
||||||
for areq_list in _generate_areq_lists(
|
for areq_list in _generate_areq_lists(
|
||||||
rw_ctx, areq_lists_by_anchor, all_suffixes
|
rw_ctx, areq_lists_by_anchor, all_suffixes
|
||||||
):
|
):
|
||||||
@@ -816,8 +820,15 @@ def _merge_candidates(candidates, rw_ctx):
|
|||||||
# now exceeds capacity where amounts of the same RP+RC were
|
# now exceeds capacity where amounts of the same RP+RC were
|
||||||
# folded together. So do a final capacity check/filter.
|
# folded together. So do a final capacity check/filter.
|
||||||
if rw_ctx.exceeds_capacity(areq):
|
if rw_ctx.exceeds_capacity(areq):
|
||||||
|
dropped += 1
|
||||||
continue
|
continue
|
||||||
areqs.add(areq)
|
areqs.add(areq)
|
||||||
|
if len(areqs) == 1:
|
||||||
|
LOG.warn(
|
||||||
|
"Found the first valid candidate in %.2f secs and "
|
||||||
|
"dropped %d invalid ones", time.monotonic() - start, dropped)
|
||||||
|
start = time.monotonic()
|
||||||
|
dropped = 0
|
||||||
|
|
||||||
if max_a_c >= 0 and len(areqs) >= max_a_c:
|
if max_a_c >= 0 and len(areqs) >= max_a_c:
|
||||||
break
|
break
|
||||||
|
@@ -188,3 +188,109 @@ class TestWideTreeAllocationCandidateExplosion(base.TestCase):
|
|||||||
body2 = resp.text
|
body2 = resp.text
|
||||||
|
|
||||||
self.assertEqual(body1, body2)
|
self.assertEqual(body1, body2)
|
||||||
|
|
||||||
|
def test_many_non_viable_candidates_8_8(self):
|
||||||
|
# This simulates that we have a single resource per RP (in this case
|
||||||
|
# one VF, but it could be one PF resource, it does not matter). We have
|
||||||
|
# many RPs and we request many groups of one resource. This creates a
|
||||||
|
# situation where even if the number of candidates is limited by
|
||||||
|
# max_allocation_candidates the algorithm generates a lot of invalid
|
||||||
|
# candidates that need to be filtered out, which takes excessive time.
|
||||||
|
#
|
||||||
|
# We have 8 RPs with 1 resource, and we request 8 groups with
|
||||||
|
# 1 resource.
|
||||||
|
# Placement will generate an initial candidate matrix by satisfying
|
||||||
|
# each group independently (G is request group, R is RP):
|
||||||
|
#
|
||||||
|
# G1: [R1, R2,..., R8]
|
||||||
|
# G2: [R1, R2,..., R8]
|
||||||
|
# ...
|
||||||
|
# G8: [R1, R2,..., R8]
|
||||||
|
#
|
||||||
|
# Then it creates all the possible combinations and checks if they are
|
||||||
|
# valid (C is candidate, G1-R1 means G1 group satisfied from R1 RP):
|
||||||
|
# C1: [G1-R1, G2-R1, ..., G8-R1] # invalid R1 has 1 res but C1 needs 8
|
||||||
|
# C2: [G1-R1, G2-R1, ..., G8-R2] # invalid R1 has 1 res but C2 needs 7
|
||||||
|
# ...
|
||||||
|
# Cx: [G1-R1, G2-R2, ..., G8-R8] # valid each Rx has 1 res and
|
||||||
|
# # Cx asks for 1 res each
|
||||||
|
#
|
||||||
|
# So placement generates an excessive amount of invalid (and therefore
|
||||||
|
# later filtered) candidates before it finds the first valid one.
|
||||||
|
# The max_allocation_candidates check only applies to valid candidates
|
||||||
|
# so it cannot prevent the excessive runtime of generating candidates
|
||||||
|
# that turn out to be invalid.
|
||||||
|
#
|
||||||
|
# With the extra logging we see that the first valid Cx is:
|
||||||
|
# WARNING [placement.objects.allocation_candidate] Found the first
|
||||||
|
# valid candidate in 1.73 secs and dropped 342391 invalid ones
|
||||||
|
#
|
||||||
|
# If you bump this from 1000 to 10k max candidates then you will see a
|
||||||
|
# very long runtime.
|
||||||
|
#
|
||||||
|
# This runs in 12 seconds.
|
||||||
|
self.conf_fixture.conf.set_override(
|
||||||
|
"max_allocation_candidates", 1000, group="placement")
|
||||||
|
self._test_num_candidates_and_computes(
|
||||||
|
computes=1, pfs=8, vfs_per_pf=1, req_groups=8, req_res_per_group=1,
|
||||||
|
req_limit=1000,
|
||||||
|
expected_candidates=1000, expected_computes_with_candidates=1)
|
||||||
|
|
||||||
|
# This is bug https://bugs.launchpad.net/placement/+bug/2126751; the below
|
||||||
|
# case should run in reasonable time
|
||||||
|
#
|
||||||
|
# def test_many_non_viable_candidates_21_8(self):
|
||||||
|
# # This runs for more than 120 seconds
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "max_allocation_candidates", 1000, group="placement")
|
||||||
|
# self._test_num_candidates_and_computes(
|
||||||
|
# computes=1, pfs=21, vfs_per_pf=1, req_groups=8,
|
||||||
|
# req_res_per_group=1,
|
||||||
|
# req_limit=1000,
|
||||||
|
# expected_candidates=1000, expected_computes_with_candidates=1)
|
||||||
|
#
|
||||||
|
# def test_many_non_viable_candidates_21_16(self):
|
||||||
|
# # This runs for more than 120 seconds
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "max_allocation_candidates", 1000, group="placement")
|
||||||
|
# self._test_num_candidates_and_computes(
|
||||||
|
# computes=1, pfs=21, vfs_per_pf=1, req_groups=16,
|
||||||
|
# req_res_per_group=1,
|
||||||
|
# req_limit=1000,
|
||||||
|
# expected_candidates=1000, expected_computes_with_candidates=1)
|
||||||
|
#
|
||||||
|
# def test_many_non_viable_candidates_21_21(self):
|
||||||
|
# # This runs for more than 120 seconds
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "max_allocation_candidates", 1000, group="placement")
|
||||||
|
# self._test_num_candidates_and_computes(
|
||||||
|
# computes=1, pfs=21, vfs_per_pf=1, req_groups=21,
|
||||||
|
# req_res_per_group=1,
|
||||||
|
# req_limit=1000,
|
||||||
|
# expected_candidates=1000, expected_computes_with_candidates=1)
|
||||||
|
#
|
||||||
|
# def test_many_non_viable_candidates_21_8_two_computes(self):
|
||||||
|
# # This runs for more than 120 seconds
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "max_allocation_candidates", 1000, group="placement")
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "allocation_candidates_generation_strategy", "breadth-first",
|
||||||
|
# group="placement")
|
||||||
|
# self._test_num_candidates_and_computes(
|
||||||
|
# computes=2, pfs=21, vfs_per_pf=1, req_groups=8,
|
||||||
|
# req_res_per_group=1,
|
||||||
|
# req_limit=1000,
|
||||||
|
# expected_candidates=1000, expected_computes_with_candidates=2)
|
||||||
|
#
|
||||||
|
# def test_many_non_viable_candidates_21_21_two_computes(self):
|
||||||
|
# # This runs for more than 120 seconds
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "max_allocation_candidates", 1000, group="placement")
|
||||||
|
# self.conf_fixture.conf.set_override(
|
||||||
|
# "allocation_candidates_generation_strategy", "breadth-first",
|
||||||
|
# group="placement")
|
||||||
|
# self._test_num_candidates_and_computes(
|
||||||
|
# computes=2, pfs=21, vfs_per_pf=1, req_groups=21,
|
||||||
|
# req_res_per_group=1,
|
||||||
|
# req_limit=1000,
|
||||||
|
# expected_candidates=1000, expected_computes_with_candidates=2)
|
||||||
|
Reference in New Issue
Block a user