Merge "NetApp - Fixed Device busy error when multiple cinder volume clone created in parallel"
This commit is contained in:
@@ -379,6 +379,58 @@ class NetAppBlockStorageCmodeLibraryTestCase(test.TestCase):
|
||||
qos_policy_group_is_adaptive=False,
|
||||
source_snapshot=None, is_snapshot=True)
|
||||
|
||||
def test_clone_lun_busy_exception(self):
    """Verify _clone_lun falls back to _retry_clone_lun on 'Device busy'.

    The first clone_lun call is made to raise NaApiError('Device busy');
    _clone_lun is then expected to hand the very same arguments to
    _retry_clone_lun instead of propagating the error.
    """
    self.library._get_lun_attr = mock.Mock(
        return_value={'Volume': 'fakeLUN'})
    self.library.zapi_client = mock.Mock()
    lun = fake.FAKE_LUN_GET_ITER_RESULT
    self.library.zapi_client.get_lun_by_args.return_value = lun
    self.library._add_lun_to_table = mock.Mock()
    msg = 'Device busy'
    self.mock_object(self.library.zapi_client,
                     'clone_lun',
                     mock.Mock(side_effect=netapp_api.NaApiError(
                         message=msg)))
    self.mock_object(self.library,
                     '_retry_clone_lun',
                     mock.Mock(return_value=None)
                     )

    self.library._clone_lun('fakeLUN', 'newFakeLUN', is_snapshot=True)

    expected_kwargs = {
        'block_count': 0,
        'dest_block': 0,
        'src_block': 0,
        'qos_policy_group_name': None,
        'qos_policy_group_is_adaptive': False,
        'source_snapshot': None,
        'is_snapshot': True,
    }
    self.library.zapi_client.clone_lun.assert_called_once_with(
        'fakeLUN', 'fakeLUN', 'newFakeLUN', 'true', **expected_kwargs)
    # Without this check the test would still pass if the busy error were
    # silently swallowed and no retry was ever attempted.
    self.library._retry_clone_lun.assert_called_once_with(
        'fakeLUN', 'fakeLUN', 'newFakeLUN', 'true', **expected_kwargs)
|
||||
|
||||
def test__retry_clone_lun_success(self):
    """_retry_clone_lun issues a single clone call when it succeeds."""
    zapi_client = mock.Mock()
    self.library.zapi_client = zapi_client
    clone_args = ('fakeSourceLUN', 'fakeLUN', 'newFakeLUN', 'false')

    self.library._retry_clone_lun(*clone_args)

    # Only the positional arguments were supplied, so every keyword is
    # expected to arrive at the client with its default value.
    zapi_client.clone_lun.assert_called_once_with(
        *clone_args,
        block_count=0,
        dest_block=0,
        src_block=0,
        qos_policy_group_name=None,
        qos_policy_group_is_adaptive=False,
        source_snapshot=None,
        is_snapshot=False)
|
||||
|
||||
def test_retry_clone_lun_failure(self):
    """A NetAppDriverException raised by clone_lun reaches the caller."""
    self.library.zapi_client = mock.Mock()
    failing_clone = mock.Mock(side_effect=na_utils.NetAppDriverException)
    self.mock_object(self.library.zapi_client, 'clone_lun', failing_clone)

    retry_args = ('fakeLUN', 'fakeLUN', 'newFakeLUN', 'false')
    self.assertRaises(na_utils.NetAppDriverException,
                      self.library._retry_clone_lun,
                      *retry_args)
|
||||
|
||||
def test_get_fc_target_wwpns(self):
|
||||
ports = [fake.FC_FORMATTED_TARGET_WWPNS[0],
|
||||
fake.FC_FORMATTED_TARGET_WWPNS[1]]
|
||||
|
@@ -23,6 +23,7 @@
|
||||
"""
|
||||
Volume driver library for NetApp C-mode block storage systems.
|
||||
"""
|
||||
import time
|
||||
|
||||
from oslo_log import log as logging
|
||||
from oslo_service import loopingcall
|
||||
@@ -33,6 +34,7 @@ from cinder import exception
|
||||
from cinder.i18n import _
|
||||
from cinder.objects import fields
|
||||
from cinder.volume.drivers.netapp.dataontap import block_base
|
||||
from cinder.volume.drivers.netapp.dataontap.client import api as netapp_api
|
||||
from cinder.volume.drivers.netapp.dataontap.performance import perf_cmode
|
||||
from cinder.volume.drivers.netapp.dataontap.utils import capabilities
|
||||
from cinder.volume.drivers.netapp.dataontap.utils import data_motion
|
||||
@@ -235,14 +237,30 @@ class NetAppBlockStorageCmodeLibrary(
|
||||
metadata = self._get_lun_attr(name, 'metadata')
|
||||
volume = metadata['Volume']
|
||||
|
||||
self.zapi_client.clone_lun(
|
||||
volume, name, new_name, space_reserved,
|
||||
qos_policy_group_name=qos_policy_group_name,
|
||||
src_block=src_block, dest_block=dest_block,
|
||||
block_count=block_count,
|
||||
source_snapshot=source_snapshot,
|
||||
is_snapshot=is_snapshot,
|
||||
qos_policy_group_is_adaptive=qos_policy_group_is_adaptive)
|
||||
try:
|
||||
self.zapi_client.clone_lun(
|
||||
volume, name, new_name, space_reserved,
|
||||
qos_policy_group_name=qos_policy_group_name,
|
||||
src_block=src_block, dest_block=dest_block,
|
||||
block_count=block_count,
|
||||
source_snapshot=source_snapshot,
|
||||
is_snapshot=is_snapshot,
|
||||
qos_policy_group_is_adaptive=qos_policy_group_is_adaptive,
|
||||
)
|
||||
except netapp_api.NaApiError as e:
|
||||
with excutils.save_and_reraise_exception() as exc_context:
|
||||
if 'Device busy' in e.message:
|
||||
self._retry_clone_lun(
|
||||
volume, name, new_name, space_reserved,
|
||||
qos_policy_group_name=qos_policy_group_name,
|
||||
src_block=src_block, dest_block=dest_block,
|
||||
block_count=block_count,
|
||||
source_snapshot=source_snapshot,
|
||||
is_snapshot=is_snapshot,
|
||||
qos_policy_group_is_adaptive=(
|
||||
qos_policy_group_is_adaptive),
|
||||
)
|
||||
exc_context.reraise = False
|
||||
|
||||
LOG.debug("Cloned LUN with new name %s", new_name)
|
||||
lun = self.zapi_client.get_lun_by_args(vserver=self.vserver,
|
||||
@@ -260,6 +278,49 @@ class NetAppBlockStorageCmodeLibrary(
|
||||
clone_lun['Size'],
|
||||
clone_lun))
|
||||
|
||||
def _retry_clone_lun(self, volume, name, new_name, space_reserved,
                     qos_policy_group_name=None, src_block=0,
                     dest_block=0, block_count=0,
                     source_snapshot=None, is_snapshot=False,
                     qos_policy_group_is_adaptive=False):
    """Retry LUN clone creation when ONTAP throws a device busy error.

    The number of attempts is derived from two backend options read via
    ``self.configuration``: ``netapp_lun_clone_busy_timeout`` divided by
    ``netapp_lun_clone_busy_interval``. For example, a timeout of 60 and
    an interval of 5 gives 12 attempts, 5 seconds apart. The first
    attempt is made immediately, without sleeping.

    All parameters are forwarded unchanged to
    ``self.zapi_client.clone_lun``.

    :raises NetAppDriverException: if every attempt failed with a
        "Device busy" error, or if the configured timeout/interval
        leave no room for even one attempt.
    :raises NaApiError: re-raised as-is for any error whose message does
        not contain "Device busy".
    """
    # ``or 0`` keeps the arithmetic below safe should an option come back
    # unset.
    timeout = self.configuration.safe_get(
        'netapp_lun_clone_busy_timeout') or 0
    interval = self.configuration.safe_get(
        'netapp_lun_clone_busy_interval') or 0
    # The interval option accepts 0; clamp it to one second so that the
    # division below cannot fail and retries never become a busy loop.
    interval = max(interval, 1)
    retries = int(timeout / interval)

    if retries < 1:
        # Returning normally here would make the caller treat the clone
        # as successful although no retry was attempted, so fail loudly.
        msg = _("Timed out after %s retry for LUN clone"
                " creation")
        raise na_utils.NetAppDriverException(msg % retries)

    for attempt in range(1, retries + 1):
        try:
            self.zapi_client.clone_lun(
                volume, name, new_name, space_reserved,
                qos_policy_group_name=qos_policy_group_name,
                src_block=src_block, dest_block=dest_block,
                block_count=block_count,
                source_snapshot=source_snapshot,
                is_snapshot=is_snapshot,
                qos_policy_group_is_adaptive=qos_policy_group_is_adaptive,
            )
        except netapp_api.NaApiError as e:
            if 'Device busy' not in (e.message or ''):
                # Not a transient condition: re-raise the original
                # exception so its type and traceback are preserved.
                raise
            if attempt == retries:
                msg = _("Timed out after %s retry for LUN clone"
                        " creation")
                raise na_utils.NetAppDriverException(msg % retries)
            LOG.debug("Attempt %s failed with device busy error. "
                      "Retrying after %s seconds...", attempt,
                      interval)
            time.sleep(interval)
        else:
            LOG.info("LUN clone succeeded on attempt %s.", attempt)
            return
|
||||
|
||||
def _get_fc_target_wwpns(self, include_partner=True):
|
||||
return self.zapi_client.get_fc_target_wwpns()
|
||||
|
||||
|
@@ -257,7 +257,19 @@ netapp_san_opts = [
|
||||
'applied to the names of objects from the storage '
|
||||
'backend which represent pools in Cinder. This option '
|
||||
'is only utilized when the storage protocol is '
|
||||
'configured to use iSCSI or FC.')), ]
|
||||
'configured to use iSCSI or FC.')),
|
||||
cfg.IntOpt('netapp_lun_clone_busy_timeout',
|
||||
min=0,
|
||||
default=30,
|
||||
help='Specifies the maximum time (in seconds) to retry'
|
||||
' the LUN clone operation when an ONTAP "device busy"'
|
||||
' error occurs.'),
|
||||
cfg.IntOpt('netapp_lun_clone_busy_interval',
|
||||
min=0,
|
||||
default=3,
|
||||
help='Specifies the time interval (in seconds) to retry'
|
||||
' the LUN clone operation when an ONTAP "device busy"'
|
||||
' error occurs.')]
|
||||
|
||||
netapp_replication_opts = [
|
||||
cfg.MultiOpt('netapp_replication_aggregate_map',
|
||||
|
@@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
NetApp Driver `bug #2112245
|
||||
<https://bugs.launchpad.net/cinder/+bug/2112245>`_: Fixed the issue where
|
||||
a few cinder volume clone operations failed during bulk clone creation.
|
||||
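Added retry logic so that the NetApp driver retries a LUN clone operation
|
||||
that fails with an ONTAP "device busy" error. The retry window and interval
|
||||
are set by the new ``netapp_lun_clone_busy_timeout`` and
|
||||
``netapp_lun_clone_busy_interval`` options.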
|
Reference in New Issue
Block a user