本文整理汇总了Python中membase.helper.rebalance_helper.RebalanceHelper.pick_nodes方法的典型用法代码示例。如果您正苦于以下问题:Python RebalanceHelper.pick_nodes方法的具体用法?Python RebalanceHelper.pick_nodes怎么用?Python RebalanceHelper.pick_nodes使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类membase.helper.rebalance_helper.RebalanceHelper
的用法示例。
在下文中一共展示了RebalanceHelper.pick_nodes方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: rebalance_in_out_with_failover
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def rebalance_in_out_with_failover(self):
    """Rebalance one node in and one node out combined with a failover.

    Flow: load items, run async updates, snapshot vbucket seqnos /
    failover logs / on-disk data sets, fail over a node picked by
    RebalanceHelper while adding self.servers[nodes_init] and ejecting
    self.servers[nodes_init - 1], rebalance, then verify cluster stats,
    failover logs, active/replica data and vbucket distribution.

    The ``fail_over`` test param selects graceful (True) vs hard (False)
    failover.
    """
    fail_over = self.input.param("fail_over", False)
    gen = BlobGenerator('mike', 'mike-', self.value_size, end=self.num_items)
    self._load_all_buckets(self.master, gen, "create", 0)
    tasks = self._async_load_all_buckets(self.master, gen, "update", 0)
    # NOTE: the original also computed servs_in/servs_out lists here but
    # never used them; the explicit slices below are passed directly.
    for task in tasks:
        task.result(self.wait_timeout * 20)
    self._verify_stats_all_buckets(self.servers[:self.nodes_init], timeout=120)
    self._wait_for_stats_all_buckets(self.servers[:self.nodes_init])
    self.sleep(20)
    ejectedNode = self.find_node_info(self.master, self.servers[self.nodes_init - 1])
    # Snapshot pre-rebalance state for the post-rebalance comparisons.
    prev_vbucket_stats = self.get_vbucket_seqnos(self.servers[:self.nodes_init], self.buckets)
    prev_failover_stats = self.get_failovers_logs(self.servers[:self.nodes_init], self.buckets)
    disk_replica_dataset, disk_active_dataset = self.get_and_compare_active_replica_data_set_all(
        self.servers[:self.nodes_init], self.buckets, path=None)
    self.compare_vbucketseq_failoverlogs(prev_vbucket_stats, prev_failover_stats)
    self.rest = RestConnection(self.master)
    chosen = RebalanceHelper.pick_nodes(self.master, howmany=1)
    # Expected membership after removing the ejected + failed-over nodes
    # and adding the new node.
    result_nodes = self.add_remove_servers(self.servers, self.servers[:self.nodes_init],
                                           [self.servers[self.nodes_init - 1], chosen[0]],
                                           [self.servers[self.nodes_init]])
    self.rest.add_node(self.master.rest_username, self.master.rest_password,
                       self.servers[self.nodes_init].ip, self.servers[self.nodes_init].port)
    # Mark the picked node for failover (graceful when fail_over=True).
    self.rest.fail_over(chosen[0].id, graceful=fail_over)
    self.nodes = self.rest.node_statuses()
    self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                        ejectedNodes=[chosen[0].id, ejectedNode.id])
    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg="Rebalance failed")
    self._wait_for_stats_all_buckets(result_nodes)
    self.sleep(10)
    # Post-rebalance verification: stats, failover logs, data, vbucket spread.
    self.verify_cluster_stats(result_nodes)
    self.compare_failovers_logs(prev_failover_stats, result_nodes, self.buckets)
    self.data_analysis_active_replica_all(disk_active_dataset, disk_replica_dataset,
                                          result_nodes, self.buckets, path=None)
    self.verify_unacked_bytes_all_buckets()
    nodes = self.get_nodes_in_cluster(self.master)
    self.vb_distribution_analysis(servers=nodes, std=1.0, total_vbuckets=self.total_vbuckets)
示例2: rebalance_in_with_failover_full_addback_recovery
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def rebalance_in_with_failover_full_addback_recovery(self):
    """Hard-fail a picked node, add it back with "full" recovery, and
    rebalance additional nodes in.

    Afterwards verifies bucket stats, failover logs, on-disk
    active/replica data sets and vbucket distribution.
    """
    update_gen = BlobGenerator('mike', 'mike-', self.value_size, end=self.num_items)
    update_tasks = self._async_load_all_buckets(self.master, update_gen, "update", 0)
    for load_task in update_tasks:
        load_task.result()
    nodes_to_add = [self.servers[idx + self.nodes_init] for idx in range(self.nodes_in)]
    initial_nodes = self.servers[:self.nodes_init]
    self._verify_stats_all_buckets(initial_nodes, timeout=120)
    self._wait_for_stats_all_buckets(initial_nodes)
    self.sleep(20)
    # Snapshot pre-failover state for later comparison.
    prev_failover_stats = self.get_failovers_logs(initial_nodes, self.buckets)
    prev_vbucket_stats = self.get_vbucket_seqnos(initial_nodes, self.buckets)
    disk_replica_dataset, disk_active_dataset = \
        self.get_and_compare_active_replica_data_set_all(initial_nodes, self.buckets, path=None)
    self.rest = RestConnection(self.master)
    self.nodes = self.get_nodes(self.master)
    failover_nodes = RebalanceHelper.pick_nodes(self.master, howmany=1)
    # Hard failover of the picked node.
    failed_over = self.rest.fail_over(failover_nodes[0].id, graceful=False)
    # Schedule full (not delta) recovery for the failed-over node.
    if failed_over:
        self.rest.set_recovery_type(otpNode=failover_nodes[0].id, recoveryType="full")
    rebalance_task = self.cluster.async_rebalance(initial_nodes, nodes_to_add, [])
    rebalance_task.result()
    final_nodes = self.servers[:self.nodes_in + self.nodes_init]
    self._verify_stats_all_buckets(final_nodes, timeout=120)
    self.verify_cluster_stats(final_nodes, check_ep_items_remaining=True)
    self.compare_failovers_logs(prev_failover_stats, final_nodes, self.buckets)
    self.sleep(30)
    self.data_analysis_active_replica_all(disk_active_dataset, disk_replica_dataset,
                                          final_nodes, self.buckets, path=None)
    self.verify_unacked_bytes_all_buckets()
    cluster_nodes = self.get_nodes_in_cluster(self.master)
    self.vb_distribution_analysis(servers=cluster_nodes, buckets=self.buckets,
                                  std=1.0, total_vbuckets=self.total_vbuckets)
示例3: _failover_swap_rebalance
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def _failover_swap_rebalance(self):
    """Fail over picked nodes, add fresh replacement nodes, then
    rebalance the failed-over nodes out (a failover-driven swap).

    When ``fail_orchestrator`` is set, the cluster orchestrator is
    substituted as the first node to fail over, and the REST/master
    handles are re-pointed at one of the new nodes afterwards.
    """
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    num_initial_servers = self.num_initial_servers
    initial_servers = self.servers[:num_initial_servers]
    self.log.info("CREATE BUCKET PHASE")
    SwapRebalanceBase.create_buckets(self)
    # Form the starting cluster from the initial server set.
    self.log.info("INITIAL REBALANCE PHASE")
    status, _ = RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)
    self.assertTrue(status, msg="Rebalance was failed")
    self.log.info("DATA LOAD PHASE")
    self.loaders = SwapRebalanceBase.start_load_phase(self, master)
    # Wait until the load phase has completed.
    SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
    self.log.info("DONE LOAD PHASE")
    # Pick the nodes that will be failed over and rebalanced out.
    self.log.info("current nodes : {0}".format(RebalanceHelper.getOtpNodeIds(master)))
    eject_candidates = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
    eject_node_ids = [candidate.id for candidate in eject_candidates]
    if self.fail_orchestrator:
        status, content = ClusterOperationHelper.find_orchestrator(master)
        self.assertTrue(status,
                        msg="Unable to find orchestrator: {0}:{1}".format(status, content))
        eject_node_ids[0] = content
    self.log.info("FAILOVER PHASE")
    # Fail over every selected node before adding replacements.
    for node_id in eject_node_ids:
        self.log.info("failover node {0} and rebalance afterwards".format(node_id))
        rest.fail_over(node_id)
    new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.failover_factor]
    for server in new_swap_servers:
        added = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
        self.assertTrue(added, "unable to add node {0} to the cluster".format(server.ip))
    if self.fail_orchestrator:
        # The old orchestrator is gone: talk to one of the new nodes.
        rest = RestConnection(new_swap_servers[0])
        master = new_swap_servers[0]
    self.log.info("DATA ACCESS PHASE")
    self.loaders = SwapRebalanceBase.start_access_phase(self, master)
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                   ejectedNodes=eject_node_ids)
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(new_swap_servers))
    SwapRebalanceBase.verification_phase(self, master)
示例4: test_rebalance_in_out_with_failover_addback_recovery
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def test_rebalance_in_out_with_failover_addback_recovery(self):
"""
Rebalances nodes out and in with failover and full/delta recovery add back of a node
Use different nodes_in and nodes_out params to have uneven add and deletion. Use 'zone'
param to have nodes divided into server groups by having zone > 1.
This test begins by loading a given number of items into the cluster. It then
removes one node, rebalances that node out the cluster, and then rebalances it back
in. During the rebalancing we update all of the items in the cluster. Once the
node has been removed and added back we wait for the disk queues to drain, and
then verify that there has been no data loss, sum(curr_items) match the curr_items_total.
We then remove and add back two nodes at a time and so on until we have reached the point
where we are adding back and removing at least half of the nodes.
"""
# NOTE(review): indentation was lost when this example was extracted;
# the statements below all belong to the method body above.
# Load an initial data set, then keep mutating it asynchronously.
recovery_type = self.input.param("recoveryType", "full")
gen = BlobGenerator('mike', 'mike-', self.value_size, end=self.num_items)
self._load_all_buckets(self.master, gen, "create", 0)
tasks = self._async_load_all_buckets(self.master, gen, "update", 0)
servs_in = self.servers[self.nodes_init:self.nodes_init + self.nodes_in]
servs_out = self.servers[self.nodes_init - self.nodes_out:self.nodes_init]
for task in tasks:
task.result(self.wait_timeout * 20)
self._verify_stats_all_buckets(self.servers[:self.nodes_init], timeout=120)
self._wait_for_stats_all_buckets(self.servers[:self.nodes_init])
self.sleep(20)
# Snapshot vbucket seqnos, failover logs and on-disk data for the
# post-rebalance comparisons.
prev_vbucket_stats = self.get_vbucket_seqnos(self.servers[:self.nodes_init], self.buckets)
prev_failover_stats = self.get_failovers_logs(self.servers[:self.nodes_init], self.buckets)
disk_replica_dataset, disk_active_dataset = self.get_and_compare_active_replica_data_set_all(
self.servers[:self.nodes_init], self.buckets, path=None)
self.compare_vbucketseq_failoverlogs(prev_vbucket_stats, prev_failover_stats)
self.rest = RestConnection(self.master)
self.nodes = self.get_nodes(self.master)
# Expected membership once servs_in are added and servs_out removed.
result_nodes = list(set(self.servers[:self.nodes_init] + servs_in) - set(servs_out))
for node in servs_in:
self.rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port)
# Pick one cluster node to hard-fail in addition to the in/out changes.
chosen = RebalanceHelper.pick_nodes(self.master, howmany=1)
# Mark Node for failover
self.sleep(30)
success_failed_over = self.rest.fail_over(chosen[0].id, graceful=False)
# Mark Node for full recovery
if success_failed_over:
self.rest.set_recovery_type(otpNode=chosen[0].id, recoveryType=recovery_type)
self.sleep(30)
# Rebalance honoring server-group (zone) placement while removing servs_out.
self.shuffle_nodes_between_zones_and_rebalance(servs_out)
# Post-rebalance verification: stats, failover logs, data, vbucket spread.
self._verify_stats_all_buckets(result_nodes, timeout=120)
self.verify_cluster_stats(result_nodes, check_ep_items_remaining=True)
self.compare_failovers_logs(prev_failover_stats, result_nodes, self.buckets)
self.sleep(30)
self.data_analysis_active_replica_all(disk_active_dataset, disk_replica_dataset, result_nodes, self.buckets,
path=None)
self.verify_unacked_bytes_all_buckets()
nodes = self.get_nodes_in_cluster(self.master)
self.vb_distribution_analysis(servers=nodes, std=1.0, total_vbuckets=self.total_vbuckets)
示例5: test_start_stop_rebalance_after_failover
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def test_start_stop_rebalance_after_failover(self):
"""
Rebalances nodes out and in with failover
Use different nodes_in and nodes_out params to have uneven add and deletion. Use 'zone'
param to have nodes divided into server groups by having zone > 1.
The test begin with loading the bucket with given number of items. It then fails over a node. We then
rebalance the cluster, while adding or removing given number of nodes. Once the rebalance reaches 50%,
we stop the rebalance and validate the cluster stats. We then restart the rebalance and validate rebalance
was completed successfully.
"""
# NOTE(review): indentation was lost when this example was extracted;
# the statements below all belong to the method body above.
fail_over = self.input.param("fail_over", False)
gen = BlobGenerator('mike', 'mike-', self.value_size, end=self.num_items)
self._load_all_buckets(self.master, gen, "create", 0)
tasks = self._async_load_all_buckets(self.master, gen, "update", 0)
for task in tasks:
task.result(self.wait_timeout * 20)
self._verify_stats_all_buckets(self.servers[:self.nodes_init], timeout=120)
self._wait_for_stats_all_buckets(self.servers[:self.nodes_init])
self.sleep(20)
# Snapshot pre-failover state for the post-rebalance comparisons.
prev_vbucket_stats = self.get_vbucket_seqnos(self.servers[:self.nodes_init], self.buckets)
prev_failover_stats = self.get_failovers_logs(self.servers[:self.nodes_init], self.buckets)
disk_replica_dataset, disk_active_dataset = self.get_and_compare_active_replica_data_set_all(
self.servers[:self.nodes_init], self.buckets, path=None)
self.compare_vbucketseq_failoverlogs(prev_vbucket_stats, prev_failover_stats)
self.rest = RestConnection(self.master)
chosen = RebalanceHelper.pick_nodes(self.master, howmany=1)
# Expected membership once self.servs_in are added and self.servs_out removed.
result_nodes = list(set(self.servers[:self.nodes_init] + self.servs_in) - set(self.servs_out))
for node in self.servs_in:
self.rest.add_node(self.master.rest_username, self.master.rest_password, node.ip, node.port)
# Mark Node for failover
self.rest.fail_over(chosen[0].id, graceful=fail_over)
rebalance = self.cluster.async_rebalance(self.servers[:self.nodes_init], self.servs_in, self.servs_out)
# Let the rebalance reach the 50% mark, then stop it mid-flight.
expected_progress = 50
rest = RestConnection(self.master)
reached = RestHelper(rest).rebalance_reached(expected_progress)
self.assertTrue(reached, "Rebalance failed or did not reach {0}%".format(expected_progress))
if not RestHelper(rest).is_cluster_rebalanced():
self.log.info("Stop the rebalance")
stopped = rest.stop_rebalance(wait_timeout=self.wait_timeout / 3)
self.assertTrue(stopped, msg="Unable to stop rebalance")
self._verify_all_buckets(self.master, timeout=None, max_verify=self.max_verify, batch_size=1)
# Restart the rebalance (zone-aware) and verify it completes cleanly.
self.shuffle_nodes_between_zones_and_rebalance()
self.verify_cluster_stats(result_nodes, check_ep_items_remaining=True, check_bucket_stats=False)
self.sleep(30)
self.data_analysis_active_replica_all(disk_active_dataset, disk_replica_dataset, result_nodes, self.buckets,
path=None)
self.verify_unacked_bytes_all_buckets()
nodes = self.get_nodes_in_cluster(self.master)
self.vb_distribution_analysis(servers=nodes, std=1.0, total_vbuckets=self.total_vbuckets)
示例6: rebalance_in_with_failover
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def rebalance_in_with_failover(self):
    """Fail over a picked node (graceful or hard, per the ``fail_over``
    test param) while rebalancing one new node in, then verify the
    resulting cluster state, failover logs and data sets.
    """
    fail_over = self.input.param("fail_over", False)
    mutation_gen = BlobGenerator('mike', 'mike-', self.value_size, end=self.num_items)
    mutation_tasks = self._async_load_all_buckets(self.master, mutation_gen, "update", 0)
    for mutation_task in mutation_tasks:
        mutation_task.result()
    # Kept for parity with the original even though it is not referenced
    # again below; the new node is addressed via explicit slices.
    incoming_nodes = [self.servers[i + self.nodes_init] for i in range(self.nodes_in)]
    initial_nodes = self.servers[:self.nodes_init]
    self._verify_stats_all_buckets(initial_nodes, timeout=120)
    self._wait_for_stats_all_buckets(initial_nodes)
    self.sleep(20)
    # Snapshot pre-failover state for the post-rebalance comparisons.
    prev_failover_stats = self.get_failovers_logs(initial_nodes, self.buckets)
    prev_vbucket_stats = self.get_vbucket_seqnos(initial_nodes, self.buckets)
    disk_replica_dataset, disk_active_dataset = \
        self.get_and_compare_active_replica_data_set_all(initial_nodes, self.buckets, path=None)
    self.rest = RestConnection(self.master)
    self.nodes = self.get_nodes(self.master)
    picked = RebalanceHelper.pick_nodes(self.master, howmany=1)
    self.rest = RestConnection(self.master)
    self.rest.add_node(self.master.rest_username, self.master.rest_password,
                       self.servers[self.nodes_init].ip, self.servers[self.nodes_init].port)
    # Mark the picked node for failover.
    self.rest.fail_over(picked[0].id, graceful=fail_over)
    if fail_over:
        self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True),
                        msg="Graceful Failover Failed")
    self.nodes = self.rest.node_statuses()
    self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                        ejectedNodes=[picked[0].id])
    self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg="Rebalance Failed")
    # Verification against the post-rebalance membership.
    new_server_list = self.add_remove_servers(self.servers, initial_nodes,
                                              [picked[0]], [self.servers[self.nodes_init]])
    self._wait_for_stats_all_buckets(new_server_list)
    self._verify_stats_all_buckets(new_server_list, timeout=120)
    self.verify_cluster_stats(new_server_list)
    self.compare_failovers_logs(prev_failover_stats, new_server_list, self.buckets)
    self.sleep(30)
    self.data_analysis_active_replica_all(disk_active_dataset, disk_replica_dataset,
                                          new_server_list, self.buckets, path=None)
    self.verify_unacked_bytes_all_buckets()
    cluster_nodes = self.get_nodes_in_cluster(self.master)
    self.vb_distribution_analysis(servers=cluster_nodes, buckets=self.buckets,
                                  std=1.0, total_vbuckets=self.total_vbuckets)
示例7: _common_test_body_failed_swap_rebalance
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def _common_test_body_failed_swap_rebalance(self):
"""Kill memcached mid swap-rebalance and expect the rebalance to fail.

NOTE(review): indentation was lost when this example was extracted,
and the example is truncated (see the omission marker at the end);
structure must be restored from the upstream source before reuse.
"""
master = self.servers[0]
rest = RestConnection(master)
num_initial_servers = self.num_initial_servers
creds = self.input.membase_settings
intial_severs = self.servers[:num_initial_servers]
self.log.info("CREATE BUCKET PHASE")
SwapRebalanceBase.create_buckets(self)
# Cluster all starting set of servers
self.log.info("INITIAL REBALANCE PHASE")
status, servers_rebalanced = RebalanceHelper.rebalance_in(intial_severs, len(intial_severs) - 1)
self.assertTrue(status, msg="Rebalance was failed")
self.log.info("DATA LOAD PHASE")
self.loaders = SwapRebalanceBase.start_load_phase(self, master)
# Wait till load phase is over
SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
self.log.info("DONE LOAD PHASE")
# Start the swap rebalance
current_nodes = RebalanceHelper.getOtpNodeIds(master)
self.log.info("current nodes : {0}".format(current_nodes))
# Pick the nodes to eject; substitute the orchestrator when requested.
toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
optNodesIds = [node.id for node in toBeEjectedNodes]
if self.swap_orchestrator:
status, content = ClusterOperationHelper.find_orchestrator(master)
self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
format(status, content))
# When swapping all the nodes
if self.num_swap is len(current_nodes):
optNodesIds.append(content)
else:
optNodesIds[0] = content
for node in optNodesIds:
self.log.info("removing node {0} and rebalance afterwards".format(node))
# Add the replacement nodes before starting the swap rebalance.
new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
for server in new_swap_servers:
otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
msg = "unable to add node {0} to the cluster"
self.assertTrue(otpNode, msg.format(server.ip))
if self.swap_orchestrator:
rest = RestConnection(new_swap_servers[0])
master = new_swap_servers[0]
self.log.info("DATA ACCESS PHASE")
self.loaders = SwapRebalanceBase.start_access_phase(self, master)
self.log.info("SWAP REBALANCE PHASE")
rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
ejectedNodes=optNodesIds)
SwapRebalanceBase.sleep(self, 10, "Rebalance should start")
self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(self.percentage_progress))
reached = RestHelper(rest).rebalance_reached(self.percentage_progress)
if reached == 100 and not RestHelper(rest).is_cluster_rebalanced():
# handle situation when rebalance failed at the beginning
self.log.error('seems rebalance failed!')
self.log.info("Latest logs from UI:")
for i in rest.get_logs(): self.log.error(i)
self.fail("rebalance failed even before killing memcached")
# Locate the memcached PID so it can be killed mid-rebalance.
bucket = rest.get_buckets()[0].name
pid = None
if self.swap_orchestrator:
# get PID via remote connection if master is a new node
shell = RemoteMachineShellConnection(master)
o, _ = shell.execute_command("ps -eo comm,pid | awk '$1 == \"memcached\" { print $2 }'")
pid = o[0]
shell.disconnect()
else:
for i in xrange(2):
try:
_mc = MemcachedClientHelper.direct_client(master, bucket)
pid = _mc.stats()["pid"]
break
except EOFError as e:
self.log.error("{0}.Retry in 2 sec".format(e))
SwapRebalanceBase.sleep(self, 1)
if pid is None:
self.fail("impossible to get a PID")
# Kill memcached via diag/eval on the cluster node.
command = "os:cmd(\"kill -9 {0} \")".format(pid)
self.log.info(command)
killed = rest.diag_eval(command)
self.log.info("killed {0}:{1}?? {2} ".format(master.ip, master.port, killed))
self.log.info("sleep for 10 sec after kill memcached")
SwapRebalanceBase.sleep(self, 10)
# we can't get stats for new node when rebalance falls
if not self.swap_orchestrator:
ClusterOperationHelper._wait_warmup_completed(self, [master], bucket, wait_time=600)
i = 0
# we expect that rebalance will be failed
try:
rest.monitorRebalance()
except RebalanceFailedException:
# retry rebalance if it failed
self.log.warn("Rebalance failed but it's expected")
#......... remaining code omitted by the example aggregator .........
示例8: _common_test_body_swap_rebalance
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def _common_test_body_swap_rebalance(self, do_stop_start=False):
"""Run a swap rebalance, optionally stopping and restarting it at
20/40/60% progress when do_stop_start is True.

NOTE(review): indentation was lost when this example was extracted;
structure must be restored from the upstream source before reuse.
"""
master = self.servers[0]
rest = RestConnection(master)
num_initial_servers = self.num_initial_servers
creds = self.input.membase_settings
intial_severs = self.servers[:num_initial_servers]
self.log.info("CREATE BUCKET PHASE")
SwapRebalanceBase.create_buckets(self)
# Cluster all starting set of servers
self.log.info("INITIAL REBALANCE PHASE")
status, servers_rebalanced = RebalanceHelper.rebalance_in(intial_severs, len(intial_severs) - 1)
self.assertTrue(status, msg="Rebalance was failed")
self.log.info("DATA LOAD PHASE")
self.loaders = SwapRebalanceBase.start_load_phase(self, master)
# Wait till load phase is over
SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
self.log.info("DONE LOAD PHASE")
# Start the swap rebalance
current_nodes = RebalanceHelper.getOtpNodeIds(master)
self.log.info("current nodes : {0}".format(current_nodes))
# Pick the nodes to eject; substitute the orchestrator when requested.
toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
optNodesIds = [node.id for node in toBeEjectedNodes]
if self.swap_orchestrator:
status, content = ClusterOperationHelper.find_orchestrator(master)
self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
format(status, content))
if self.num_swap is len(current_nodes):
optNodesIds.append(content)
else:
optNodesIds[0] = content
for node in optNodesIds:
self.log.info("removing node {0} and rebalance afterwards".format(node))
# Add the replacement nodes before starting the swap rebalance.
new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
for server in new_swap_servers:
otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
msg = "unable to add node {0} to the cluster"
self.assertTrue(otpNode, msg.format(server.ip))
if self.swap_orchestrator:
rest = RestConnection(new_swap_servers[0])
master = new_swap_servers[0]
if self.do_access:
self.log.info("DATA ACCESS PHASE")
self.loaders = SwapRebalanceBase.start_access_phase(self, master)
self.log.info("SWAP REBALANCE PHASE")
rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
ejectedNodes=optNodesIds)
if do_stop_start:
# Rebalance is stopped at 20%, 40% and 60% completion
retry = 0
for expected_progress in (20, 40, 60):
self.log.info("STOP/START SWAP REBALANCE PHASE WITH PROGRESS {0}%".
format(expected_progress))
while True:
progress = rest._rebalance_progress()
if progress < 0:
self.log.error("rebalance progress code : {0}".format(progress))
break
elif progress == 100:
self.log.warn("Rebalance has already reached 100%")
break
elif progress >= expected_progress:
# Stop at this checkpoint, wait, then restart the rebalance.
self.log.info("Rebalance will be stopped with {0}%".format(progress))
stopped = rest.stop_rebalance()
self.assertTrue(stopped, msg="unable to stop rebalance")
SwapRebalanceBase.sleep(self, 20)
rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
ejectedNodes=optNodesIds)
break
elif retry > 100:
break
else:
retry += 1
SwapRebalanceBase.sleep(self, 1)
self.assertTrue(rest.monitorRebalance(),
msg="rebalance operation failed after adding node {0}".format(optNodesIds))
SwapRebalanceBase.verification_phase(self, master)
示例9: common_test_body
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def common_test_body(self, failover_reason):
"""
Main Test body which contains the flow of the failover basic steps
1. Starts Operations if programmed into the test case (before/after)
2. Start View and Index Building operations
3. Failover K out of N nodes (failover can be HARDFAILOVER/GRACEFUL)
4.1 Rebalance the cluster is failover of K nodeStatuses
4.2 Run Add-Back operation with recoveryType = (full/delta) with rebalance
5. Verify all expected operations completed by checking stats, replication, views, data correctness

NOTE(review): indentation was lost when this example was extracted;
structure must be restored from the upstream source before reuse.
"""
# Pick the reference node for communication
# We pick a node in the cluster which will NOT be failed over
self.filter_list = []
if self.failoverMaster:
self.master = self.servers[1]
self.log.info(" Picking node {0} as reference node for test case".format(self.master.ip))
self.print_test_params(failover_reason)
self.rest = RestConnection(self.master)
self.nodes = self.rest.node_statuses()
# Set the data path for the cluster
self.data_path = self.rest.get_data_path()
# Check if the test case has to be run for 3.0.0
versions = self.rest.get_nodes_versions()
self.version_greater_than_2_5 = True
for version in versions:
if "3" > version:
self.version_greater_than_2_5 = False
# Do not run this test if graceful category is being used on pre-3.0 nodes
if not self.version_greater_than_2_5 and (self.graceful or (self.recoveryType != None)):
self.log.error("Graceful failover can't be applied to nodes with version less then 3.*")
self.log.error("Please check configuration parameters: SKIPPING TEST.")
return
# Find nodes that will undergo failover
if self.failoverMaster:
self.chosen = RebalanceHelper.pick_nodes(self.master, howmany=1, target_node = self.servers[0])
else:
self.chosen = RebalanceHelper.pick_nodes(self.master, howmany=self.num_failed_nodes)
# Perform operations - Create/Update/Delete
# self.withMutationOps = True => Run Operations in parallel to failover
# self.withMutationOps = False => Run Operations Before failover
self.load_initial_data()
if not self.withMutationOps:
self.run_mutation_operations()
# Perform View Creation Tasks and check for completion if required before failover
if self.withViewsOps:
self.run_view_creation_operations(self.servers)
if not self.createIndexesDuringFailover:
self.query_and_monitor_view_tasks(self.servers)
# Take snap-shot of data set used for validation
record_static_data_set ={}
prev_vbucket_stats = {}
prev_failover_stats = {}
if not self.withMutationOps:
record_static_data_set = self.get_data_set_all(self.servers, self.buckets, path = None)
# Capture vbucket and failover stats if test version >= 2.5.*
if self.version_greater_than_2_5 and self.upr_check:
prev_vbucket_stats = self.get_vbucket_seqnos(self.servers, self.buckets)
prev_failover_stats = self.get_failovers_logs(self.servers, self.buckets)
# Perform Operations related to failover
if self.withMutationOps or self.withViewsOps or self.compact:
self.run_failover_operations_with_ops(self.chosen, failover_reason)
else:
self.run_failover_operations(self.chosen, failover_reason)
# Perform Add Back Operation with Rebalance Or only Rebalance with Verifications
if not self.gracefulFailoverFail and self.runRebalanceAfterFailover:
if self.add_back_flag:
self.run_add_back_operation_and_verify(self.chosen, prev_vbucket_stats, record_static_data_set, prev_failover_stats)
else:
self.run_rebalance_after_failover_and_verify(self.chosen, prev_vbucket_stats, record_static_data_set, prev_failover_stats)
else:
return
if self.during_ops == None:
self.verify_unacked_bytes_all_buckets(filter_list = self.filter_list, master_node = self.master)
示例10: _common_test_body_failed_swap_rebalance
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def _common_test_body_failed_swap_rebalance(self):
"""Repeatedly kill cluster child processes via diag/eval at 20/40/60%
swap-rebalance progress, restarting the rebalance each time, then
verify the data.

NOTE(review): indentation was lost when this example was extracted;
structure must be restored from the upstream source before reuse.
"""
master = self.servers[0]
rest = RestConnection(master)
num_initial_servers = self.num_initial_servers
creds = self.input.membase_settings
intial_severs = self.servers[:num_initial_servers]
# Cluster all starting set of servers
self.log.info("INITIAL REBALANCE PHASE")
RebalanceHelper.rebalance_in(intial_severs, len(intial_severs)-1)
self.log.info("CREATE BUCKET PHASE")
SwapRebalanceBase.create_buckets(self)
self.log.info("DATA LOAD PHASE")
loaders = SwapRebalanceBase.start_load_phase(self, master)
# Wait till load phase is over
SwapRebalanceBase.stop_load(loaders, do_stop=False)
self.log.info("DONE LOAD PHASE")
# Start the swap rebalance
current_nodes = RebalanceHelper.getOtpNodeIds(master)
self.log.info("current nodes : {0}".format(current_nodes))
# Pick the nodes to eject; substitute the orchestrator when requested.
toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
optNodesIds = [node.id for node in toBeEjectedNodes]
if self.swap_orchestrator:
status, content = ClusterHelper.find_orchestrator(master)
self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
format(status, content))
# When swapping all the nodes
if self.num_swap is len(current_nodes):
optNodesIds.append(content)
else:
optNodesIds[0] = content
for node in optNodesIds:
self.log.info("removing node {0} and rebalance afterwards".format(node))
# Add the replacement nodes before starting the swap rebalance.
new_swap_servers = self.servers[num_initial_servers:num_initial_servers+self.num_swap]
for server in new_swap_servers:
otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
msg = "unable to add node {0} to the cluster"
self.assertTrue(otpNode, msg.format(server.ip))
if self.swap_orchestrator:
rest = RestConnection(new_swap_servers[0])
master = new_swap_servers[0]
self.log.info("DATA ACCESS PHASE")
loaders = SwapRebalanceBase.start_access_phase(self, master)
self.log.info("SWAP REBALANCE PHASE")
rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
ejectedNodes=optNodesIds)
# Rebalance is failed at 20%, 40% and 60% completion
for i in [1, 2, 3]:
expected_progress = 20*i
self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
reached = RestHelper(rest).rebalance_reached(expected_progress)
# Kill ns_server child processes (memcached/moxi), then restart rebalance.
command = "[erlang:exit(element(2, X), kill) || X <- supervisor:which_children(ns_port_sup)]."
memcached_restarted = rest.diag_eval(command)
self.assertTrue(memcached_restarted, "unable to restart memcached/moxi process through diag/eval")
time.sleep(20)
rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],\
ejectedNodes=optNodesIds)
# Stop loaders
SwapRebalanceBase.stop_load(loaders)
self.assertTrue(rest.monitorRebalance(),
msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))
self.log.info("DONE DATA ACCESS PHASE")
#for bucket in rest.get_buckets():
# SwapRebalanceBase.verify_data(new_swap_servers[0], bucket_data[bucket.name].get('inserted_keys'),\
# bucket.name, self)
# RebalanceHelper.wait_for_persistence(master, bucket.name)
self.log.info("VERIFICATION PHASE")
SwapRebalanceBase.items_verification(master, self)
示例11: test_failover_continuous_bidirectional_sets_deletes
# 需要导入模块: from membase.helper.rebalance_helper import RebalanceHelper [as 别名]
# 或者: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes [as 别名]
def test_failover_continuous_bidirectional_sets_deletes(self):
cluster_ref_a = "cluster_ref_a"
master_a = self._input.clusters.get(0)[0]
rest_conn_a = RestConnection(master_a)
cluster_ref_b = "cluster_ref_b"
master_b = self._input.clusters.get(1)[0]
rest_conn_b = RestConnection(master_b)
# Rebalance all the nodes together
servers_a = self._input.clusters.get(0)
servers_b = self._input.clusters.get(1)
rebalanced_servers_a = []
rebalanced_servers_b = []
RebalanceHelper.rebalance_in(servers_a, len(servers_a)-1)
RebalanceHelper.rebalance_in(servers_b, len(servers_b)-1)
rebalanced_servers_a.extend(servers_a)
rebalanced_servers_b.extend(servers_b)
# Setup bi-directional continuous replication
replication_type = "continuous"
rest_conn_a.add_remote_cluster(master_b.ip, master_b.port,
master_b.rest_username,
master_b.rest_password, cluster_ref_b)
rest_conn_b.add_remote_cluster(master_a.ip, master_a.port,
master_a.rest_username,
master_a.rest_password, cluster_ref_a)
(rep_database_a, rep_id_a) = rest_conn_a.start_replication(
replication_type, self._buckets[0],
cluster_ref_b)
(rep_database_b, rep_id_b) = rest_conn_b.start_replication(
replication_type, self._buckets[0],
cluster_ref_a)
load_thread_list = []
# Start load
kvstore = ClientKeyValueStore()
self._params["ops"] = "set"
task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
self._buckets[0],
task_def, kvstore)
load_thread.start()
load_thread.join()
RebalanceHelper.wait_for_persistence(master_a, self._buckets[0])
# Do some deletes
self._params["ops"] = "delete"
self._params["count"] = self._num_items/5
task_def = RebalanceDataGenerator.create_loading_tasks(self._params)
load_thread = RebalanceDataGenerator.start_load(rest_conn_a,
self._buckets[0],
task_def, kvstore)
load_thread_list.append(load_thread)
# Start all loads concurrently
for lt in load_thread_list:
lt.start()
# Do the failover of nodes on both clusters
self.log.info("Failing over nodes")
self.log.info("current nodes on cluster 1: {0}".format(RebalanceHelper.getOtpNodeIds(master_a)))
self.log.info("current nodes on cluster 2: {0}".format(RebalanceHelper.getOtpNodeIds(master_b)))
# Find nodes to be failed_over
toBeEjectedNodes = RebalanceHelper.pick_nodes(master_a, howmany=self._failover_factor)
optNodesIds_a = [node.id for node in toBeEjectedNodes]
if self._fail_orchestrator_a:
status, content = ClusterOperationHelper.find_orchestrator(master_a)
self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
format(status, content))
optNodesIds_a[0] = content
master_a = self._input.clusters.get(0)[-1]
rest_conn_a = RestConnection(master_a)
#Failover selected nodes
for node in optNodesIds_a:
self.log.info("failover node {0} and rebalance afterwards".format(node))
rest_conn_a.fail_over(node)
toBeEjectedNodes = RebalanceHelper.pick_nodes(master_b, howmany=self._failover_factor)
optNodesIds_b = [node.id for node in toBeEjectedNodes]
if self._fail_orchestrator_b:
status, content = ClusterOperationHelper.find_orchestrator(master_b)
self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
format(status, content))
optNodesIds_b[0] = content
master_b = self._input.clusters.get(1)[-1]
rest_conn_b = RestConnection(master_b)
self._state.append((rest_conn_a, cluster_ref_b, rep_database_a, rep_id_a))
self._state.append((rest_conn_b, cluster_ref_a, rep_database_b, rep_id_b))
#Failover selected nodes
for node in optNodesIds_b:
self.log.info("failover node {0} and rebalance afterwards".format(node))
rest_conn_b.fail_over(node)
#......... part of the code is omitted here .........
# Example 12: common_test_body
# Required import: from membase.helper.rebalance_helper import RebalanceHelper
# Or: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes
def common_test_body(self, keys_count, replica, load_ratio, failover_reason):
    """Failover test driver: build a cluster, load data, then repeatedly
    fail over `replica` nodes, rebalance them out, and verify replication
    and data after each pass.

    :param keys_count: number of keys to load into the 'default' bucket
    :param replica: bucket replica count; also how many nodes are failed
        over per iteration
    :param load_ratio: load ratio forwarded to FailoverBaseTest.load_data
    :param failover_reason: 'stop_server' or 'firewall' -- how the chosen
        nodes are made unhealthy before the failover call
    """
    log = logger.Logger.get_logger()
    log.info("keys_count : {0}".format(keys_count))
    log.info("replica : {0}".format(replica))
    log.info("load_ratio : {0}".format(load_ratio))
    log.info("failover_reason : {0}".format(failover_reason))
    master = self._servers[0]
    log.info('picking server : {0} as the master'.format(master))
    # Initialize the cluster on the master; the bucket gets 2/3 of the
    # node's memory quota.
    rest = RestConnection(master)
    info = rest.get_nodes_self()
    rest.init_cluster(username=master.rest_username,
                      password=master.rest_password)
    rest.init_cluster_memoryQuota(memoryQuota=info.mcdMemoryReserved)
    bucket_ram = info.memoryQuota * 2 / 3
    bucket = 'default'
    rest.create_bucket(bucket=bucket,
                       ramQuotaMB=bucket_ram,
                       replicaNumber=replica,
                       proxyPort=info.moxi)
    ready = BucketOperationHelper.wait_for_memcached(master, bucket)
    self.assertTrue(ready, "wait_for_memcached_failed")
    # Add every configured server to the cluster and rebalance them in.
    credentials = self._input.membase_settings
    ClusterOperationHelper.add_all_nodes_or_assert(master, self._servers, credentials, self)
    nodes = rest.node_statuses()
    rest.rebalance(otpNodes=[node.id for node in nodes], ejectedNodes=[])
    msg = "rebalance failed after adding these nodes {0}".format(nodes)
    self.assertTrue(rest.monitorRebalance(), msg=msg)
    inserted_keys = FailoverBaseTest.load_data(master, bucket, keys_count, load_ratio)
    inserted_count = len(inserted_keys)
    log.info('inserted {0} keys'.format(inserted_count))
    nodes = rest.node_statuses()
    # Keep failing over `replica` nodes per pass while more than
    # replica + 1 nodes remain in the cluster.
    while (len(nodes) - replica) > 1:
        final_replication_state = RestHelper(rest).wait_for_replication(900)
        msg = "replication state after waiting for up to 15 minutes : {0}"
        self.log.info(msg.format(final_replication_state))
        chosen = RebalanceHelper.pick_nodes(master, howmany=replica)
        for node in chosen:
            #let's do op
            if failover_reason == 'stop_server':
                self.stop_server(node)
                log.info("10 seconds delay to wait for membase-server to shutdown")
                #wait for 5 minutes until node is down
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                msg="node status is not unhealthy even after waiting for 5 minutes")
            elif failover_reason == "firewall":
                RemoteUtilHelper.enable_firewall(self._servers, node, bidirectional=self.bidirectional)
                self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
                                msg="node status is not unhealthy even after waiting for 5 minutes")
            failed_over = rest.fail_over(node.id)
            if not failed_over:
                self.log.info("unable to failover the node the first time. try again in 60 seconds..")
                #try again in 60 seconds
                # NOTE(review): the comment/log say 60 seconds but the sleep
                # is 75 -- confirm which value is intended.
                time.sleep(75)
                failed_over = rest.fail_over(node.id)
            self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
            log.info("failed over node : {0}".format(node.id))
        #REMOVEME -
        log.info("10 seconds sleep after failover before invoking rebalance...")
        time.sleep(10)
        # Rebalance the failed-over nodes out of the cluster.
        rest.rebalance(otpNodes=[node.id for node in nodes],
                       ejectedNodes=[node.id for node in chosen])
        msg = "rebalance failed while removing failover nodes {0}".format(chosen)
        self.assertTrue(rest.monitorRebalance(), msg=msg)
        FailoverBaseTest.replication_verification(master, bucket, replica, inserted_count, self)
        nodes = rest.node_statuses()
    FailoverBaseTest.verify_data(master, inserted_keys, bucket, self)
# Example 13: _add_back_failed_node
# Required import: from membase.helper.rebalance_helper import RebalanceHelper
# Or: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes
def _add_back_failed_node(self, do_node_cleanup=False):
    """Fail over a set of nodes, rebalance them out, then add the same
    nodes back in and rebalance again, verifying data afterwards.

    Steps: create buckets, rebalance all servers in, load data, pick
    self.failover_factor nodes to fail over (optionally substituting the
    orchestrator when self.fail_orchestrator is set), fail them over and
    rebalance them out, then re-add the ejected servers and rebalance a
    final time before verification.

    :param do_node_cleanup: when True, clean up the failed-over nodes
        before adding them back (currently a no-op placeholder).
    """
    master = self.servers[0]
    rest = RestConnection(master)
    creds = self.input.membase_settings
    self.log.info("CREATE BUCKET PHASE")
    SwapRebalanceBase.create_buckets(self)
    # Cluster all servers
    self.log.info("INITIAL REBALANCE PHASE")
    status, servers_rebalanced = RebalanceHelper.rebalance_in(self.servers, len(self.servers) - 1)
    self.assertTrue(status, msg="Rebalance was failed")
    self.log.info("DATA LOAD PHASE")
    self.loaders = SwapRebalanceBase.start_load_phase(self, master)
    # Wait till load phase is over
    SwapRebalanceBase.stop_load(self.loaders, do_stop=False)
    self.log.info("DONE LOAD PHASE")
    # Start the swap rebalance
    current_nodes = RebalanceHelper.getOtpNodeIds(master)
    self.log.info("current nodes : {0}".format(current_nodes))
    toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.failover_factor)
    optNodesIds = [node.id for node in toBeEjectedNodes]
    # List of servers that will not be failed over.
    # Hoisted the ejected-IP list out of the loop (it is loop-invariant).
    ejected_ips = [node.ip for node in toBeEjectedNodes]
    not_failed_over = []
    for server in self.servers:
        if server.ip not in ejected_ips:
            not_failed_over.append(server)
            self.log.info("Node %s not failed over" % server.ip)
    if self.fail_orchestrator:
        status, content = ClusterOperationHelper.find_orchestrator(master)
        self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
                        format(status, content))
        # When swapping all the nodes
        # BUG FIX: compare integers by value with '==' instead of 'is';
        # identity between ints is a CPython small-int caching detail and
        # is not guaranteed.
        if self.num_swap == len(current_nodes):
            optNodesIds.append(content)
        else:
            optNodesIds[0] = content
        # Re-point master at a node that stays in the cluster.
        master = not_failed_over[-1]
    self.log.info("DATA ACCESS PHASE")
    self.loaders = SwapRebalanceBase.start_access_phase(self, master)
    # Failover selected nodes
    for node in optNodesIds:
        self.log.info("failover node {0} and rebalance afterwards".format(node))
        rest.fail_over(node)
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], \
                   ejectedNodes=optNodesIds)
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(optNodesIds))
    # Add back the same failed over nodes
    # Cleanup the node, somehow
    # TODO: cluster_run?
    if do_node_cleanup:
        pass
    # Make rest connection with node part of cluster
    rest = RestConnection(master)
    # Collect the IPs currently in the cluster (REST returns them as
    # unicode strings on Python 2).
    add_back_servers = []
    nodes = rest.get_nodes()
    for server in [node.ip for node in nodes]:
        if isinstance(server, unicode):
            add_back_servers.append(server)
    # Servers not currently in the cluster are the ones to add back.
    final_add_back_servers = []
    for server in self.servers:
        if server.ip not in add_back_servers:
            final_add_back_servers.append(server)
    for server in final_add_back_servers:
        otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
        msg = "unable to add node {0} to the cluster"
        self.assertTrue(otpNode, msg.format(server.ip))
    rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()], ejectedNodes=[])
    self.assertTrue(rest.monitorRebalance(),
                    msg="rebalance operation failed after adding node {0}".format(add_back_servers))
    SwapRebalanceBase.verification_phase(self, master)
# Example 14: common_test_body
# Required import: from membase.helper.rebalance_helper import RebalanceHelper
# Or: from membase.helper.rebalance_helper.RebalanceHelper import pick_nodes
def common_test_body(self, keys_count, failover_reason):
log = logger.Logger.get_logger()
log.info("keys_count : {0}".format(keys_count))
log.info("replicas : {0}".format(self.num_replicas))
log.info("failover_reason : {0}".format(failover_reason))
log.info('picking server : {0} as the master'.format(self.master))
self._load_all_buckets(self.master, self.gen_create, "create", 0,
batch_size=10000, pause_secs=5, timeout_secs=180)
self._wait_for_stats_all_buckets(self.servers)
_servers_ = self.servers
rest = RestConnection(self.master)
nodes = rest.node_statuses()
RebalanceHelper.wait_for_replication(self.servers, self.cluster)
chosen = RebalanceHelper.pick_nodes(self.master, howmany=self.num_replicas)
for node in chosen:
# let's do op
if failover_reason == 'stop_server':
self.stop_server(node)
log.info("10 seconds delay to wait for membase-server to shutdown")
# wait for 5 minutes until node is down
self.assertTrue(RestHelper(rest).wait_for_node_status(node, "unhealthy", 300),
msg="node status is not unhealthy even after waiting for 5 minutes")
elif failover_reason == "firewall":
server = [srv for srv in self.servers if node.ip == srv.ip][0]
RemoteUtilHelper.enable_firewall(server, bidirectional=self.bidirectional)
status = RestHelper(rest).wait_for_node_status(node, "unhealthy", 300)
if status:
log.info("node {0}:{1} is 'unhealthy' as expected".format(node.ip, node.port))
else:
# verify iptables on the node if something wrong
for server in self.servers:
if server.ip == node.ip:
shell = RemoteMachineShellConnection(server)
info = shell.extract_remote_info()
if info.type.lower() == "windows":
o, r = shell.execute_command("netsh advfirewall show allprofiles")
else:
o, r = shell.execute_command("/sbin/iptables --list")
shell.log_command_output(o, r)
shell.disconnect()
for i in rest.get_logs(): self.log.error(i)
api = rest.baseUrl + 'nodeStatuses'
status, content, header = rest._http_request(api)
json_parsed = json.loads(content)
self.log.info("nodeStatuses: {0}".format(json_parsed))
self.fail("node status is not unhealthy even after waiting for 5 minutes")
failed_over = rest.fail_over(node.id)
if not failed_over:
self.log.info("unable to failover the node the first time. try again in 60 seconds..")
# try again in 75 seconds
time.sleep(75)
failed_over = rest.fail_over(node.id)
self.assertTrue(failed_over, "unable to failover node after {0}".format(failover_reason))
log.info("failed over node : {0}".format(node.id))
self._failed_nodes.append(node)
if self.add_back_flag:
for node in self._failed_nodes:
rest.add_back_node(node.id)
time.sleep(5)
log.info("10 seconds sleep after failover before invoking rebalance...")
time.sleep(10)
rest.rebalance(otpNodes=[node.id for node in nodes],
ejectedNodes=[])
msg = "rebalance failed while removing failover nodes {0}".format(chosen)
self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
else:
# Need a delay > min because MB-7168
log.info("60 seconds sleep after failover before invoking rebalance...")
time.sleep(60)
rest.rebalance(otpNodes=[node.id for node in nodes],
ejectedNodes=[node.id for node in chosen])
if self.during_ops:
self.sleep(5, "Wait for some progress in rebalance")
if self.during_ops == "change_password":
old_pass = self.master.rest_password
self.change_password(new_password=self.input.param("new_password", "new_pass"))
rest = RestConnection(self.master)
elif self.during_ops == "change_port":
self.change_port(new_port=self.input.param("new_port", "9090"))
rest = RestConnection(self.master)
try:
msg = "rebalance failed while removing failover nodes {0}".format(chosen)
self.assertTrue(rest.monitorRebalance(stop_if_loop=True), msg=msg)
for failed in chosen:
for server in _servers_:
if server.ip == failed.ip:
_servers_.remove(server)
self._cleanup_nodes.append(server)
log.info("Begin VERIFICATION ...")
RebalanceHelper.wait_for_replication(_servers_, self.cluster)
self.verify_cluster_stats(_servers_, self.master)
finally:
if self.during_ops:
if self.during_ops == "change_password":
#......... part of the code is omitted here .........