pacemaker_cluster: enhancements and add unit tests (#10227)
Some checks are pending
EOL CI / EOL Sanity (Ⓐ2.16) (push) Waiting to run
EOL CI / EOL Units (Ⓐ2.16+py2.7) (push) Waiting to run
EOL CI / EOL Units (Ⓐ2.16+py3.11) (push) Waiting to run
EOL CI / EOL Units (Ⓐ2.16+py3.6) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+alpine3+py:azp/posix/1/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+alpine3+py:azp/posix/2/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+alpine3+py:azp/posix/3/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+fedora38+py:azp/posix/1/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+fedora38+py:azp/posix/2/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+fedora38+py:azp/posix/3/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+opensuse15+py:azp/posix/1/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+opensuse15+py:azp/posix/2/) (push) Waiting to run
EOL CI / EOL I (Ⓐ2.16+opensuse15+py:azp/posix/3/) (push) Waiting to run
nox / Run extra sanity tests (push) Waiting to run

* feat(initial): Add unit tests and rewrite pacemaker_cluster

This commit introduces unit tests and rewrites the pacemaker_cluster
module to use the pacemaker module utils.

* feat(cleanup): Various fixes and add resource state

This commit migrates the pacemaker_cluster module's cleanup state to the
pacemaker_resource module. Additionally, the unit tests for
pacemaker_cluster have been corrected to use the proper order of mocked
run_command calls.

* doc(botmeta): Add author to pacemaker_cluster

* style(whitespace): Cleanup test files

* refactor(cleanup): Remove unused state value

* bug(fix): Parse apply_all as separate option

* refactor(review): Apply code review suggestions

This commit refactors the breaking changes in the pacemaker_cluster module
into deprecated features. The following will be scheduled for deprecation:
`state: cleanup` and `state: None`.

* Apply suggestions from code review

Co-authored-by: Felix Fontein <felix@fontein.de>

* refactor(review): Additional review suggestions

* refactor(deprecations): Remove all deprecation changes

* refactor(review): Enhance rename changelog entry and fix empty string logic

* refactor(cleanup): Remove from pacemaker_resource

* Apply suggestions from code review

Co-authored-by: Felix Fontein <felix@fontein.de>

* refactor(review): Add changelog and revert required name

* revert(default): Use default state=present

* Update changelogs/fragments/10227-pacemaker-cluster-and-resource-enhancement.yml

Co-authored-by: Felix Fontein <felix@fontein.de>

* Update changelog fragment.

---------

Co-authored-by: Felix Fontein <felix@fontein.de>
This commit is contained in:
Dexter 2025-07-14 01:48:36 -04:00 committed by GitHub
commit 283d947f17
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 695 additions and 156 deletions

View file

@ -14,7 +14,12 @@ _state_map = {
"absent": "remove",
"status": "status",
"enabled": "enable",
"disabled": "disable"
"disabled": "disable",
"online": "start",
"offline": "stop",
"maintenance": "set",
"config": "config",
"cleanup": "cleanup",
}
@ -38,20 +43,19 @@ def fmt_resource_argument(value):
def get_pacemaker_maintenance_mode(runner):
with runner("config") as ctx:
rc, out, err = ctx.run()
with runner("cli_action config") as ctx:
rc, out, err = ctx.run(cli_action="property")
maintenance_mode_output = list(filter(lambda string: "maintenance-mode=true" in string.lower(), out.splitlines()))
return bool(maintenance_mode_output)
def pacemaker_runner(module, cli_action=None, **kwargs):
def pacemaker_runner(module, **kwargs):
runner_command = ['pcs']
if cli_action:
runner_command.append(cli_action)
runner = CmdRunner(
module,
command=runner_command,
arg_formats=dict(
cli_action=cmd_runner_fmt.as_list(),
state=cmd_runner_fmt.as_map(_state_map),
name=cmd_runner_fmt.as_list(),
resource_type=cmd_runner_fmt.as_func(fmt_resource_type),
@ -59,6 +63,7 @@ def pacemaker_runner(module, cli_action=None, **kwargs):
resource_operation=cmd_runner_fmt.as_func(fmt_resource_operation),
resource_meta=cmd_runner_fmt.stack(cmd_runner_fmt.as_opt_val)("meta"),
resource_argument=cmd_runner_fmt.as_func(fmt_resource_argument),
apply_all=cmd_runner_fmt.as_bool("--all"),
wait=cmd_runner_fmt.as_opt_eq_val("--wait"),
config=cmd_runner_fmt.as_fixed("config"),
force=cmd_runner_fmt.as_bool("--force"),

View file

@ -13,6 +13,7 @@ module: pacemaker_cluster
short_description: Manage pacemaker clusters
author:
- Mathieu Bultel (@matbu)
- Dexter Le (@munchtoast)
description:
- This module can manage a pacemaker cluster and nodes from Ansible using the pacemaker CLI.
extends_documentation_fragment:
@ -26,18 +27,20 @@ options:
state:
description:
- Indicate desired state of the cluster.
choices: [cleanup, offline, online, restart]
- The value V(maintenance) has been added in community.general 11.1.0.
choices: [cleanup, offline, online, restart, maintenance]
type: str
node:
name:
description:
- Specify which node of the cluster you want to manage. V(null) == the cluster status itself, V(all) == check the status
of all nodes.
type: str
aliases: ['node']
timeout:
description:
- Timeout when the module should considered that the action has failed.
default: 300
- Timeout period (in seconds) for polling the cluster operation.
type: int
default: 300
force:
description:
- Force the change of the cluster state.
@ -63,132 +66,104 @@ out:
returned: always
"""
import time
from ansible.module_utils.basic import AnsibleModule
from ansible_collections.community.general.plugins.module_utils.module_helper import StateModuleHelper
from ansible_collections.community.general.plugins.module_utils.pacemaker import pacemaker_runner, get_pacemaker_maintenance_mode
_PCS_CLUSTER_DOWN = "Error: cluster is not currently running on this node"
class PacemakerCluster(StateModuleHelper):
module = dict(
argument_spec=dict(
state=dict(type='str', choices=[
'cleanup', 'offline', 'online', 'restart', 'maintenance']),
name=dict(type='str', aliases=['node']),
timeout=dict(type='int', default=300),
force=dict(type='bool', default=True)
),
supports_check_mode=True,
)
default_state = ""
def __init_module__(self):
self.runner = pacemaker_runner(self.module)
self.vars.set('apply_all', True if not self.module.params['name'] else False)
get_args = dict([('cli_action', 'cluster'), ('state', 'status'), ('name', None), ('apply_all', self.vars.apply_all)])
if self.module.params['state'] == "maintenance":
get_args['cli_action'] = "property"
get_args['state'] = "config"
get_args['name'] = "maintenance-mode"
elif self.module.params['state'] == "cleanup":
get_args['cli_action'] = "resource"
get_args['name'] = self.module.params['name']
def get_cluster_status(module):
cmd = ["pcs", "cluster", "status"]
rc, out, err = module.run_command(cmd)
if out in _PCS_CLUSTER_DOWN:
return 'offline'
else:
return 'online'
self.vars.set('get_args', get_args)
self.vars.set('previous_value', self._get()['out'])
self.vars.set('value', self.vars.previous_value, change=True, diff=True)
if not self.module.params['state']:
self.module.deprecate(
'Parameter "state" values not set is being deprecated. Make sure to provide a value for "state"',
version='12.0.0',
collection_name='community.general'
)
def get_node_status(module, node='all'):
node_l = ["all"] if node == "all" else []
cmd = ["pcs", "cluster", "pcsd-status"] + node_l
rc, out, err = module.run_command(cmd)
if rc == 1:
module.fail_json(msg="Command execution failed.\nCommand: `%s`\nError: %s" % (cmd, err))
status = []
for o in out.splitlines():
status.append(o.split(':'))
return status
def __quit_module__(self):
self.vars.set('value', self._get()['out'])
def _process_command_output(self, fail_on_err, ignore_err_msg=""):
def process(rc, out, err):
if fail_on_err and rc != 0 and err and ignore_err_msg not in err:
self.do_raise('pcs failed with error (rc={0}): {1}'.format(rc, err))
out = out.rstrip()
return None if out == "" else out
return process
def clean_cluster(module, timeout):
cmd = ["pcs", "resource", "cleanup"]
rc, out, err = module.run_command(cmd)
if rc == 1:
module.fail_json(msg="Command execution failed.\nCommand: `%s`\nError: %s" % (cmd, err))
def _get(self):
with self.runner('cli_action state name') as ctx:
result = ctx.run(cli_action=self.vars.get_args['cli_action'], state=self.vars.get_args['state'], name=self.vars.get_args['name'])
return dict([('rc', result[0]),
('out', result[1] if result[1] != "" else None),
('err', result[2])])
def state_cleanup(self):
with self.runner('cli_action state name', output_process=self._process_command_output(True, "Fail"), check_mode_skip=True) as ctx:
ctx.run(cli_action='resource')
def set_cluster(module, state, timeout, force):
if state == 'online':
cmd = ["pcs", "cluster", "start"]
if state == 'offline':
cmd = ["pcs", "cluster", "stop"]
if force:
cmd = cmd + ["--force"]
rc, out, err = module.run_command(cmd)
if rc == 1:
module.fail_json(msg="Command execution failed.\nCommand: `%s`\nError: %s" % (cmd, err))
def state_offline(self):
with self.runner('cli_action state name apply_all wait',
output_process=self._process_command_output(True, "not currently running"),
check_mode_skip=True) as ctx:
ctx.run(cli_action='cluster', apply_all=self.vars.apply_all, wait=self.module.params['timeout'])
t = time.time()
ready = False
while time.time() < t + timeout:
cluster_state = get_cluster_status(module)
if cluster_state == state:
ready = True
break
if not ready:
module.fail_json(msg="Failed to set the state `%s` on the cluster\n" % (state))
def state_online(self):
with self.runner('cli_action state name apply_all wait',
output_process=self._process_command_output(True, "currently running"),
check_mode_skip=True) as ctx:
ctx.run(cli_action='cluster', apply_all=self.vars.apply_all, wait=self.module.params['timeout'])
if get_pacemaker_maintenance_mode(self.runner):
with self.runner('cli_action state name', output_process=self._process_command_output(True, "Fail"), check_mode_skip=True) as ctx:
ctx.run(cli_action='property', state='maintenance', name='maintenance-mode=false')
def state_maintenance(self):
with self.runner('cli_action state name',
output_process=self._process_command_output(True, "Fail"),
check_mode_skip=True) as ctx:
ctx.run(cli_action='property', name='maintenance-mode=true')
def state_restart(self):
with self.runner('cli_action state name apply_all wait',
output_process=self._process_command_output(True, "not currently running"),
check_mode_skip=True) as ctx:
ctx.run(cli_action='cluster', state='offline', apply_all=self.vars.apply_all, wait=self.module.params['timeout'])
ctx.run(cli_action='cluster', state='online', apply_all=self.vars.apply_all, wait=self.module.params['timeout'])
if get_pacemaker_maintenance_mode(self.runner):
with self.runner('cli_action state name', output_process=self._process_command_output(True, "Fail"), check_mode_skip=True) as ctx:
ctx.run(cli_action='property', state='maintenance', name='maintenance-mode=false')
def main():
argument_spec = dict(
state=dict(type='str', choices=['online', 'offline', 'restart', 'cleanup']),
node=dict(type='str'),
timeout=dict(type='int', default=300),
force=dict(type='bool', default=True),
)
module = AnsibleModule(
argument_spec,
supports_check_mode=True,
)
changed = False
state = module.params['state']
node = module.params['node']
force = module.params['force']
timeout = module.params['timeout']
if state in ['online', 'offline']:
# Get cluster status
if node is None:
cluster_state = get_cluster_status(module)
if cluster_state == state:
module.exit_json(changed=changed, out=cluster_state)
else:
if module.check_mode:
module.exit_json(changed=True)
set_cluster(module, state, timeout, force)
cluster_state = get_cluster_status(module)
if cluster_state == state:
module.exit_json(changed=True, out=cluster_state)
else:
module.fail_json(msg="Fail to bring the cluster %s" % state)
else:
cluster_state = get_node_status(module, node)
# Check cluster state
for node_state in cluster_state:
if node_state[1].strip().lower() == state:
module.exit_json(changed=changed, out=cluster_state)
else:
if module.check_mode:
module.exit_json(changed=True)
# Set cluster status if needed
set_cluster(module, state, timeout, force)
cluster_state = get_node_status(module, node)
module.exit_json(changed=True, out=cluster_state)
elif state == 'restart':
if module.check_mode:
module.exit_json(changed=True)
set_cluster(module, 'offline', timeout, force)
cluster_state = get_cluster_status(module)
if cluster_state == 'offline':
set_cluster(module, 'online', timeout, force)
cluster_state = get_cluster_status(module)
if cluster_state == 'online':
module.exit_json(changed=True, out=cluster_state)
else:
module.fail_json(msg="Failed during the restart of the cluster, the cluster cannot be started")
else:
module.fail_json(msg="Failed during the restart of the cluster, the cluster cannot be stopped")
elif state == 'cleanup':
if module.check_mode:
module.exit_json(changed=True)
clean_cluster(module, timeout)
cluster_state = get_cluster_status(module)
module.exit_json(changed=True, out=cluster_state)
PacemakerCluster.execute()
if __name__ == '__main__':

View file

@ -163,13 +163,15 @@ class PacemakerResource(StateModuleHelper):
required_if=[('state', 'present', ['resource_type', 'resource_option'])],
supports_check_mode=True,
)
default_state = "present"
def __init_module__(self):
self.runner = pacemaker_runner(self.module, cli_action='resource')
self._maintenance_mode_runner = pacemaker_runner(self.module, cli_action='property')
self.vars.set('previous_value', self._get())
self.runner = pacemaker_runner(self.module)
self.vars.set('previous_value', self._get()['out'])
self.vars.set('value', self.vars.previous_value, change=True, diff=True)
self.module.params['name'] = self.module.params['name'] or None
def __quit_module__(self):
self.vars.set('value', self._get()['out'])
def _process_command_output(self, fail_on_err, ignore_err_msg=""):
def process(rc, out, err):
@ -180,45 +182,31 @@ class PacemakerResource(StateModuleHelper):
return process
def _get(self):
with self.runner('state name', output_process=self._process_command_output(False)) as ctx:
return ctx.run(state='status')
with self.runner('cli_action state name') as ctx:
result = ctx.run(cli_action="resource", state='status')
return dict([('rc', result[0]),
('out', result[1] if result[1] != "" else None),
('err', result[2])])
def state_absent(self):
runner_args = ['state', 'name', 'force']
force = get_pacemaker_maintenance_mode(self._maintenance_mode_runner)
with self.runner(runner_args, output_process=self._process_command_output(True, "does not exist"), check_mode_skip=True) as ctx:
ctx.run(force=force)
self.vars.set('value', self._get())
self.vars.stdout = ctx.results_out
self.vars.stderr = ctx.results_err
self.vars.cmd = ctx.cmd
force = get_pacemaker_maintenance_mode(self.runner)
with self.runner('cli_action state name force', output_process=self._process_command_output(True, "does not exist"), check_mode_skip=True) as ctx:
ctx.run(cli_action='resource', force=force)
def state_present(self):
with self.runner(
'state name resource_type resource_option resource_operation resource_meta resource_argument wait',
output_process=self._process_command_output(not get_pacemaker_maintenance_mode(self._maintenance_mode_runner), "already exists"),
'cli_action state name resource_type resource_option resource_operation resource_meta resource_argument wait',
output_process=self._process_command_output(not get_pacemaker_maintenance_mode(self.runner), "already exists"),
check_mode_skip=True) as ctx:
ctx.run()
self.vars.set('value', self._get())
self.vars.stdout = ctx.results_out
self.vars.stderr = ctx.results_err
self.vars.cmd = ctx.cmd
ctx.run(cli_action='resource')
def state_enabled(self):
with self.runner('state name', output_process=self._process_command_output(True, "Starting"), check_mode_skip=True) as ctx:
ctx.run()
self.vars.set('value', self._get())
self.vars.stdout = ctx.results_out
self.vars.stderr = ctx.results_err
self.vars.cmd = ctx.cmd
with self.runner('cli_action state name', output_process=self._process_command_output(True, "Starting"), check_mode_skip=True) as ctx:
ctx.run(cli_action='resource')
def state_disabled(self):
with self.runner('state name', output_process=self._process_command_output(True, "Stopped"), check_mode_skip=True) as ctx:
ctx.run()
self.vars.set('value', self._get())
self.vars.stdout = ctx.results_out
self.vars.stderr = ctx.results_err
self.vars.cmd = ctx.cmd
with self.runner('cli_action state name', output_process=self._process_command_output(True, "Stopped"), check_mode_skip=True) as ctx:
ctx.run(cli_action='resource')
def main():