Skip to content
Snippets Groups Projects
Commit 7e1c5cf9 authored by Emmanuel Cohen's avatar Emmanuel Cohen
Browse files

double failover test using postgres-maintenance role, refs #33389

parent 9bafd7ac
No related branches found
No related tags found
No related merge requests found
Showing
with 147 additions and 202 deletions
......@@ -103,7 +103,7 @@ stop-ovh-preprod:
script:
- make stop-preprod-ovh
test-ha-pgsql:
test-pgsql-ha:
image: registry.ubicast.net/mediaserver/envsetup:root
stage: test-pgsql-ha
tags:
......@@ -115,7 +115,7 @@ test-ha-pgsql:
changes:
- ansible/**/*
script:
- make test ha-pgsql=1
- make test pgsql-ha=1
test-mediaserver-ha:
image: registry.ubicast.net/mediaserver/envsetup:root
......
......@@ -18,8 +18,11 @@ endif
ifdef keep
MOLECULE_TEST_FLAGS += --destroy=never --parallel
endif
ifdef ha-pgsql
MOLECULE_TEST_FLAGS += --scenario-name ha-pgsql
ifdef pgsql-ha
MOLECULE_TEST_FLAGS += --scenario-name pgsql-ha
endif
ifdef mediaserver-ha
MOLECULE_TEST_FLAGS += --scenario-name mediaserver-ha
endif
ifdef mediaserver-ha
MOLECULE_TEST_FLAGS += --scenario-name mediaserver-ha
......@@ -65,7 +68,7 @@ lint:
ANSIBLE_CONFIG=$(ANSIBLE_CONFIG) $(ANSIBLE_LINT_BIN) ansible/playbooks/site.yml
.PHONY: test
## test: Run development tests on the project : debug=1, keep=1, SKYREACH_SYSTEM_KEY=<xxx>, ha-pgsql=1
## test: Run development tests on the project : debug=1, keep=1, SKYREACH_SYSTEM_KEY=<xxx>, pgsql-ha=1
test:
cd ansible; molecule $(MOLECULE_FLAGS) test $(MOLECULE_TEST_FLAGS)
......
import os
import testinfra.utils.ansible_runner
import time
# Resolve every host of the "postgres" inventory group from the Molecule
# inventory file, then restrict this module to hosts whose name starts with "db0".
hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres")
testinfra_hosts = [i for i in hosts if i.startswith('db0')]
def test_postgresql_check_shutdown_primary(host):
    ''' Stop postgresql on the targeted host and check port 5432 is closed '''
    stop_result = host.ansible(
        "command",
        "systemctl stop postgresql",
        become=True,
        check=False,
    )
    assert stop_result['changed']

    # Fixed 40-second wait before probing the local socket.
    time.sleep(40)

    pg_socket = host.socket("tcp://127.0.0.1:5432")
    assert not pg_socket.is_listening
import os
import testinfra.utils.ansible_runner
import commons
# /!\ This test runs across all servers: every host of the "postgres"
# inventory group is targeted.
testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres")
def test_postgresql_check_repmgr_new_master(host):
    ''' Check the status reported for each node after the new master election '''
    # Expected status, keyed by inventory-hostname prefix.
    expected_by_prefix = (
        ("db0-default", "fenced"),
        ("db1-default", "primary"),
        ("db2-default", "witness"),
    )
    # Look the hostname up once instead of once per branch.
    inventory_hostname = host.ansible.get_variables()["inventory_hostname"]
    for prefix, expected in expected_by_prefix:
        if inventory_hostname.startswith(prefix):
            data = commons.get_status(host)
            assert data == expected, "%s: expected status %r, got %r" % (
                inventory_hostname, expected, data)
    # A host matching none of the prefixes is not checked, as before.
import os
import testinfra.utils.ansible_runner
# Resolve every host of the "postgres" inventory group from the Molecule
# inventory file, then restrict this module to hosts whose name starts with "db1".
hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres")
testinfra_hosts = [i for i in hosts if i.startswith('db1')]
def test_postgresql_insert_new_master(host):
    ''' Insert a row into test_ha through postgresql_query and expect a change '''
    # NOTE(review): inside a double-quoted Python string \' is a plain quote,
    # so the query value carries nested single quotes -- confirm the module
    # argument parser accepts this.
    insert_result = host.ansible(
        "postgresql_query",
        "db=test query='INSERT INTO test_ha (name) VALUES (\'test2\');'",
        become=True,
        check=False,
        become_user='postgres',
    )
    assert insert_result["changed"]
import os
import testinfra.utils.ansible_runner
import time
# Resolve every host of the "postgres" inventory group from the Molecule
# inventory file, then restrict this module to hosts whose name starts with "db0".
# `hosts` is also read by test_postgresql_launch_repmgr_sync to find the db1 host.
hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres")
testinfra_hosts = [i for i in hosts if i.startswith('db0')]
def test_postgresql_delete_data(host):
    ''' Remove the postgresql 11 main data directory on the targeted host '''
    removal = host.ansible(
        "command",
        "rm -rf /var/lib/postgresql/11/main/",
        become=True,
        check=False,
    )
    assert removal['changed']
def test_postgresql_launch_repmgr_sync(host):
    ''' Sync data with the primary server using repmgr standby clone '''
    # The current master is taken to be the first "db1*" entry of the
    # module-level `hosts` list.
    master_names = [name for name in hosts if name.startswith('db1')]
    assert master_names, "no db1* host found in the postgres inventory group"

    runner = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"])
    current_master = runner.get_host(master_names[0])

    # Clone from the first address reported on the master's eth0 interface.
    master_addresses = current_master.interface('eth0').addresses
    assert master_addresses, "current master has no address on eth0"
    current_master_ip = master_addresses[0]

    rep_mgr_command = (
        "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h "
        + str(current_master_ip)
        + " -d repmgr -U repmgr -c"
    )
    s = host.ansible("command", rep_mgr_command, become=True, become_user='postgres', check=False)
    assert s['changed']
def test_postgresql_start_postgresql(host):
    ''' Start postgresql on the targeted host, wait, then check the command changed something '''
    start_result = host.ansible(
        "command",
        "systemctl start postgresql",
        become=True,
        check=False,
    )
    # The 20-second wait happens before the assertion, whatever the result.
    time.sleep(20)
    assert start_result['changed']
def test_pogresql_register_as_standby(host):
    ''' Register the targeted server as a standby in repmgr '''
    register_command = "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register"
    register_result = host.ansible(
        "command",
        register_command,
        become=True,
        become_user='postgres',
        check=False,
    )
    # The 20-second wait happens before the assertion, whatever the result.
    time.sleep(20)
    assert register_result['changed']
import os
import testinfra.utils.ansible_runner
import commons
# /!\ This test runs across all servers: every host of the "postgres"
# inventory group is targeted.
testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres")
def test_postgresql_check_status_after_shutdown(host):
    ''' Check the status across servers after primary change and server reintegration '''
    # Expected status, keyed by inventory-hostname prefix.
    expected_by_prefix = (
        ("db0", "standby"),
        ("db1", "primary"),
        ("db2", "witness"),
    )
    # Look the hostname up once instead of once per branch.
    inventory_hostname = host.ansible.get_variables()["inventory_hostname"]
    for prefix, expected in expected_by_prefix:
        if inventory_hostname.startswith(prefix):
            data = commons.get_status(host)
            assert data == expected, "%s: expected status %r, got %r" % (
                inventory_hostname, expected, data)
    # A host matching none of the prefixes is not checked, as before.
......@@ -49,6 +49,8 @@ provisioner:
ANSIBLE_ACTION_PLUGINS: ../../plugins/action
ANSIBLE_PYTHON_INTERPRETER: /usr/bin/python3
SKYREACH_SYSTEM_KEY: s1121eb6e7593525bf3e0302586c82d2
playbooks:
side_effect: side_effect.yml
verifier:
name: testinfra
# options:
......
---
# Runs the destroy-master scenario twice in a row, refreshing the inventory
# in between. Presumably the side_effect.yml referenced by the Molecule
# provisioner -- TODO confirm the file name.
- name: first failover
import_playbook: side_effect_destroy_master.yml
# Refresh inventory to have a clean inventory (each server in the initial group).
# The destroy-master playbook uses add_host to put hosts into extra groups
# (postgres_fenced, postgres_standby, postgres_primary) before the second run.
- name: refresh inventory
hosts: all
tasks:
- name: refresh inventory
meta: refresh_inventory
- name: second failover
import_playbook: side_effect_destroy_master.yml
---
# Stops postgresql on the first postgres host, then runs the
# postgres-maintenance playbooks in sequence: fenced -> standby,
# standby -> primary, and finally a repmgrd restart.

# Step 1: stop the postgresql@11-main service on the first host of the postgres group.
- name: stop master node
hosts: postgres[0]
tasks:
- name: stop postgresql
service:
name: postgresql@11-main
state: stopped
# Presumably leaves time for the cluster to elect a new primary -- TODO confirm the 45 s value.
- name: pause for 45 seconds
hosts: localhost
tasks:
- pause:
seconds: 45
# Step 2: register the hosts in the in-memory groups that the maintenance playbooks target.
- name: set correct host to correct groups
hosts: postgres
tasks:
# The first postgres host joins both postgres_fenced and postgres_standby,
# presumably so the fenced-to-standby and then the standby-to-primary plays can target it.
- name: add db0 to fenced group
add_host:
name: "{{ groups['postgres'][0] }}"
groups: [ 'postgres_fenced', 'postgres_standby' ]
# The second postgres host is declared as the current primary.
- name: add db1 to primary group
add_host:
name: "{{ groups['postgres'][1] }}"
groups: postgres_primary
# db0 is now fenced: import the playbooks that switch it back to standby.
# rephacheck_status.yml is imported first each time; it registers the `rephacheck` variable.
- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml
- import_playbook: ../../playbooks/postgres-maintenance/fenced_to_standby.yml
- name: pause for 45 seconds
hosts: localhost
tasks:
- pause:
seconds: 45
# db0 is now standby: import the playbooks that switch it back to primary.
- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml
- import_playbook: ../../playbooks/postgres-maintenance/standby_to_primary.yml
- name: pause for 45 seconds
hosts: localhost
tasks:
- pause:
seconds: 45
# db0 is now primary: restart repmgrd.
- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml
- import_playbook: ../../playbooks/postgres-maintenance/restart_repmgrd.yml
...
#!/usr/bin/env ansible-playbook
---
- name: GATHER FACTS
hosts: postgres_primary:postgres_standby:postgres_fenced
tags: always
tasks:
- name: get cluster state
command: "rephacheck"
register: rephacheck
- name: show status for each node
debug:
msg: "Current node {{ ansible_hostname }} status {{ rephacheck['stdout'] }}"
when: rephacheck['stdout'] != ""
- import_playbook: postgres-maintenance/rephacheck_status.yml
tags: [ 'always' ]
- name: POSTGRESQL SWITCH CURRENT STANDBY TO PRIMARY
hosts: postgres_standby
tags: [ 'never', 'standby-to-primary' ]
tasks:
- name: fail if node status if not standby
fail:
msg: "Current status {{ rephacheck['stdout'] }} must be standby."
when: rephacheck['stdout'] != "standby"
- name: check if node is currently in standby
command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow --dry-run"
become: true
become_user: postgres
when: rephacheck['stdout'] == "standby"
register: standby_dry_run
- name: switch standby node to primary
command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow"
become: true
become_user: postgres
when:
- standby_dry_run is succeeded
- rephacheck['stdout'] == "standby"
- name: POSTGRESQL SWITCH CURRENT FENCED TO STANDBY
hosts: postgres_fenced
- import_playbook: postgres-maintenance/fenced_to_standby.yml
tags: [ 'never', 'fenced-to-standby' ]
tasks:
- name: fail if node status if not fenced
fail:
msg: "Current status {{ rephacheck['stdout'] }} must be fenced."
when: rephacheck['stdout'] != "fenced"
- name: stop postgresql
systemd:
name: postgresql
state: stopped
- name: delete postgresql data directory
file:
path: /var/lib/postgresql/11/main/
state: absent
force: true
- name: copy data from primary
command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h {{ hostvars[groups['postgres_primary'][0]]['ansible_default_ipv4']['address'] }} -d repmgr -U repmgr -c"
become: true
become_user: postgres
register: copy_from_primary
- name: start postgresql
systemd:
name: postgresql
state: started
when: copy_from_primary is succeeded
- name: register node as standby
command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register"
become: true
become_user: postgres
when: copy_from_primary is succeeded
- name: REPMGRD RESTART
hosts: postgres
- import_playbook: postgres-maintenance/standby_to_primary.yml
tags: [ 'never', 'standby-to-primary' ]
- import_playbook: postgres-maintenance/restart_repmgrd.yml
tags: [ 'never', 'restart-repmgrd', 'standby-to-primary' ]
tasks:
- name: kill repmgrd
command: "pkill repmgrd"
ignore_errors: true
- name: restart repmgrd
systemd:
name: repmgrd
state: restarted
...
#!/usr/bin/env ansible-playbook
---
# Rebuild every host of the postgres_fenced group as a standby by re-cloning
# its data from the primary with repmgr.
# `rephacheck` is not registered in this file: it must already be set for
# each host (rephacheck_status.yml registers a variable of that name).
- name: POSTGRESQL SWITCH CURRENT FENCED TO STANDBY
hosts: postgres_fenced
tasks:
# Guard: stop here unless the node's status is "fenced".
- name: fail if node status if not fenced
fail:
msg: "Current status {{ rephacheck['stdout'] }} must be fenced."
when: rephacheck['stdout'] != "fenced"
- name: stop postgresql
systemd:
name: postgresql
state: stopped
# Destructive: removes the whole PostgreSQL 11 main data directory before the clone.
- name: delete postgresql data directory
file:
path: /var/lib/postgresql/11/main/
state: absent
force: true
# Clone from the default IPv4 address of the first host in postgres_primary.
# NOTE(review): relies on facts being available for that host -- confirm.
- name: copy data from primary
command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h {{ hostvars[groups['postgres_primary'][0]]['ansible_default_ipv4']['address'] }} -d repmgr -U repmgr -c"
become: true
become_user: postgres
register: copy_from_primary
# The two remaining tasks only run when the clone command succeeded.
- name: start postgresql
systemd:
name: postgresql
state: started
when: copy_from_primary is succeeded
- name: register node as standby
command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register"
become: true
become_user: postgres
when: copy_from_primary is succeeded
...
#!/usr/bin/env ansible-playbook
---
# Run `rephacheck` on every host of the postgres_primary, postgres_standby and
# postgres_fenced groups and store the result in the `rephacheck` variable.
- name: REHACHECK STATUS
hosts: postgres_primary:postgres_standby:postgres_fenced
tasks:
- name: get cluster state
command: "rephacheck"
register: rephacheck
# Only print the status when the command produced some output.
- name: show status for each node
debug:
msg: "Current node {{ ansible_hostname }} status {{ rephacheck['stdout'] }}"
when: rephacheck['stdout'] != ""
...
#!/usr/bin/env ansible-playbook
---
# Kill any running repmgrd process, then restart the repmgrd unit, on every
# host of the postgres group.
- name: REPMGRD RESTART
hosts: postgres
tasks:
# Errors are ignored so the play continues when pkill fails
# (presumably when no repmgrd process is running -- TODO confirm).
- name: kill repmgrd
command: "pkill repmgrd"
ignore_errors: true
- name: restart repmgrd
systemd:
name: repmgrd
state: restarted
...
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment