diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8dd3aa963427f3db5aab9ca74da11968012865cc..0d2273453184e04db52e9c441dae76cd99c4f457 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -103,7 +103,7 @@ stop-ovh-preprod: script: - make stop-preprod-ovh -test-ha-pgsql: +test-pgsql-ha: image: registry.ubicast.net/mediaserver/envsetup:root stage: test-pgsql-ha tags: @@ -115,7 +115,7 @@ test-ha-pgsql: changes: - ansible/**/* script: - - make test ha-pgsql=1 + - make test pgsql-ha=1 test-mediaserver-ha: image: registry.ubicast.net/mediaserver/envsetup:root diff --git a/Makefile b/Makefile index 29c61748afa1c9c00ac24401e034e065268f3b27..e32e7bb7d56a03e65214af102e1d61d41f2c8a5a 100644 --- a/Makefile +++ b/Makefile @@ -18,8 +18,8 @@ endif ifdef keep MOLECULE_TEST_FLAGS += --destroy=never --parallel endif -ifdef ha-pgsql - MOLECULE_TEST_FLAGS += --scenario-name ha-pgsql +ifdef pgsql-ha + MOLECULE_TEST_FLAGS += --scenario-name pgsql-ha endif ifdef mediaserver-ha MOLECULE_TEST_FLAGS += --scenario-name mediaserver-ha @@ -65,7 +65,7 @@ lint: ANSIBLE_CONFIG=$(ANSIBLE_CONFIG) $(ANSIBLE_LINT_BIN) ansible/playbooks/site.yml .PHONY: test -## test: Run development tests on the project : debug=1, keep=1, SKYREACH_SYSTEM_KEY=<xxx>, ha-pgsql=1 +## test: Run development tests on the project : debug=1, keep=1, SKYREACH_SYSTEM_KEY=<xxx>, pgsql-ha=1 test: cd ansible; molecule $(MOLECULE_FLAGS) test $(MOLECULE_TEST_FLAGS) diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_e_shutdown_primary.py b/ansible/molecule/ha-pgsql/tests/test_postgres_e_shutdown_primary.py deleted file mode 100644 index 919ea2dd3a0fdddaf7793d60e2eecd04eb486387..0000000000000000000000000000000000000000 --- a/ansible/molecule/ha-pgsql/tests/test_postgres_e_shutdown_primary.py +++ /dev/null @@ -1,21 +0,0 @@ -import os - -import testinfra.utils.ansible_runner - -import time - - -hosts = 
testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres") -testinfra_hosts = [i for i in hosts if i.startswith('db0')] - - -def test_postgresql_check_shutdown_primary(host): - ''' Shutdown the primary server ''' - - s = host.ansible("command", "systemctl stop postgresql", become=True, check=False) - assert s['changed'] - - time.sleep(40) - - s = host.socket("tcp://127.0.0.1:5432") - assert not s.is_listening diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_f_new_primary_cluster_status.py b/ansible/molecule/ha-pgsql/tests/test_postgres_f_new_primary_cluster_status.py deleted file mode 100644 index 9f9c8edb95a0391f2703739aad596b9042e2c391..0000000000000000000000000000000000000000 --- a/ansible/molecule/ha-pgsql/tests/test_postgres_f_new_primary_cluster_status.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -import testinfra.utils.ansible_runner - -import commons - -# /!\ This test run accross all servers -testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres") - - -def test_postgresql_check_repmgr_new_master(host): - ''' check repmgr status for each node after new master election ''' - - if host.ansible.get_variables()["inventory_hostname"].startswith("db0-default"): - data = commons.get_status(host) - assert data == "fenced" - if host.ansible.get_variables()["inventory_hostname"].startswith("db1-default"): - data = commons.get_status(host) - assert data == "primary" - if host.ansible.get_variables()["inventory_hostname"].startswith("db2-default"): - data = commons.get_status(host) - assert data == "witness" diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_g_new_primary_write.py b/ansible/molecule/ha-pgsql/tests/test_postgres_g_new_primary_write.py deleted file mode 100644 index 4aa79ed3b72365bc29a7774f4b496b82120c0178..0000000000000000000000000000000000000000 --- a/ansible/molecule/ha-pgsql/tests/test_postgres_g_new_primary_write.py 
+++ /dev/null @@ -1,12 +0,0 @@ -import os - -import testinfra.utils.ansible_runner - - -hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres") -testinfra_hosts = [i for i in hosts if i.startswith('db1')] - - -def test_postgresql_insert_new_master(host): - s = host.ansible("postgresql_query", "db=test query='INSERT INTO test_ha (name) VALUES (\'test2\');'", become=True, check=False, become_user='postgres') - assert s["changed"] diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_h_reintegrate_server.py b/ansible/molecule/ha-pgsql/tests/test_postgres_h_reintegrate_server.py deleted file mode 100644 index 17e3b486e5aa9f46fa1f1e78236876ad0bb5b845..0000000000000000000000000000000000000000 --- a/ansible/molecule/ha-pgsql/tests/test_postgres_h_reintegrate_server.py +++ /dev/null @@ -1,46 +0,0 @@ -import os - -import testinfra.utils.ansible_runner -import time - -hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres") - - -testinfra_hosts = [i for i in hosts if i.startswith('db0')] - - -def test_postgresql_delete_data(host): - ''' delete data directory ''' - - s = host.ansible("command", "rm -rf /var/lib/postgresql/11/main/", become=True, check=False) - assert s['changed'] - - -def test_postgresql_launch_repmgr_sync(host): - ''' sync data with primary server using repmgr ''' - - current_m = [i for i in hosts if i.startswith('db1')] - #Â print("current master" + current_m[0]) - current_master = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_host(current_m[0]) - #Â current_master = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_host("db1-default") - #Â print(current_master) - current_master_ip = current_master.interface('eth0').addresses[0] - rep_mgr_command = "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h " + str(current_master_ip) + " 
-d repmgr -U repmgr -c" - s = host.ansible("command", rep_mgr_command, become=True, become_user='postgres', check=False) - assert s['changed'] - - -def test_postgresql_start_postgresql(host): - ''' start postgresql ''' - - s = host.ansible("command", "systemctl start postgresql", become=True, check=False) - time.sleep(20) - assert s['changed'] - - -def test_pogresql_register_as_standby(host): - ''' register server as standby in repmgr ''' - - s = host.ansible("command", "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register", become=True, become_user='postgres', check=False) - time.sleep(20) - assert s['changed'] diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_i_final_cluster_status.py b/ansible/molecule/ha-pgsql/tests/test_postgres_i_final_cluster_status.py deleted file mode 100644 index 558b40282dfbcb3eca4d6f535416f20cf1ad8747..0000000000000000000000000000000000000000 --- a/ansible/molecule/ha-pgsql/tests/test_postgres_i_final_cluster_status.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -import testinfra.utils.ansible_runner - -import commons - -# /!\ This test run accross all servers -testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(os.environ["MOLECULE_INVENTORY_FILE"]).get_hosts("postgres") - - -def test_postgresql_check_status_after_shutdown(host): - ''' check repmgr status accross server after primary change and server reintegration ''' - - if host.ansible.get_variables()["inventory_hostname"].startswith("db0"): - data = commons.get_status(host) - assert data == "standby" - if host.ansible.get_variables()["inventory_hostname"].startswith("db1"): - data = commons.get_status(host) - assert data == "primary" - if host.ansible.get_variables()["inventory_hostname"].startswith("db2"): - data = commons.get_status(host) - assert data == "witness" diff --git a/ansible/molecule/ha-pgsql/converge.yml b/ansible/molecule/pgsql-ha/converge.yml similarity index 100% rename from ansible/molecule/ha-pgsql/converge.yml 
rename to ansible/molecule/pgsql-ha/converge.yml diff --git a/ansible/molecule/ha-pgsql/molecule.yml b/ansible/molecule/pgsql-ha/molecule.yml similarity index 96% rename from ansible/molecule/ha-pgsql/molecule.yml rename to ansible/molecule/pgsql-ha/molecule.yml index 06ab89f35a721ddca4396758922eec7546bd475e..5fd1e88a0804efb21f4587b44cf5f0e4cef2c28d 100644 --- a/ansible/molecule/ha-pgsql/molecule.yml +++ b/ansible/molecule/pgsql-ha/molecule.yml @@ -49,6 +49,8 @@ provisioner: ANSIBLE_ACTION_PLUGINS: ../../plugins/action ANSIBLE_PYTHON_INTERPRETER: /usr/bin/python3 SKYREACH_SYSTEM_KEY: s1121eb6e7593525bf3e0302586c82d2 + playbooks: + side_effect: side_effect.yml verifier: name: testinfra #Â options: diff --git a/ansible/molecule/pgsql-ha/side_effect.yml b/ansible/molecule/pgsql-ha/side_effect.yml new file mode 100644 index 0000000000000000000000000000000000000000..b66b60b2f4648862d0e5a33e9dc04af655c8ffc6 --- /dev/null +++ b/ansible/molecule/pgsql-ha/side_effect.yml @@ -0,0 +1,13 @@ +--- +- name: first failover + import_playbook: side_effect_destroy_master.yml + +# Refresh inventory to have a clean inventory (each server in the initial group) +- name: refresh inventory + hosts: all + tasks: + - name: refresh inventory + meta: refresh_inventory + +- name: second failover + import_playbook: side_effect_destroy_master.yml diff --git a/ansible/molecule/pgsql-ha/side_effect_destroy_master.yml b/ansible/molecule/pgsql-ha/side_effect_destroy_master.yml new file mode 100644 index 0000000000000000000000000000000000000000..5332953fe58bba9bc012f18b8b84401e5e0f692f --- /dev/null +++ b/ansible/molecule/pgsql-ha/side_effect_destroy_master.yml @@ -0,0 +1,53 @@ +--- +- name: stop master node + hosts: postgres[0] + tasks: + - name: stop postgresql + service: + name: postgresql@11-main + state: stopped + +- name: pause for 45 seconds + hosts: localhost + tasks: + - pause: + seconds: 45 + +- name: set correct host to correct groups + hosts: postgres + tasks: + - name: add db0 to fenced 
group + add_host: + name: "{{ groups['postgres'][0] }}" + groups: [ 'postgres_fenced', 'postgres_standby' ] + + - name: add db1 to primary group + add_host: + name: "{{ groups['postgres'][1] }}" + groups: postgres_primary + +# db0 is now fenced, import playbook to switch it back to standby +- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml +- import_playbook: ../../playbooks/postgres-maintenance/fenced_to_standby.yml + +- name: pause for 45 seconds + hosts: localhost + tasks: + - pause: + seconds: 45 + +# db0 is now standby, import playbook to switch it back as primary +- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml +- import_playbook: ../../playbooks/postgres-maintenance/standby_to_primary.yml + +- name: pause for 45 seconds + hosts: localhost + tasks: + - pause: + seconds: 45 + +# db0 is now primary, restart repmgrd +- import_playbook: ../../playbooks/postgres-maintenance/rephacheck_status.yml +- import_playbook: ../../playbooks/postgres-maintenance/restart_repmgrd.yml + +... 
diff --git a/ansible/molecule/ha-pgsql/tests/commons.py b/ansible/molecule/pgsql-ha/tests/commons.py similarity index 100% rename from ansible/molecule/ha-pgsql/tests/commons.py rename to ansible/molecule/pgsql-ha/tests/commons.py diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_a_setup.py b/ansible/molecule/pgsql-ha/tests/test_postgres_a_setup.py similarity index 100% rename from ansible/molecule/ha-pgsql/tests/test_postgres_a_setup.py rename to ansible/molecule/pgsql-ha/tests/test_postgres_a_setup.py diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_b_cluster_status.py b/ansible/molecule/pgsql-ha/tests/test_postgres_b_cluster_status.py similarity index 100% rename from ansible/molecule/ha-pgsql/tests/test_postgres_b_cluster_status.py rename to ansible/molecule/pgsql-ha/tests/test_postgres_b_cluster_status.py diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_c_test_cluster_primary.py b/ansible/molecule/pgsql-ha/tests/test_postgres_c_test_cluster_db0_primary.py similarity index 100% rename from ansible/molecule/ha-pgsql/tests/test_postgres_c_test_cluster_primary.py rename to ansible/molecule/pgsql-ha/tests/test_postgres_c_test_cluster_db0_primary.py diff --git a/ansible/molecule/ha-pgsql/tests/test_postgres_d_test_cluster_secondary.py b/ansible/molecule/pgsql-ha/tests/test_postgres_d_test_cluster_db1_secondary.py similarity index 100% rename from ansible/molecule/ha-pgsql/tests/test_postgres_d_test_cluster_secondary.py rename to ansible/molecule/pgsql-ha/tests/test_postgres_d_test_cluster_db1_secondary.py diff --git a/ansible/playbooks/postgres-maintenance.yml b/ansible/playbooks/postgres-maintenance.yml index 4385d878c546a9c7e41a881706763af72ffad4aa..b4774ee9f567a90d95605b2175061c2308f02a55 100755 --- a/ansible/playbooks/postgres-maintenance.yml +++ b/ansible/playbooks/postgres-maintenance.yml @@ -1,82 +1,16 @@ #!/usr/bin/env ansible-playbook --- -- name: GATHER FACTS - hosts: postgres_primary:postgres_standby:postgres_fenced - tags: 
always - tasks: - - name: get cluster state - command: "rephacheck" - register: rephacheck - - name: show status for each node - debug: - msg: "Current node {{ ansible_hostname }} status {{ rephacheck['stdout'] }}" - when: rephacheck['stdout'] != "" +- import_playbook: postgres-maintenance/rephacheck_status.yml + tags: [ 'always' ] -- name: POSTGRESQL SWITCH CURRENT STANDBY TO PRIMARY - hosts: postgres_standby - tags: [ 'never', 'standby-to-primary' ] - tasks: - - name: fail if node status if not standby - fail: - msg: "Current status {{ rephacheck['stdout'] }} must be standby." - when: rephacheck['stdout'] != "standby" - - name: check if node is currently in standby - command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow --dry-run" - become: true - become_user: postgres - when: rephacheck['stdout'] == "standby" - register: standby_dry_run - - name: switch standby node to primary - command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow" - become: true - become_user: postgres - when: - - standby_dry_run is succeeded - - rephacheck['stdout'] == "standby" - -- name: POSTGRESQL SWITCH CURRENT FENCED TO STANDBY - hosts: postgres_fenced +- import_playbook: postgres-maintenance/fenced_to_standby.yml tags: [ 'never', 'fenced-to-standby' ] - tasks: - - name: fail if node status if not fenced - fail: - msg: "Current status {{ rephacheck['stdout'] }} must be fenced." 
- when: rephacheck['stdout'] != "fenced" - - name: stop postgresql - systemd: - name: postgresql - state: stopped - - name: delete postgresql data directory - file: - path: /var/lib/postgresql/11/main/ - state: absent - force: true - - name: copy data from primary - command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h {{ hostvars[groups['postgres_primary'][0]]['ansible_default_ipv4']['address'] }} -d repmgr -U repmgr -c" - become: true - become_user: postgres - register: copy_from_primary - - name: start postgresql - systemd: - name: postgresql - state: started - when: copy_from_primary is succeeded - - name: register node as standby - command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register" - become: true - become_user: postgres - when: copy_from_primary is succeeded -- name: REPMGRD RESTART - hosts: postgres +- import_playbook: postgres-maintenance/standby_to_primary.yml + tags: [ 'never', 'standby-to-primary' ] + +- import_playbook: postgres-maintenance/restart_repmgrd.yml tags: [ 'never', 'restart-repmgrd', 'standby-to-primary' ] - tasks: - - name: kill repmgrd - command: "pkill repmgrd" - ignore_errors: true - - name: restart repmgrd - systemd: - name: repmgrd - state: restarted + ... diff --git a/ansible/playbooks/postgres-maintenance/fenced_to_standby.yml b/ansible/playbooks/postgres-maintenance/fenced_to_standby.yml new file mode 100644 index 0000000000000000000000000000000000000000..f4b19d1a56e592fd0401037f43998f4698da30f8 --- /dev/null +++ b/ansible/playbooks/postgres-maintenance/fenced_to_standby.yml @@ -0,0 +1,35 @@ +#!/usr/bin/env ansible-playbook +--- +- name: POSTGRESQL SWITCH CURRENT FENCED TO STANDBY + hosts: postgres_fenced + tasks: + - name: fail if node status if not fenced + fail: + msg: "Current status {{ rephacheck['stdout'] }} must be fenced." 
+ when: rephacheck['stdout'] != "fenced" + - name: stop postgresql + systemd: + name: postgresql + state: stopped + - name: delete postgresql data directory + file: + path: /var/lib/postgresql/11/main/ + state: absent + force: true + - name: copy data from primary + command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby clone -h {{ hostvars[groups['postgres_primary'][0]]['ansible_default_ipv4']['address'] }} -d repmgr -U repmgr -c" + become: true + become_user: postgres + register: copy_from_primary + - name: start postgresql + systemd: + name: postgresql + state: started + when: copy_from_primary is succeeded + - name: register node as standby + command: "repmgr -f /etc/postgresql/11/main/repmgr.conf --force --verbose standby register" + become: true + become_user: postgres + when: copy_from_primary is succeeded + +... diff --git a/ansible/playbooks/postgres-maintenance/rephacheck_status.yml b/ansible/playbooks/postgres-maintenance/rephacheck_status.yml new file mode 100644 index 0000000000000000000000000000000000000000..b22dec19a7b53300534c6903868e553b52695490 --- /dev/null +++ b/ansible/playbooks/postgres-maintenance/rephacheck_status.yml @@ -0,0 +1,14 @@ +#!/usr/bin/env ansible-playbook +--- +- name: REPHACHECK STATUS + hosts: postgres_primary:postgres_standby:postgres_fenced + tasks: + - name: get cluster state + command: "rephacheck" + register: rephacheck + - name: show status for each node + debug: + msg: "Current node {{ ansible_hostname }} status {{ rephacheck['stdout'] }}" + when: rephacheck['stdout'] != "" + +... 
diff --git a/ansible/playbooks/postgres-maintenance/restart_repmgrd.yml b/ansible/playbooks/postgres-maintenance/restart_repmgrd.yml new file mode 100644 index 0000000000000000000000000000000000000000..4753a56a824df950b92233d98e9ec6e9977e5edc --- /dev/null +++ b/ansible/playbooks/postgres-maintenance/restart_repmgrd.yml @@ -0,0 +1,14 @@ +#!/usr/bin/env ansible-playbook +--- +- name: REPMGRD RESTART + hosts: postgres + tasks: + - name: kill repmgrd + command: "pkill repmgrd" + ignore_errors: true + - name: restart repmgrd + systemd: + name: repmgrd + state: restarted + +... diff --git a/ansible/playbooks/postgres-maintenance/standby_to_primary.yml b/ansible/playbooks/postgres-maintenance/standby_to_primary.yml new file mode 100644 index 0000000000000000000000000000000000000000..bfce1c64b61e4bf28131f2a5dcb4907c2b35e3b0 --- /dev/null +++ b/ansible/playbooks/postgres-maintenance/standby_to_primary.yml @@ -0,0 +1,24 @@ +#!/usr/bin/env ansible-playbook +--- +- name: POSTGRESQL SWITCH CURRENT STANDBY TO PRIMARY + hosts: postgres_standby + tasks: + - name: fail if node status if not standby + fail: + msg: "Current status {{ rephacheck['stdout'] }} must be standby." + when: rephacheck['stdout'] != "standby" + - name: check if node is currently in standby + command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow --dry-run" + become: true + become_user: postgres + when: rephacheck['stdout'] == "standby" + register: standby_dry_run + - name: switch standby node to primary + command: "repmgr standby switchover -f /etc/postgresql/11/main/repmgr.conf --siblings-follow" + become: true + become_user: postgres + when: + - standby_dry_run is succeeded + - rephacheck['stdout'] == "standby" + +...