Skip to content
Snippets Groups Projects
Commit ce638f6c authored by Nicolas KAROLAK's avatar Nicolas KAROLAK
Browse files

postgres and wowza ha | fixes #31841

parent 03b1bc73
No related branches found
No related tags found
No related merge requests found
Showing
with 774 additions and 20 deletions
---
# Default variables for the haproxy role.

# Packages installed through apt.
hap_packages:
  - haproxy

# Location of the HAProxy configuration file.
hap_config: /etc/haproxy/haproxy.cfg

# Text placed under the `global` section of the configuration.
hap_config_global: |
  log /dev/log local0 warning
  log /dev/log local1 warning
  chroot /var/lib/haproxy
  stats socket /run/haproxy/admin.sock mode 664 level admin
  stats timeout 30s
  user haproxy
  group haproxy
  daemon

# Text placed under the `defaults` section of the configuration.
hap_config_defaults: |
  log global
  mode tcp
  balance leastconn
  retries 2
  timeout tunnel 30m
  timeout client 30m
  timeout connect 5s
  timeout server 30m
  timeout check 15s
  option tcplog
  option tcpka
  option clitcpka
  option srvtcpka

# The `listen stats` section is only rendered when this is true.
hap_stats_enabled: false

# Text placed under `listen stats`.
# NOTE(review): `admin:password` is a placeholder credential; override it
# before enabling the stats page.
hap_config_stats: |
  bind :9000
  mode http
  stats enable
  stats uri /
  stats auth admin:password

# Extra `listen` sections; each entry provides `name` and `content`.
hap_config_listen: []
...
---
# Handlers for the haproxy role.

# Reloads the haproxy unit; notified by the `configure` task.
- name: reload haproxy
  systemd:
    state: reloaded
    name: haproxy
...
---
# Tasks for the haproxy role: install the package, render the configuration
# and apply a pending reload immediately.

- name: install packages
  apt:
    force_apt_get: true
    install_recommends: false
    name: "{{ hap_packages }}"

- name: configure
  notify: reload haproxy
  template:
    src: haproxy.cfg.j2
    # use the role variable (default: /etc/haproxy/haproxy.cfg) so that an
    # overridden `hap_config` is actually honoured
    dest: "{{ hap_config }}"
    backup: true

# run the notified `reload haproxy` handler now instead of at the end of the play
- meta: flush_handlers
...
{# HAProxy configuration assembled from the hap_config_* role variables. #}
global
{{ hap_config_global }}
defaults
{{ hap_config_defaults }}
{# the stats listener is rendered only when hap_stats_enabled is true #}
{% if hap_stats_enabled %}
listen stats
{{ hap_config_stats }}
{% endif %}
{# one `listen` section per hap_config_listen entry (keys used: name, content) #}
{% for listen in hap_config_listen %}
listen {{ listen.name }}
{{ listen.content }}
{% endfor %}
---
# Default variables for the repmgr role.

# Packages installed through apt.
repmgr_packages:
  - repmgr
  # rephacheck:
  - python3
  - python3-psycopg2
  - python3-toml

# PostgreSQL version and cluster name; fall back to '11' / 'main' when
# pg_version / pg_cluster are not defined.
repmgr_pg_version: "{{ pg_version | default('11') }}"
repmgr_pg_cluster: "{{ pg_cluster | default('main') }}"

# Paths derived from the version and cluster name.
repmgr_pg_data: "/var/lib/postgresql/{{ repmgr_pg_version }}/{{ repmgr_pg_cluster }}"
repmgr_config: "/etc/postgresql/{{ repmgr_pg_version }}/{{ repmgr_pg_cluster }}/repmgr.conf"

# Database account and database used by repmgr.
repmgr_user: repmgr
repmgr_password: null
repmgr_db: repmgr
repmgr_roles: LOGIN,REPLICATION,SUPERUSER

# Host passed as --host when cloning a standby or registering a witness.
repmgr_primary_node: null

# Value of connect_timeout in the connection strings.
repmgr_timeout: 5

# Identity of this node in the cluster; no default id is provided.
repmgr_node_id: null
repmgr_node_name: "{{ ansible_hostname }}"
repmgr_conninfo: "host={{ ansible_default_ipv4.address }} dbname={{ repmgr_db }} user={{ repmgr_user }} connect_timeout={{ repmgr_timeout }}"

# TCP port the rephacheck socket listens on.
repmgr_repha_port: 8543
...
#!/usr/bin/env python3
"""
Determine the state of the local node by asking every node and taking a vote.
For this to work properly, you need to have an odd number of nodes.
"""
from collections import Counter
import psycopg2
import toml
def get_state(addr, port, node_id):
    """Ask the PostgreSQL instance at `addr`:`port` how it sees node `node_id`.

    The connection uses the module-level CONNINFO settings and TIMEOUT as
    connect timeout, and reads the node's row from `repmgr.nodes`.

    Returns the `(active, type)` row for the node, or `(False, 'unknown')`
    when the instance cannot be queried or has no row for `node_id`.
    """
    con = None
    try:
        # postgresql query
        con_args = {'host': addr, 'port': port, 'connect_timeout': TIMEOUT}
        con_args.update(CONNINFO)
        con = psycopg2.connect(**con_args)
        cur = con.cursor()
        # bind node_id as a query parameter instead of formatting it into the SQL
        query = 'SELECT active, type FROM repmgr.nodes WHERE node_id = %s;'
        cur.execute(query, (node_id,))
        data = cur.fetchone()
        cur.close()
    except Exception:
        # an error occurred, so return false by default
        return (False, 'unknown')
    finally:
        # always release the connection, even when the query failed
        if con is not None:
            try:
                con.close()
            except Exception:
                pass
    # no row for this node: report it as unknown rather than returning None,
    # which the caller could not unpack
    if data is None:
        return (False, 'unknown')
    return data
def get_quorum_state(node_id):
    """Return the state of node `node_id` as voted by every node in NODES.

    Each node is queried through get_state(); a node that does not report
    `node_id` as active votes 'fenced', otherwise it votes the reported type.
    The most common vote wins, except that a 'standby' result is turned into
    'fenced' when at least one vote says 'primary'.
    """
    # one ballot per configured node
    ballots = []
    for peer in NODES.values():
        is_active, node_type = get_state(peer['addr'], peer['port'], node_id)
        ballots.append(node_type if is_active else 'fenced')
    # the most frequent ballot is the result
    winner = Counter(ballots).most_common(1)[0][0]
    # a standby result while some node still reports primary is inconsistent
    if winner == 'standby' and 'primary' in ballots:
        return 'fenced'
    return winner
if __name__ == '__main__':
    # read the TOML settings file
    with open('/etc/postgresql/11/main/rephacheck.conf') as handle:
        settings = toml.load(handle)
    # module-level settings read by get_state() and get_quorum_state()
    NODES = settings.get('nodes')
    CONNINFO = settings.get('conninfo')
    TIMEOUT = settings.get('connect_timeout', 3)
    CURRENT = settings.get('local_node_id')
    # print the voted state of the local node
    print(get_quorum_state(CURRENT))
#!/usr/bin/env python3
"""
event_notification_command='/usr/local/bin/repmgr-event %n %e %s "%t" "%d"'
"""
import argparse
import json
import logging
import logging.handlers
import sys
# Command-line interface: five positional arguments, in this order.
PARSER = argparse.ArgumentParser()
PARSER.add_argument("node_id")
PARSER.add_argument("event_type")
PARSER.add_argument("success")
PARSER.add_argument("timestamp")
PARSER.add_argument("details")
# Parsed when the module is loaded, before main() is called.
ARGS = PARSER.parse_args()
# Event types main() processes; for any other type main() exits immediately.
MONIT_EVENTS = (
"standby_promote",
"repmgrd_failover_promote",
"repmgrd_failover_follow",
)
# Values substituted, in this order, into the log line written by main().
LOG_DATA = (ARGS.timestamp, ARGS.node_id, ARGS.event_type, ARGS.success, ARGS.details)
def main():
    """Write a repmgrd event to stdout, a rotating log file and, optionally, mail.

    Events whose type is not listed in MONIT_EVENTS are ignored. Settings are
    read from a JSON file when it can be opened; otherwise defaults are used.
    The process always exits with status 0.
    """
    # ignore every event type we are not monitoring
    if ARGS.event_type not in MONIT_EVENTS:
        sys.exit(0)

    # optional settings file; fall back to an empty mapping when it cannot be opened
    try:
        with open("/etc/postgresql/11/main/repmgr-event.json") as data_file:
            settings = json.load(data_file)
    except EnvironmentError:
        settings = {}

    log_file = settings.get("log", "/var/log/repmgr-event.log")
    mail = settings.get("mail", None)

    # resolve the mail handler arguments up front, defaults included
    smtp_args = None
    if mail:
        smtp_args = (
            mail.get("host", "localhost"),
            mail.get("sender", "root@localhost"),
            mail.get("recipients", ["root@localhost"]),
            "repmgr event: {}".format(ARGS.event_type),
            mail.get("credentials", None),
            mail.get("secure", None),
        )

    # one logger, up to three destinations
    logger = logging.getLogger("repmgr_event")
    logger.setLevel(logging.INFO)
    # stdout
    logger.addHandler(logging.StreamHandler(sys.stdout))
    # rotating file
    logger.addHandler(
        logging.handlers.RotatingFileHandler(log_file, maxBytes=102400, backupCount=7)
    )
    # mail, only when configured
    if smtp_args is not None:
        logger.addHandler(logging.handlers.SMTPHandler(*smtp_args))

    # emit the event and leave
    logger.info("%s - %s:%s:%s - %s", *LOG_DATA)
    sys.exit(0)


if __name__ == "__main__":
    main()
---
# Handlers for the repmgr role.

# Restarts the PostgreSQL cluster instance managed by this role.
- name: restart postgresql
  systemd:
    state: restarted
    name: "postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}"

# Restarts the repmgr daemon; notified after configuration and registration.
- name: restart repmgrd
  systemd:
    state: restarted
    name: repmgrd
...
---
# INSTALLATION
# Install repmgr and the python libraries used by rephacheck.
- name: install packages
  apt:
    name: "{{ repmgr_packages }}"
    install_recommends: false
    force_apt_get: true
# POSTGRESQL
# Run the postgres role with the settings a repmgr cluster needs.
- name: postgresql
  include_role:
    name: postgres
  vars:
    # authentication rules for regular connections, then the same set of
    # sources for the `replication` database
    pg_hba:
      - type: local
        method: peer
      - type: host
        address: '127.0.0.1/32'
      - type: host
        address: '::1/128'
      - type: host
        address: '0.0.0.0/0'
      - type: host
        address: '::/0'
      - type: local
        database: replication
        method: peer
      - type: host
        database: replication
        address: '127.0.0.1/32'
      - type: host
        database: replication
        address: '::1/128'
      - type: host
        database: replication
        address: '0.0.0.0/0'
      - type: host
        database: replication
        address: '::/0'
    # configuration snippets: listen on every address and preload repmgr
    pg_conf:
      - name: main
        content: |
          listen_addresses = '*'
      - name: modules
        content: |
          shared_preload_libraries = 'repmgr'
    # account and database used by repmgr
    pg_users:
      - name: "{{ repmgr_user }}"
        password: "{{ repmgr_password }}"
        roles: "{{ repmgr_roles }}"
    pg_databases:
      - name: "{{ repmgr_db }}"
        owner: "{{ repmgr_user }}"
    # firewall: PostgreSQL port plus the rephacheck port
    pg_ferm_input_rules:
      - proto:
          - tcp
        dport:
          - 5432
          - "{{ repmgr_repha_port }}"
# CONFIGURATION
# Render repmgr.conf; repmgrd is restarted when it changes.
- name: configure repmgr
  notify: restart repmgrd
  template:
    dest: "{{ repmgr_config }}"
    src: repmgr.conf.j2
    owner: postgres
    group: postgres

# Set (or uncomment) REPMGRD_ENABLED and REPMGRD_CONF in /etc/default/repmgrd.
- name: configure debian default
  notify: restart repmgrd
  replace:
    path: /etc/default/repmgrd
    regexp: '^#?{{ item.key }}=.*$'
    replace: '{{ item.key }}={{ item.value }}'
  loop:
    - key: REPMGRD_ENABLED
      value: 'yes'
    - key: REPMGRD_CONF
      value: "{{ repmgr_config }}"

# Install the script referenced by event_notification_command in repmgr.conf.
- name: copy events notification script
  copy:
    dest: /usr/local/bin/repmgr-event
    src: repmgr-event.py
    mode: "0755"

# Let the postgres account manage its PostgreSQL unit through sudo without
# a password; the file is checked with visudo before being installed.
- name: configure sudo
  copy:
    dest: /etc/sudoers.d/postgres
    validate: visudo -cf %s
    content: |
      Defaults:postgres !requiretty
      postgres ALL=NOPASSWD: \
      /bin/systemctl start postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}, \
      /bin/systemctl stop postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}, \
      /bin/systemctl restart postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}, \
      /bin/systemctl reload postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}
# SSH
# Give the postgres account an ed25519 keypair and authorize the public key
# of every member of the `postgres` group on this host.
- name: ensure postgres account have a ssh keypair
  user:
    name: postgres
    generate_ssh_key: true
    ssh_key_type: ed25519
    ssh_key_file: ~postgres/.ssh/id_ed25519

- name: fetch postgres ssh public key
  register: repmgr_postgres_ssh_pubkey
  slurp:
    path: ~postgres/.ssh/id_ed25519.pub

# slurp returns the file base64-encoded; store the decoded key as a fact so
# the other hosts can read it through hostvars
- name: register postgres ssh public key as an ansible fact
  set_fact:
    pubkey: "{{ repmgr_postgres_ssh_pubkey['content'] | b64decode }}"

- name: share postgres ssh public key between cluster members
  loop: "{{ groups['postgres'] }}"
  authorized_key:
    user: postgres
    key: "{{ hostvars[item]['pubkey'] }}"

- name: postgres ssh client configuration
  copy:
    dest: ~postgres/.ssh/config
    owner: postgres
    group: postgres
    # IdentityFile must name the key generated above (id_ed25519); the
    # previous value `~/.ssh/ed25519` pointed at a file that is never created
    content: |
      IdentityFile ~/.ssh/id_ed25519
      StrictHostKeyChecking no
      UserKnownHostsFile /dev/null
# REGISTER PRIMARY
# Only on hosts whose db_role is "primary".
- name: setup primary
  when: db_role == "primary"
  block:
    # an empty result means the `nodes` table does not exist yet
    - name: check if primary already joined
      register: repmgr_check_primary
      become: true
      become_user: postgres
      postgresql_query:
        db: repmgr
        query: SELECT 1 FROM pg_tables WHERE tablename='nodes'

    # registration is skipped once the check above returns a row
    - name: register primary
      when: repmgr_check_primary.query_result | length == 0
      notify: restart repmgrd
      become: true
      become_user: postgres
      command:
        cmd: repmgr --config-file={{ repmgr_config }} primary register

# run the notified handlers now
- meta: flush_handlers
# REGISTER STANDBY
# Only on hosts whose db_role is "standby": clone the data directory from
# the primary, restoring the previous one when the clone fails.
- name: setup standby
  when: db_role == "standby"
  block:
    # an empty result means the `nodes` table does not exist yet
    - name: check if standby already joined
      register: repmgr_check_standby
      become: true
      become_user: postgres
      postgresql_query:
        db: repmgr
        query: SELECT 1 FROM pg_tables WHERE tablename='nodes'

    - name: stop postgresql service
      when: repmgr_check_standby.query_result | length == 0
      systemd:
        state: stopped
        name: "postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}"

    # the data directory is moved aside, not deleted, so it can be restored
    - name: remove existing pgdata
      when: repmgr_check_standby.query_result | length == 0
      command:
        removes: "{{ repmgr_pg_data }}"
        cmd: mv -vf {{ repmgr_pg_data }} {{ repmgr_pg_data }}.save

    # errors are ignored here so the cleanup tasks below can run; the
    # failure is reported by the `standby clone failed` task
    - name: clone from primary to standby
      when: repmgr_check_standby.query_result | length == 0
      register: repmgr_clone_standby
      ignore_errors: true
      become: true
      become_user: postgres
      shell:
        cmd: |
          repmgr \
          --config-file={{ repmgr_config }} \
          --force \
          --dbname={{ repmgr_db }} \
          --host={{ repmgr_primary_node }} \
          --port=5432 \
          --username={{ repmgr_user }} \
          --pgdata={{ repmgr_pg_data }} \
          standby clone --fast-checkpoint

    - name: remove pgdata backup
      when: repmgr_clone_standby is succeeded
      file:
        state: absent
        path: "{{ repmgr_pg_data }}.save"

    - name: remove failed clone pgdata
      when: repmgr_clone_standby is failed
      file:
        state: absent
        path: "{{ repmgr_pg_data }}"

    - name: restore pgdata backup
      when: repmgr_clone_standby is failed
      command:
        removes: "{{ repmgr_pg_data }}.save"
        cmd: mv -vf {{ repmgr_pg_data }}.save {{ repmgr_pg_data }}

    - name: start postgresql service
      systemd:
        state: started
        name: "postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}"

    # stop the play with the clone's error output
    - name: standby clone failed
      when: repmgr_clone_standby is failed
      fail:
        msg: "{{ repmgr_clone_standby.stderr }}"

    - name: register standby
      when: repmgr_check_standby.query_result | length == 0
      notify: restart repmgrd
      become: true
      become_user: postgres
      command:
        cmd: repmgr --config-file={{ repmgr_config }} standby register

# run the notified handlers now
- meta: flush_handlers
# REGISTER WITNESS
# Only on hosts whose db_role is "witness".
- name: setup witness
  when: db_role == "witness"
  block:
    # an empty result means the `nodes` table does not exist yet
    - name: check if witness already joined
      register: repmgr_check_witness
      become: true
      become_user: postgres
      postgresql_query:
        db: repmgr
        query: SELECT 1 FROM pg_tables WHERE tablename='nodes'

    # the witness registers against the primary given by repmgr_primary_node
    - name: register witness
      when: repmgr_check_witness.query_result | length == 0
      notify: restart repmgrd
      become: true
      become_user: postgres
      command:
        cmd: repmgr --config-file={{ repmgr_config }} --host={{ repmgr_primary_node }} witness register

# run the notified handlers now
- meta: flush_handlers
# REPHACHECK
# Install the rephacheck script and expose it through a systemd socket unit.
- name: install rephacheck
  copy:
    src: rephacheck.py
    dest: /usr/bin/rephacheck
    mode: "0755"

# store the values as host facts so the rephacheck.conf template can read
# them for every node through hostvars
- name: register variables needed by rephacheck as facts
  set_fact:
    repmgr_node_name: "{{ repmgr_node_name }}"
    repmgr_node_id: "{{ repmgr_node_id }}"

# NOTE: rephacheck.py opens this exact path; keep both in sync
- name: configure rephacheck
  template:
    src: rephacheck.conf.j2
    dest: /etc/postgresql/11/main/rephacheck.conf
    owner: postgres
    group: postgres
    mode: "0644"

# Accept=yes: one rephacheck@ service instance is spawned per connection
- name: configure rephacheck socket
  copy:
    dest: /etc/systemd/system/rephacheck.socket
    content: |
      [Unit]
      Description=RepHACheck socket
      [Socket]
      ListenStream={{ repmgr_repha_port }}
      Accept=yes
      [Install]
      WantedBy=sockets.target

# the script's output is sent back over the accepted connection
- name: configure rephacheck service
  copy:
    dest: /etc/systemd/system/rephacheck@.service
    content: |
      [Unit]
      Description=RepHACheck - Health check for PostgreSQL cluster managed by repmgr
      [Service]
      ExecStart=-/usr/bin/rephacheck
      StandardInput=socket
      User=postgres
      Group=postgres

# daemon_reload makes systemd re-read the unit files written above, so new
# or changed units are taken into account before the socket is started
- name: enable and start rephacheck
  systemd:
    name: rephacheck.socket
    daemon_reload: true
    state: started
    enabled: true
...
{# Settings read by rephacheck: local node id, connect timeout, node list, credentials. #}
local_node_id = "{{ repmgr_node_id }}"
connect_timeout = "{{ repmgr_timeout }}"
{# one [nodes."<name>"] table per host of the current play #}
{% for node in play_hosts %}
[nodes."{{ hostvars[node]['repmgr_node_name'] }}"]
addr = "{{ hostvars[node]['ansible_default_ipv4']['address'] }}"
port = 5432
node_id = {{ hostvars[node]['repmgr_node_id'] }}
{% endfor %}
{# extra connection arguments merged into every connection made by rephacheck #}
[conninfo]
dbname = "{{ repmgr_db }}"
user = "{{ repmgr_user }}"
password = "{{ repmgr_password }}"
{# repmgr configuration rendered from the repmgr_* role variables. #}
node_id={{ repmgr_node_id }}
node_name={{ repmgr_node_name }}
conninfo='{{ repmgr_conninfo }}'
{# same variable the standby clone passes as --pgdata, so both always agree #}
data_directory={{ repmgr_pg_data }}
use_replication_slots=1
event_notification_command='/usr/local/bin/repmgr-event %n %e %s "%t" "%d"'
pg_bindir=/usr/lib/postgresql/{{ repmgr_pg_version }}/bin/
pg_ctl_options='-s'
pg_basebackup_options='--label=repmgr_backup'
primary_follow_timeout=30
failover=automatic
reconnect_attempts=6
reconnect_interval=5
promote_command='repmgr standby promote'
follow_command='repmgr standby follow -W'
{# these commands match the entries allowed in /etc/sudoers.d/postgres #}
service_start_command='sudo systemctl start postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}'
service_stop_command='sudo systemctl stop postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}'
service_restart_command='sudo systemctl restart postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}'
service_reload_command='sudo systemctl reload postgresql@{{ repmgr_pg_version }}-{{ repmgr_pg_cluster }}'
---
postgres_host: "{{ envsetup_db_host }}"
postgres_port: "{{ envsetup_db_port }}"
postgres_pwd: "{{ envsetup_db_pg_root_pwd }}"
pg_packages:
- postgresql
postgres_firewall_enabled: true
postgres_ferm_rules_filename: postgres
postgres_ferm_input_rules:
pg_version: 11
pg_cluster: main
pg_password: "{{ envsetup_db_pg_root_pwd | d() }}"
pg_conf_dir: /etc/postgresql/{{ pg_version }}/{{ pg_cluster }}
pg_conf:
- name: main
content:
pg_hba:
- type: local
method: peer
- type: host
address: 127.0.0.1/32
- type: host
address: ::1/128
- type: local
database: replication
method: peer
- type: host
database: replication
address: 127.0.0.1/32
- type: host
database: replication
address: ::1/128
pg_users: []
pg_databases: []
pg_firewall_enabled: true
pg_ferm_rules_filename: postgres
pg_ferm_input_rules:
- proto:
- tcp
dport:
- 5432
postgres_ferm_output_rules: []
postgres_ferm_global_settings:
pg_ferm_output_rules: []
pg_ferm_global_settings:
...
---
# Handlers for the postgres role.

# Restarts the cluster instance selected by pg_version / pg_cluster.
- name: restart postgresql
  systemd:
    state: restarted
    name: "postgresql@{{ pg_version }}-{{ pg_cluster }}"
...
......@@ -6,33 +6,107 @@
install_recommends: false
name: python3-psycopg2
- name: postgresql install
- name: install packages
apt:
force_apt_get: true
install_recommends: false
name: postgresql
name: "{{ pg_packages }}"
- name: ensure postgresql is running
service:
name: postgresql
# CONFIGURATION
# Settings are dropped as separate files under conf.d.
- name: ensure conf directory exists
  file:
    state: directory
    path: "{{ pg_conf_dir }}/conf.d"
    owner: postgres
    group: postgres

# point (or uncomment) include_dir at conf.d, keeping the rest of the line
- name: ensure conf directory is included
  replace:
    path: "{{ pg_conf_dir }}/postgresql.conf"
    regexp: "^#?include_dir = '[A-Za-z\\.]+'(\\s+.*)$"
    replace: "include_dir = 'conf.d'\\1"
    backup: true

# one file per pg_conf entry; entries without content are skipped
- name: configure custom settings
  when: item.content | d(false)
  loop: "{{ pg_conf }}"
  notify: restart postgresql
  copy:
    dest: "{{ pg_conf_dir }}/conf.d/{{ item.name }}.conf"
    content: "{{ item.content }}"
    owner: postgres
    group: postgres
    backup: true

- name: configure authentication
  notify: restart postgresql
  template:
    src: pg_hba.conf.j2
    dest: "{{ pg_conf_dir }}/pg_hba.conf"
    owner: postgres
    group: postgres
    mode: "0640"
    backup: true

- name: ensure service is enabled and running
  systemd:
    name: "postgresql@{{ pg_version }}-{{ pg_cluster }}"
    state: started
    enabled: true
- name: postgresql set superuser password
# USERS
- name: set superuser password
become: true
become_user: postgres
no_log: true
postgresql_user:
name: postgres
password: "{{ postgres_pwd }}"
password: "{{ pg_password | d(omit) }}"
# Create or update every account listed in pg_users; only `name` is
# required, the other keys are omitted when an entry does not set them.
- name: manage users
  no_log: true
  loop: "{{ pg_users }}"
  become: true
  become_user: postgres
  postgresql_user:
    name: "{{ item.name }}"
    password: "{{ item.password | d(omit) }}"
    role_attr_flags: "{{ item.roles | d(omit) }}"
    db: "{{ item.db | d(omit) }}"
    priv: "{{ item.priv | d(omit) }}"
# Write one .pgpass entry per pg_users account, for both root and postgres.
- name: set .pgpass to allow passwordless connection
  # the block contains clear-text passwords: keep them out of logs and diffs
  no_log: true
  # `password` is optional for pg_users entries; skip accounts without one
  # instead of failing on an undefined value or writing an empty entry
  when: item.1.password | d(false)
  loop: "{{ query('nested', ['root', 'postgres'], pg_users) }}"
  blockinfile:
    path: "~{{ item.0 }}/.pgpass"
    block: "*:*:*:{{ item.1.name }}:{{ item.1.password }}"
    # a distinct marker per account lets each entry be managed independently
    marker: "# {mark} {{ item.1.name }}"
    create: true
    owner: "{{ item.0 }}"
    group: "{{ item.0 }}"
    mode: "0600"
# DATABASES
# Create every database listed in pg_databases; `owner` is optional.
- name: create databases
  loop: "{{ pg_databases }}"
  become: true
  become_user: postgres
  postgresql_db:
    owner: "{{ item.owner | d(omit) }}"
    name: "{{ item.name }}"
# FIREWALL
- name: firewall
when: postgres_firewall_enabled
when: pg_firewall_enabled
vars:
ferm_rules_filename: "{{ postgres_ferm_rules_filename }}"
ferm_input_rules: "{{ postgres_ferm_input_rules }}"
ferm_output_rules: "{{ postgres_ferm_output_rules }}"
ferm_global_settings: "{{ postgres_ferm_global_settings }}"
ferm_rules_filename: "{{ pg_ferm_rules_filename }}"
ferm_input_rules: "{{ pg_ferm_input_rules }}"
ferm_output_rules: "{{ pg_ferm_output_rules }}"
ferm_global_settings: "{{ pg_ferm_global_settings }}"
include_role:
name: ferm
......
# {{ ansible_managed }}
# PostgreSQL Client Authentication Configuration File
# ===================================================
{# One rule per pg_hba entry: database and user default to 'all', address to
   an empty string and method to 'md5'; an optional comment is written above. #}
{% for connection in pg_hba %}
{% if connection.comment is defined %}
# {{ connection.comment }}
{% endif %}
{{ connection.type }} {{ connection.database | d('all') }} {{ connection.user | d('all') }} {{ connection.address | d() }} {{ connection.method | d('md5') }}
{% endfor %}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment