Solve race condition in password lookup (#42529)

NOTE:
1. use os.open() with os.O_CREAT|os.O_EXCL to check existence
and create a lock file if not exists, it's an atomic operation
2. the fastest process will create the lock file and others will
wait until the lock file is removed
3. after the writer finished writing to the password file, all the reading
operations use built-in open so processes can read the file parallel
This commit is contained in:
Zhikang Zhang 2018-08-15 15:10:52 -04:00 committed by GitHub
parent 5c1e620504
commit 0971a342d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 3 deletions

View file

@ -92,6 +92,9 @@ _raw:
import os
import string
import time
import shutil
import hashlib
from ansible.errors import AnsibleError, AnsibleAssertionError
from ansible.module_utils._text import to_bytes, to_native, to_text
@ -267,6 +270,40 @@ def _write_password_file(b_path, content):
f.write(b_content)
def _get_lock(b_path):
"""Get the lock for writing password file."""
first_process = False
b_pathdir = os.path.dirname(b_path)
lockfile_name = to_bytes("%s.ansible_lockfile" % hashlib.md5(b_path).hexdigest())
lockfile = os.path.join(b_pathdir, lockfile_name)
if not os.path.exists(lockfile) and b_path != to_bytes('/dev/null'):
try:
makedirs_safe(b_pathdir, mode=0o700)
fd = os.open(lockfile, os.O_CREAT | os.O_EXCL)
os.close(fd)
first_process = True
except OSError as e:
if e.strerror != 'File exists':
raise
counter = 0
# if the lock is got by other process, wait until it's released
while os.path.exists(lockfile) and not first_process:
time.sleep(2 ** counter)
if counter >= 2:
raise AnsibleError("Password lookup cannot get the lock in 7 seconds, abort..."
"This may caused by un-removed lockfile"
"you can manually remove it from controller machine at %s and try again" % lockfile)
counter += 1
return first_process, lockfile
def _release_lock(lockfile):
"""Release the lock so other processes can read the password file."""
if os.path.exists(lockfile):
os.remove(lockfile)
class LookupModule(LookupBase):
def run(self, terms, variables, **kwargs):
ret = []
@ -277,7 +314,10 @@ class LookupModule(LookupBase):
b_path = to_bytes(path, errors='surrogate_or_strict')
chars = _gen_candidate_chars(params['chars'])
changed = False
changed = None
# make sure only one process finishes all the job first
first_process, lockfile = _get_lock(b_path)
content = _read_password_file(b_path)
if content is None or b_path == to_bytes('/dev/null'):
@ -295,6 +335,10 @@ class LookupModule(LookupBase):
content = _format_content(plaintext_password, salt, encrypt=params['encrypt'])
_write_password_file(b_path, content)
if first_process:
# let other processes continue
_release_lock(lockfile)
if params['encrypt']:
password = do_encrypt(plaintext_password, params['encrypt'], salt=salt)
ret.append(password)