diff --git a/lib/ansible/modules/files/unarchive.py b/lib/ansible/modules/files/unarchive.py index e4bb230aef..cf9005a47b 100644 --- a/lib/ansible/modules/files/unarchive.py +++ b/lib/ansible/modules/files/unarchive.py @@ -4,6 +4,7 @@ # (c) 2012, Michael DeHaan # (c) 2013, Dylan Martin # (c) 2015, Toshio Kuratomi +# (c) 2016, Dag Wieers # # This file is part of Ansible # @@ -59,17 +60,34 @@ options: choices: [ "yes", "no" ] default: "no" version_added: "2.0" -author: "Dylan Martin (@pileofrogs)" + exclude: + description: + - List the directory and file entries that you would like to exclude from the unarchive action. + required: false + default: [] + version_added: "2.1" + keep_newer: + description: + - Do not replace existing files that are newer than files from the archive. + required: false + default: no + version_added: "2.1" + extra_opts: + description: + - Specify additional options by passing in an array. + default: + required: false + version_added: "2.1" +author: "Dag Wieers (@dagwieers)" todo: - - detect changed/unchanged for .zip files - - handle common unarchive args, like preserve owner/timestamp etc... + - re-implement tar support using native tarfile module + - re-implement zip support using native zipfile module notes: - - requires C(tar)/C(unzip) command on target host + - requires C(gtar)/C(unzip) command on target host - can handle I(gzip), I(bzip2) and I(xz) compressed as well as uncompressed tar files - detects type of archive automatically - - uses tar's C(--diff arg) to calculate if changed or not. If this C(arg) is not + - uses gtar's C(--diff arg) to calculate if changed or not. If this C(arg) is not supported, it will always unpack the archive - - does not detect if a .zip file is different from destination - always unzips - existing files/directories in the destination which are not in the archive are not touched. This is the same behavior as a normal archive extraction - existing files/directories in the destination which are not in the archive @@ -89,46 +107,360 @@ EXAMPLES = ''' import re import os +import stat +import pwd +import grp +import datetime +import time +import binascii from zipfile import ZipFile # String from tar that shows the tar contents are different from the # filesystem -DIFFERENCE_RE = re.compile(r': (.*) differs$') +OWNER_DIFF_RE = re.compile(r': Uid differs$') +GROUP_DIFF_RE = re.compile(r': Gid differs$') +MODE_DIFF_RE = re.compile(r': Mode differs$') +#NEWER_DIFF_RE = re.compile(r' is newer or same age.$') +MISSING_FILE_RE = re.compile(r': Warning: Cannot stat: No such file or directory$') +ZIP_FILE_MODE_RE = re.compile(r'([r-][w-][stx-]){3}') # When downloading an archive, how much of the archive to download before # saving to a tempfile (64k) BUFSIZE = 65536 +# Return a CRC32 checksum of a file +def crc32(path): + return binascii.crc32(open(path).read()) & 0xffffffff + class UnarchiveError(Exception): pass # class to handle .zip files class ZipArchive(object): - def __init__(self, src, dest, module): + def __init__(self, src, dest, file_args, module): self.src = src self.dest = dest + self.file_args = file_args + self.opts = module.params['extra_opts'] self.module = module + self.excludes = module.params['exclude'] + self.includes = [] self.cmd_path = self.module.get_bin_path('unzip') self._files_in_archive = [] + self._infodict = dict() + + def _permstr_to_octal(self, modestr, umask): + ''' Convert a Unix permission string (rw-r--r--) into a mode (0644) ''' + revstr = modestr[::-1] + mode = 0 + for j in range(0, 3): + for i in range(0, 3): + if revstr[i+3*j] in ['r', 'w', 'x', 's', 't']: + mode += 2**(i+3*j) + # The unzip utility does not support setting the stST bits +# if revstr[i+3*j] in ['s', 't', 'S', 'T' ]: +# mode += 2**(9+j) + return ( mode & ~umask ) + + def _crc32(self, path): + if self._infodict: + return self._infodict[path] + + archive = ZipFile(self.src) + try: + for item in archive.infolist(): + self._infodict[item.filename] = long(item.CRC) + except: + archive.close() + raise UnarchiveError('Unable to list files in the archive') + + return self._infodict[path] @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive + self._files_in_archive = [] archive = ZipFile(self.src) try: - self._files_in_archive = archive.namelist() + for member in archive.namelist(): + if member not in self.excludes: + self._files_in_archive.append(member) except: + archive.close() raise UnarchiveError('Unable to list files in the archive') + archive.close() return self._files_in_archive - def is_unarchived(self, mode, owner, group): - return dict(unarchived=False) + def is_unarchived(self): + cmd = '%s -ZT -s "%s"' % (self.cmd_path, self.src) + if self.excludes: + cmd += ' -x "' + '" "'.join(self.excludes) + '"' + rc, out, err = self.module.run_command(cmd) + + old_out = out + diff = '' + out = '' + if rc == 0: + unarchived = True + else: + unarchived = False + + # Get some information related to user/group ownership + umask = os.umask(0) + os.umask(umask) + + # Get current user and group information + groups = os.getgroups() + run_uid = os.getuid() + run_gid = os.getgid() + try: + run_owner = pwd.getpwuid(run_uid).pw_name + except: + run_owner = run_uid + try: + run_group = grp.getgrgid(run_gid).gr_name + except: + run_group = run_gid + + # Get future user ownership + fut_owner = fut_uid = None + if self.file_args['owner']: + try: + tpw = pwd.getpwname(self.file_args['owner']) + except: + try: + tpw = pwd.getpwuid(self.file_args['owner']) + except: + tpw = pwd.getpwuid(run_uid) + fut_owner = tpw.pw_name + fut_uid = tpw.pw_uid + else: + try: + fut_owner = run_owner + except: + pass + fut_uid = run_uid + + # Get future group ownership + fut_group = fut_gid = None + if self.file_args['group']: + try: + tgr = grp.getgrnam(self.file_args['group']) + except: + try: + tgr = grp.getgrgid(self.file_args['group']) + except: + tgr = grp.getgrgid(run_gid) + fut_group = tgr.gr_name + fut_gid = tgr.gr_gid + else: + try: + fut_group = run_group + except: + pass + fut_gid = run_gid + + for line in old_out.splitlines(): + change = False + + pcs = line.split() + if len(pcs) != 8: continue + + ztype = pcs[0][0] + permstr = pcs[0][1:10] + version = pcs[0][1] + ostype = pcs[0][2] + size = int(pcs[3]) + path = pcs[7] + + # Skip excluded files + if path in self.excludes: + out += 'Path %s is excluded on request\n' % path + continue + + # Itemized change requires L for symlink + if path[-1] == '/': + if ztype != 'd': + err += 'Path %s incorrectly tagged as "%s", but is a directory.\n' % (path, ztype) + ftype = 'd' + elif ztype == 'l': + ftype = 'L' + elif ztype == '-': + ftype = 'f' + elif ztype == '?': + ftype = 'f' + + # Some files may be storing FAT permissions, not Unix permissions + if len(permstr) == 6: + if path[-1] == '/': + permstr = 'rwxrwxrwx' + elif permstr == 'rwx---': + permstr = 'rwxrwxrwx' + else: + permstr = 'rw-rw-rw-' + + # Test string conformity + if len(permstr) != 9 or not ZIP_FILE_MODE_RE.match(permstr): + raise UnarchiveError('ZIP info perm format incorrect, %s' % permstr) + + # DEBUG +# err += "%s%s %10d %s\n" % (ztype, permstr, size, path) + + dest = os.path.join(self.dest, path) + try: + st = os.lstat(dest) + except: + change = True + self.includes.append(path) + err += 'Path %s is missing\n' % path + diff += '>%s++++++.?? %s\n' % (ftype, path) + continue + + # Compare file types + if ftype == 'd' and not stat.S_ISDIR(st.st_mode): + change = True + self.includes.append(path) + err += 'File %s already exists, but not as a directory\n' % path + diff += 'c%s++++++.?? %s\n' % (ftype, path) + continue + + if ftype == 'f' and not stat.S_ISREG(st.st_mode): + change = True + unarchived = False + self.includes.append(path) + err += 'Directory %s already exists, but not as a regular file\n' % path + diff += 'c%s++++++.?? %s\n' % (ftype, path) + continue + + if ftype == 'L' and not stat.S_ISLNK(st.st_mode): + change = True + self.includes.append(path) + err += 'Directory %s already exists, but not as a symlink\n' % path + diff += 'c%s++++++.?? %s\n' % (ftype, path) + continue + + itemized = bytearray('.%s.......??' % ftype) + + dt_object = datetime.datetime(*(time.strptime(pcs[6], '%Y%m%d.%H%M%S')[0:6])) + timestamp = time.mktime(dt_object.timetuple()) + + # Compare file timestamps + if stat.S_ISREG(st.st_mode): + if self.module.params['keep_newer']: + if timestamp > st.st_mtime: + change = True + self.includes.append(path) + err += 'File %s is older, replacing file\n' % path + itemized[4] = 't' + elif stat.S_ISREG(st.st_mode) and timestamp < st.st_mtime: + # Add to excluded files, ignore other changes + out += 'File %s is newer, excluding file\n' % path + continue + else: + if timestamp != st.st_mtime: + change = True + self.includes.append(path) + err += 'File %s differs in mtime (%f vs %f)\n' % (path, timestamp, st.st_mtime) + itemized[4] = 't' + + # Compare file sizes + if stat.S_ISREG(st.st_mode) and size != st.st_size: + change = True + err += 'File %s differs in size (%d vs %d)\n' % (path, size, st.st_size) + itemized[3] = 's' + + # Compare file checksums + if stat.S_ISREG(st.st_mode): + crc = crc32(dest) + if crc != self._crc32(path): + change = True + err += 'File %s differs in CRC32 checksum (0x%08x vs 0x%08x)\n' % (path, self._crc32(path), crc) + itemized[2] = 'c' + + # Compare file permissions + + # Do not handle permissions of symlinks + if ftype != 'L': + # Only special files require no umask-handling + if ztype == '?': + mode = self._permstr_to_octal(permstr, 0) + else: + mode = self._permstr_to_octal(permstr, umask) + if self.file_args['mode'] and self.file_args['mode'] != stat.S_IMODE(st.st_mode): + change = True + err += 'Path %s differs in permissions (%o vs %o)\n' % (path, self.file_args['mode'], stat.S_IMODE(st.st_mode)) + itemized[5] = 'p' + elif mode != stat.S_IMODE(st.st_mode): + change = True + itemized[5] = 'p' + err += 'Path %s differs in permissions (%o vs %o)\n' % (path, mode, stat.S_IMODE(st.st_mode)) + + # Compare file user ownership + owner = uid = None + try: + owner = pwd.getpwuid(st.st_uid).pw_name + except: + uid = st.st_uid + + # If we are not root and requested owner is not our user, fail + if run_uid != 0 and (fut_owner != run_owner or fut_uid != run_uid): + raise UnarchiveError('Cannot change ownership of %s to %s, as user %s' % (path, fut_owner, run_owner)) + + if owner and owner != fut_owner: + change = True + err += 'Path %s is owned by user %s, not by user %s as expected\n' % (path, owner, fut_owner) + itemized[6] = 'o' + elif uid and uid != fut_uid: + change = True + err += 'Path %s is owned by uid %s, not by uid %s as expected\n' % (path, uid, fut_uid) + itemized[6] = 'o' + + # Compare file group ownership + group = gid = None + try: + group = grp.getgrgid(st.st_gid).gr_name + except: + gid = st.st_gid + + if run_uid != 0 and fut_gid not in groups: + raise UnarchiveError('Cannot change group ownership of %s to %s, as user %s' % (path, fut_group, run_owner)) + + if group and group != fut_group: + change = True + err += 'Path %s is owned by group %s, not by group %s as expected\n' % (path, group, fut_group) + itemized[6] = 'g' + elif gid and gid != fut_gid: + change = True + err += 'Path %s is owned by gid %s, not by gid %s as expected\n' % (path, gid, fut_gid) + itemized[6] = 'g' + + # Register changed files and finalize diff output + if change: + if path not in self.includes: + self.includes.append(path) + diff += '%s %s\n' % (itemized, path) + + if self.includes: + unarchived = False + + # DEBUG +# out = old_out + out + + return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd, diff=diff) def unarchive(self): - cmd = '%s -o "%s" -d "%s"' % (self.cmd_path, self.src, self.dest) + cmd = '%s -o "%s"' % (self.cmd_path, self.src) + if self.opts: + cmd += ' ' + ' '.join(self.opts) + if self.includes: + cmd += ' "' + '" "'.join(self.includes) + '"' + # We don't need to handle excluded files, since we simply do not include them +# if self.excludes: +# cmd += ' -x ' + ' '.join(self.excludes) + cmd += ' -d "%s"' % self.dest rc, out, err = self.module.run_command(cmd) return dict(cmd=cmd, rc=rc, out=out, err=err) @@ -145,10 +477,13 @@ class ZipArchive(object): # class to handle gzipped tar files class TgzArchive(object): - def __init__(self, src, dest, module): + def __init__(self, src, dest, file_args, module): self.src = src self.dest = dest + self.file_args = file_args + self.opts = module.params['extra_opts'] self.module = module + self.excludes = [ path.rstrip('/') for path in self.module.params['exclude']] # Prefer gtar (GNU tar) as it supports the compression options -zjJ self.cmd_path = self.module.get_bin_path('gtar', None) if not self.cmd_path: @@ -162,49 +497,77 @@ class TgzArchive(object): if self._files_in_archive and not force_refresh: return self._files_in_archive - cmd = '%s -t%sf "%s"' % (self.cmd_path, self.zipflag, self.src) + cmd = '%s -t%s' % (self.cmd_path, self.zipflag) + if self.opts: + cmd += ' ' + ' '.join(self.opts) + if self.excludes: + cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' + cmd += ' -f "%s"' % self.src rc, out, err = self.module.run_command(cmd) if rc != 0: raise UnarchiveError('Unable to list files in the archive') for filename in out.splitlines(): - if filename: + if filename and filename not in self.excludes: self._files_in_archive.append(filename) return self._files_in_archive - def is_unarchived(self, mode, owner, group): - cmd = '%s -C "%s" --diff -%sf "%s"' % (self.cmd_path, self.dest, self.zipflag, self.src) + def is_unarchived(self): + cmd = '%s -C "%s" -d%s' % (self.cmd_path, self.dest, self.zipflag) + if self.opts: + cmd += ' ' + ' '.join(self.opts) + if self.file_args['owner']: + cmd += ' --owner="%s"' % self.file_args['owner'] + if self.file_args['group']: + cmd += ' --group="%s"' % self.file_args['group'] + if self.file_args['mode']: + cmd += ' --mode="%s"' % self.file_args['mode'] + if self.module.params['keep_newer']: + cmd += ' --keep-newer-files' + if self.excludes: + cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' + cmd += ' -f "%s"' % self.src rc, out, err = self.module.run_command(cmd) - unarchived = (rc == 0) - if not unarchived: - # Check whether the differences are in something that we're - # setting anyway - # What will be set - to_be_set = set() - for perm in (('Mode', mode), ('Gid', group), ('Uid', owner)): - if perm[1] is not None: - to_be_set.add(perm[0]) + # Check whether the differences are in something that we're + # setting anyway - # What is different - changes = set() - if err: - # Assume changes if anything returned on stderr - # * Missing files are known to trigger this - return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd) - for line in out.splitlines(): - match = DIFFERENCE_RE.search(line) - if not match: - # Unknown tar output. Assume we have changes - return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd) - changes.add(match.groups()[0]) - - if changes and changes.issubset(to_be_set): - unarchived = True + # What is different + unarchived = True + old_out = out + out = '' + run_uid = os.getuid() + # When unarchiving as a user, or when owner/group/mode is supplied --diff is insufficient + # Only way to be sure is to check request with what is on disk (as we do for zip) + # Leave this up to set_fs_attributes_if_different() instead of inducing a (false) change + for line in old_out.splitlines() + err.splitlines(): + if run_uid == 0 and not self.file_args['owner'] and OWNER_DIFF_RE.search(line): + out += line + '\n' + if run_uid == 0 and not self.file_args['group'] and GROUP_DIFF_RE.search(line): + out += line + '\n' + if not self.file_args['mode'] and MODE_DIFF_RE.search(line): + out += line + '\n' + if MISSING_FILE_RE.search(line): + out += line + '\n' + if out: + unarchived = False return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd) def unarchive(self): - cmd = '%s -x%sf "%s"' % (self.cmd_path, self.zipflag, self.src) + cmd = '%s -C "%s" -x%s' % (self.cmd_path, self.dest, self.zipflag) + if self.opts: + cmd += ' ' + ' '.join(self.opts) + if self.file_args['owner']: + cmd += ' --owner="%s"' % self.file_args['owner'] + if self.file_args['group']: + cmd += ' --group="%s"' % self.file_args['group'] + if self.file_args['mode']: + cmd += ' --mode="%s"' % self.file_args['mode'] + if self.module.params['keep_newer']: + cmd += ' --keep-newer-files' + if self.excludes: + cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' + cmd += ' -f "%s"' % (self.src) rc, out, err = self.module.run_command(cmd, cwd=self.dest) return dict(cmd=cmd, rc=rc, out=out, err=err) @@ -224,30 +587,30 @@ class TgzArchive(object): # class to handle tar files that aren't compressed class TarArchive(TgzArchive): - def __init__(self, src, dest, module): - super(TarArchive, self).__init__(src, dest, module) + def __init__(self, src, dest, file_args, module): + super(TarArchive, self).__init__(src, dest, file_args, module) self.zipflag = '' # class to handle bzip2 compressed tar files class TarBzipArchive(TgzArchive): - def __init__(self, src, dest, module): - super(TarBzipArchive, self).__init__(src, dest, module) + def __init__(self, src, dest, file_args, module): + super(TarBzipArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'j' # class to handle xz compressed tar files class TarXzArchive(TgzArchive): - def __init__(self, src, dest, module): - super(TarXzArchive, self).__init__(src, dest, module) + def __init__(self, src, dest, file_args, module): + super(TarXzArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'J' # try handlers in order and return the one that works or bail if none work -def pick_handler(src, dest, module): +def pick_handler(src, dest, file_args, module): handlers = [TgzArchive, ZipArchive, TarArchive, TarBzipArchive, TarXzArchive] for handler in handlers: - obj = handler(src, dest, module) + obj = handler(src, dest, file_args, module) if obj.can_handle_archive(): return obj module.fail_json(msg='Failed to find handler for "%s". Make sure the required command to extract the file is installed.' % src) @@ -262,23 +625,26 @@ def main(): dest = dict(required=True, type='path'), copy = dict(default=True, type='bool'), creates = dict(required=False, type='path'), - list_files = dict(required=False, default=False, type='bool'), + list_files = dict(required=False, default=False, type='bool'), + keep_newer = dict(required=False, default=False, type='bool'), + exclude = dict(requited=False, default=[], type='list'), + extra_opts = dict(required=False, default=[], type='list'), ), - add_file_common_args=True, + add_file_common_args = True, +# supports_check_mode = True, ) - src = module.params['src'] - dest = module.params['dest'] + src = os.path.expanduser(module.params['src']) + dest = os.path.expanduser(module.params['dest']) copy = module.params['copy'] file_args = module.load_file_common_arguments(module.params) - # did tar file arrive? if not os.path.exists(src): if copy: module.fail_json(msg="Source '%s' failed to transfer" % src) # If copy=false, and src= contains ://, try and download the file to a temp directory. elif '://' in src: - tempdir = os.path.dirname(__file__) + tempdir = os.path.dirname(os.path.realpath(__file__)) package = os.path.join(tempdir, str(src.rsplit('/', 1)[1])) try: rsp, info = fetch_url(module, src) @@ -314,14 +680,17 @@ def main(): if not os.path.isdir(dest): module.fail_json(msg="Destination '%s' is not a directory" % dest) - handler = pick_handler(src, dest, module) + handler = pick_handler(src, dest, file_args, module) res_args = dict(handler=handler.__class__.__name__, dest=dest, src=src) # do we need to do unpack? - res_args['check_results'] = handler.is_unarchived(file_args['mode'], - file_args['owner'], file_args['group']) - if res_args['check_results']['unarchived']: + check_results = handler.is_unarchived() + + # DEBUG +# res_args['check_results'] = check_results + + if check_results['unarchived']: res_args['changed'] = False else: # do the unpack @@ -334,13 +703,18 @@ def main(): else: res_args['changed'] = True - # do we need to change perms? - for filename in handler.files_in_archive: - file_args['path'] = os.path.join(dest, filename) - try: - res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed']) - except (IOError, OSError), e: - module.fail_json(msg="Unexpected error when accessing exploded file: %s" % str(e)) + if check_results.get('diff', False): + res_args['diff'] = { 'prepared': check_results['diff'] } + + # Run only if we found differences (idempotence) or diff was missing + if res_args.get('diff', True): + # do we need to change perms? + for filename in handler.files_in_archive: + file_args['path'] = os.path.join(dest, filename) + try: + res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed']) + except (IOError, OSError), e: + module.fail_json(msg="Unexpected error when accessing exploded file: %s" % str(e)) if module.params['list_files']: res_args['files'] = handler.files_in_archive