Almost a complete rewrite to piggyback on ssh.py

This decreased the size of the plugin considerably, and I don't have to
account for every little corner case in the upstream ssh.py plugin.
This commit is contained in:
Jorge Gallegos 2025-09-02 17:32:36 -07:00
commit d6e4860eba
No known key found for this signature in database
2 changed files with 360 additions and 479 deletions

View file

@ -1,479 +0,0 @@
# Copyright (c) 202/ Red Hat
# GNU General Public License v3.0+ https://www.gnu.org/licenses/gpl-3.0.txt
from __future__ import absolute_import, annotations
import os
import os.path as ospath
import pty
import shlex
import shutil
import subprocess
import typing as T
import ansible.errors as errors
import ansible.module_utils.common.text.converters as converters
import ansible.plugins.connection as connection
import ansible.utils.display as display
DOCUMENTATION = """
author: Jorge A Gallegos <jgallego@redhat.com>
short_description: Run tasks via Google Cloud's CLI
description:
- use the `gcloud` CLI command to connect and copy files
- see https://cloud.google.com/sdk/gcloud/reference/compute/ssh and
https://cloud.google.com/sdk/gcloud/reference/compute/scp for details
- this connection plugin relies on `gcloud` to be available in your
PATH and for authentication to be done prior to usage
options:
instance:
required: true
description:
- The name of the instance to connect to.
type: string
vars:
- name: inventory_hostname
- name: ansible_host
- name: ansible_ssh_host
- name: ansible_gcloud_instance
remote_user:
required: true
description:
- The user to log in as.
type: string
vars:
- name: ansible_user
- name: ansible_ssh_user
- name: ansible_gcloud_user
env:
- name: ANSIBLE_REMOTE_USER
- name: ANSIBLE_GCLOUD_REMOTE_USER
ini:
- section: defaults
key: remote_user
- section: gcloud
key: remote_user
cli:
- name: user
keyword:
- name: remote_user
gcloud_executable:
description:
- Path to the gcloud executable, defaults to whatever is found in
the PATH environment variable
type: string
vars:
- name: ansible_gcloud_executable
ini:
- section: gcloud
key: executable
cli:
- name: gcloud_executable
option: --gcloud-executable
configuration:
description:
- File name of the configuration to use for this command invocation
type: string
vars:
- name: ansible_gcloud_configuration
ini:
- section: gcloud
key: configuration
env:
- name: CLOUDSDK_ACTIVE_CONFIG_NAME
cli:
- name: configuration
option: --gcloud-configuration
project:
required: true
description:
- The Google Cloud project ID to use for this invocation.
If omitted, then the current project is assumed
type: string
vars:
- name: ansible_gcloud_project
ini:
- section: gcloud
key: project
env:
- name: CLOUDSDK_PROJECT_ID
cli:
- name: project
option: --gcloud-project
zone:
required: true
description:
- Configures the zone to use when connecting
type: string
vars:
- name: ansible_gcloud_zone
ini:
- section: gcloud
key: zone
env:
- name: CLOUDSDK_COMPUTE_ZONE
cli:
- name: zone
option: --gcloud-zone
private_key_file:
description:
- The path to the SSH key file. By default,
this is ~/.ssh/google_compute_engine
type: string
vars:
- name: ansible_private_key_file
- name: ansible_ssh_private_key_file
- name: ansible_gcloud_private_key_file
env:
- name: ANSIBLE_PRIVATE_KEY_FILE
- name: ANSIBLE_GCLOUD_PRIVATE_KEY_FILE
ini:
- section: defaults
key: private_key_file
- section: gcloud
key: private_key_file
cli:
- name: private_key_file
option: --gcloud-private-key
default: ~/.ssh/google_compute_engine
use_tty:
description: add -tt to ssh commands to force tty allocation
type: bool
default: true
ini:
- section: ssh_connection
key: usetty
- section: gcloud
key: usetty
env:
- name: ANSIBLE_SSH_USE_TTY
vars:
- name: ansible_ssh_use_tty
- name: ansible_gcloud_use_tty
timeout:
description:
- This is the default amount of time we will wait while establishing
a connection.
- This also controls how long we can wait to access reading the
connection once established.
default: 10
type: int
env:
- name: ANSIBLE_TIMEOUT
- name: ANSIBLE_SSH_TIMEOUT
- name: ANSIBLE_GCLOUD_TIMEOUT
ini:
- section: defaults
key: timeout
- section: ssh_connection
key: timeout
- section: gcloud
key: timeout
vars:
- name: ansible_ssh_timeout
- name: ansible_gcloud_timeout
cli:
- name: timeout
ssh_args:
description: Arguments to pass to all SSH CLI tools.
type: string
default: '-C -o ControlMaster=auto -o ControlPersist=60s'
ini:
- section: ssh_connection
key: ssh_args
- section: gcloud
key: ssh_args
env:
- name: ANSIBLE_SSH_ARGS
vars:
- name: ansible_ssh_args
ssh_extra_args:
description:
- Extra arguments exclusive to SSH
type: string
vars:
- name: ansible_ssh_extra_args
env:
- name: ANSIBLE_SSH_EXTRA_ARGS
ini:
- section: ssh_connection
key: ssh_extra_args
- section: gcloud
key: ssh_extra_args
cli:
- name: ssh_extra_args
scp_extra_args:
description: Extra exclusive to SCP
type: string
vars:
- name: ansible_scp_extra_args
env:
- name: ANSIBLE_SCP_EXTRA_ARGS
ini:
- section: ssh_connection
key: scp_extra_args
- section: gcloud
key: scp_extra_args
cli:
- name: scp_extra_args
"""
D = display.Display()
DEFAULT_TIMEOUT: int = 10
DEFAULT_GCLOUD: T.Optional[str] = shutil.which("gcloud")
class Connection(connection.ConnectionBase):
"""Connections via `gcloud compute ssh`"""
gcloud_executable: T.Optional[str] = None
has_pipelining = False
transport = "gcloud-ssh" # type: ignore[override]
def __init__(self, *args: T.Any, **kwargs: T.Any) -> None:
super(Connection, self).__init__(*args, **kwargs)
exec: T.Optional[str] = self.get_option("gcloud_executable")
if exec is None:
self.gcloud_executable = DEFAULT_GCLOUD
else:
self.gcloud_executable = exec
if self.gcloud_executable is None:
raise errors.AnsiblePluginError(
"Plugin Error: no gcloud binary found in $PATH and "
"no executable defined in ansible config"
)
def _connect(self) -> Connection:
"""connect to the instance using gcloud compute ssh"""
return self
def close(self) -> None:
"""mark connection as closed"""
self._connected = False
def _build_flags_for(self, what: str) -> T.List[str]:
flags: T.List[str] = []
args: str = self.ssh_args or ""
args += f" -o ConnectTimeout={self.timeout} "
if what == "ssh":
args += self.ssh_extra_args or ""
elif what == "scp":
args += self.scp_extra_args or ""
pieces: T.List[str] = shlex.split(args.strip())
flag: str = ""
for piece in pieces:
if flag == "" and piece.startswith("-"):
# start of a flag
flag = piece
continue
elif flag != "" and piece.startswith("-"):
# we encountered a flag after another flag
# append the previous flag first and continue
flags.append(f"--{what}-flag={flag}")
flag = piece
continue
elif not piece.startswith("-"):
# this is the argument to the ongoing flag
# concatenate and append
flag += " " + piece
flags.append(f"--{what}-flag={flag}")
flag = ""
else:
pass
# if there are any remnant flags, add them
if flag != "":
flags.append(f"--{what}-flag={flag}")
D.vvvvv(f"flags: {flags}", host=self.host)
return flags
def _build_command(
self,
what: str,
cmd: str,
in_path: T.Optional[str] = None, # only used for scp
out_path: T.Optional[str] = None, # only used for scp
) -> T.List[str]:
parts: T.List[str]
parts = [
self.gcloud_executable or "", # to silence pyright
"compute",
what,
f"--project={self.gcp_project}",
f"--zone={self.gcp_zone}",
f"--ssh-key-file={self.private_key_file}",
"--quiet", # no prompts
"--no-user-output-enabled", # no extra gcloud output
"--tunnel-through-iap",
]
if self.gcp_configuration is not None:
parts.append(f"--configuration={self.gcp_configuration}")
parts.extend(self._build_flags_for(what))
# handle options for ssh only
if what == "ssh":
if self.use_tty:
parts.append("--ssh-flag=-tt")
parts.extend([f"{self.user}@{self.host}", "--", cmd])
elif what == "scp":
parts.append("--compress")
if cmd == "put":
parts.append(str(in_path))
parts.append(f"{self.user}@{self.host}:{out_path}")
elif cmd == "get":
parts.append(f"{self.user}@{self.host}:{in_path}")
parts.append(str(out_path))
return parts
def _run(
self,
cmd: list[str],
in_data: T.Optional[bytes],
sudoable: bool = True,
checkrc: bool = True,
) -> tuple[int, bytes, bytes]:
D.vvv(f"EXEC: {shlex.join(cmd)}", host=self.host)
D.vvvvv("running command with Popen()", host=self.host)
# I could just not open a pty and be done with it, because I am
# not writing the whole pipelining and keyboard interaction just now.
# It may come at a later date so I am just laying the groundwork
p: T.Optional[subprocess.Popen[bytes]] = None
master_fd: int = 0
slave_fd: int = 0
if in_data is None: # attempt to open a pty
try:
master_fd, slave_fd = pty.openpty()
p = subprocess.Popen(
cmd,
stdin=slave_fd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# we don't need the input half to be open
os.close(slave_fd)
D.vvvvv("created pty subprocess", host=self.host)
except OSError:
D.vvvvv("failed to create pty", host=self.host)
if p is None: # fallback to non-pty
try:
p = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
D.vvvvv("created non-pty subprocess", host=self.host)
except OSError as ose:
raise errors.AnsibleError(
"Unable to execute ssh command on controller."
) from ose
D.vvvvv("done running command with Popen()", host=self.host)
D.vvvvv("getting output with communicate()", host=self.host)
if p is not None:
# output: t.Tuple[t.Optional[bytes], t.Optional[bytes]] = p.communicate()
stdout, stderr = p.communicate()
D.vvvvv("done getting output with communicate()", host=self.host)
# close the last half of the pty if created
if master_fd != 0:
os.close(master_fd)
D.debug(f"stdout: >>>>>{stdout}<<<<<", host=self.host)
D.debug(f"stderr: >>>>>{stderr}<<<<<", host=self.host)
return (p.returncode, stdout, stderr)
def exec_command(
self, cmd: str, in_data: T.Optional[bytes] = None, sudoable: bool = True
) -> tuple[int, bytes, bytes]:
"""run a command on the remote instance"""
super(Connection, self).exec_command(cmd, in_data=in_data, sudoable=sudoable)
self.host: T.Optional[str] = self.get_option("instance")
self.user: T.Optional[str] = self.get_option("remote_user")
self.ssh_args: T.Optional[str] = self.get_option("ssh_args")
self.ssh_extra_args: T.Optional[str] = self.get_option("ssh_extra_args")
self.scp_extra_args: T.Optional[str] = self.get_option("scp_extra_args")
self.private_key_file: T.Optional[str] = self.get_option("private_key_file")
self.use_tty: bool = bool(self.get_option("use_tty"))
if self.private_key_file is not None: # to silence pyright
self.private_key_file = ospath.abspath(
ospath.expanduser(self.private_key_file)
)
self.gcp_configuration: T.Optional[str] = self.get_option("configuration")
self.gcp_project: T.Optional[str] = self.get_option("project")
self.gcp_zone: T.Optional[str] = self.get_option("zone")
self.timeout: int = int(self.get_option("timeout") or DEFAULT_TIMEOUT)
display.Display().vvv(
f"GCLOUD SSH CONNECTION FOR USER: {self.user}", host=self.host
)
full_cmd: T.List[str] = self._build_command("ssh", cmd)
return self._run(cmd=full_cmd, in_data=in_data, sudoable=sudoable)
def _transfer_file(
self, in_path: str, out_path: str, cmd: str
) -> tuple[int, bytes, bytes]:
"""Common reads for both put and fetch"""
self.host: T.Optional[str] = self.get_option("instance")
self.user: T.Optional[str] = self.get_option("remote_user")
self.ssh_args: T.Optional[str] = self.get_option("ssh_args")
self.scp_extra_args: T.Optional[str] = self.get_option("scp_extra_args")
self.private_key_file: T.Optional[str] = self.get_option("private_key_file")
if self.private_key_file is not None: # to silence pyright
self.private_key_file = ospath.abspath(
ospath.expanduser(self.private_key_file)
)
self.gcp_configuration: T.Optional[str] = self.get_option("configuration")
self.gcp_project: T.Optional[str] = self.get_option("project")
self.gcp_zone: T.Optional[str] = self.get_option("zone")
self.timeout: int = int(self.get_option("timeout") or DEFAULT_TIMEOUT)
D.vvv(f"{cmd.upper()}: {in_path} TO {out_path}", host=self.host)
if not ospath.exists(
converters.to_bytes(in_path, errors="surrogate_or_strict")
):
raise errors.AnsibleFileNotFound(
f"File or module does not exist: {converters.to_native(in_path)}"
)
full_cmd: T.List[str] = self._build_command("scp", cmd, in_path, out_path)
return self._run(full_cmd, None)
def put_file(self, in_path: str, out_path: str) -> tuple[int, bytes, bytes]: # type: ignore[override]
"""uploads a file to the cloud instance"""
super(Connection, self).put_file(in_path, out_path)
return self._transfer_file(in_path, out_path, "put")
def fetch_file(self, in_path: str, out_path: str) -> None:
"""downloads a file from the cloud instance"""
super(Connection, self).fetch_file(in_path, out_path)
return self._transfer_file(in_path, out_path, "get")

360
plugins/connection/iap.py Normal file
View file

@ -0,0 +1,360 @@
# Copyright (c) 2025 Red Hat
# GNU General Public License v3.0+ https://www.gnu.org/licenses/gpl-3.0.txt
from __future__ import absolute_import, annotations
import os
import os.path as ospath
import re
import pty
import shlex
import select
import shutil
import subprocess
import threading
import time
import yaml
import tempfile
import typing as T
import ansible.plugins.connection.ssh as sshconn
import ansible.errors as errors
import ansible.utils.display as display
_my_opts = {
"gcloud_executable": {
"description": [
"Path to the gcloud executable, defaults to whatever is found in",
"the PATH environment variable",
],
"type": "string",
"vars": [{"name": "ansible_gcloud_executable"}],
"ini": [
{"section": "gcloud", "key": "executable"},
],
},
"gcloud_configuration": {
"description": ["Path to the gcloud configuration file if non default"],
"type": "string",
"vars": [{"name": "ansible_gcloud_configuration"}],
"ini": [{"section": "gcloud", "key": "configuration"}],
"env": [{"name": "CLOUDSDK_ACTIVE_CONFIG_NAME"}],
},
"gcloud_project": {
"description": [
"The Google Cloud project ID to use for this invocation.",
"If omitted, then the current project is assumed",
],
"type": "string",
"vars": [{"name": "ansible_gcloud_project"}],
"ini": [{"section": "gcloud", "key": "project"}],
"env": [{"name": "CLOUDSDK_CORE_PROJECT"}],
},
"gcloud_account": {
"description": ["Google cloud account to use for invocation"],
"type": "string",
"vars": [{"name": "ansible_gcloud_account"}],
"ini": [{"section": "gcloud", "key": "account"}],
"env": [{"name": "CLOUDSDK_CORE_ACCOUNT"}],
},
"gcloud_zone": {
"description": ["The Google Cloud zone to use for the instance(s)"],
"type": "string",
"vars": [{"name": "ansible_gcloud_zone"}],
"ini": [{"section": "gcloud", "key": "zone"}],
"env": [{"name": "CLOUDSDK_COMPUTE_ZONE"}],
},
"gcloud_access_token_file": {
"description": [
"A file to read the access token from. ",
"The credentials of the active account (if exists) will be ignored.",
],
"type": "string",
"vars": [{"name": "ansible_access_token_file"}],
"ini": [{"section": "gcloud", "key": "access_token_file"}],
"env": [{"name": "CLOUDSDK_AUTH_ACCESS_TOKEN_FILE"}],
},
}
# piggy back on top of upstream SSH plugin's docs, this somehow breaks ansible-doc but
# it still works for other ansible commands. This is expensive but I don't want to
# duplicate the entire doc string from ssh.py
_doc = yaml.safe_load(sshconn.DOCUMENTATION)
_doc["name"] = "gcloud-iap"
_doc["short_description"] = "connect using Google Cloud's Identity Aware Proxy (IAP)"
_doc["description"] = [
"This connection plugin behaves almost like the stock SSH plugin, ",
"but it creates a new IAP process per host in the inventory so connections ",
"are tunneled through it. ",
"This plugin requires you to have set authentication prior to using it.",
]
_doc["author"] = "Jorge A Gallegos (@kad)"
# Add custom opts for this plugin
_doc["options"].update(_my_opts)
# Change default to stock SSH key used by gcloud
_doc["options"]["private_key_file"]["default"] = "~/.ssh/google_compute_engine"
DOCUMENTATION = yaml.dump(_doc)
D = display.Display()
DEFAULT_GCLOUD: T.Optional[str] = shutil.which("gcloud")
DEFAULT_SSH_PORT: int = 22
PORT_REGEX = re.compile(r"\d+")
class IAP:
host: str
local_port: int
remote_port: int
master_fd: int
up: bool = False
process: T.Optional[subprocess.Popen] = None
thread: T.Optional[threading.Thread] = None
ready: threading.Event = threading.Event()
output: T.List[str] = []
def __init__(
self,
gcloud_bin: str,
host: str,
remote_port: int,
project: T.Optional[str],
account: T.Optional[str],
zone: T.Optional[str],
config: T.Optional[str] = None,
token_file: T.Optional[str] = None,
) -> None:
self.host = host
self.remote_port = remote_port
cmd: T.List[str] = [
gcloud_bin,
"compute",
"start-iap-tunnel",
host,
str(self.remote_port),
]
if config is not None:
cmd.extend(
[
"--configuration",
shlex.quote(ospath.realpath(ospath.expanduser(config.strip()))),
]
)
if project is not None:
cmd.extend(
[
"--project",
shlex.quote(project.strip()),
]
)
if account is not None:
cmd.extend(
[
"--account",
shlex.quote(account.strip()),
]
)
if zone is not None:
cmd.extend(
[
"--zone",
shlex.quote(zone.strip()),
]
)
if token_file is not None:
cmd.extend(
[
"--access-token-file",
shlex.quote(token_file.strip()),
]
)
D.vvv(f"IAP: CMD {' '.join(cmd)}", host=self.host)
try:
# start-iap-tunnel prints 2 lines:
# - Picking local unused port [$PORT].
# - Testing if tunnel connection works.
# and only when the terminal is a pty, a 3rd line:
# - Listening on port [$PORT].
# The last line only displayed after the tunnel has been tested,
# that's why we use a PTY for the subprocess
self.master_fd, slave_fd = pty.openpty()
self.process = subprocess.Popen(
cmd, stdout=slave_fd, stderr=slave_fd, text=True, close_fds=True
)
os.close(slave_fd)
self.thread = threading.Thread(target=self._monitor, daemon=True)
self.thread.start()
D.vvvvv("started IAP thread", host=self.host)
except Exception as e:
self.process = None
raise Exception from e
def _monitor(self) -> None:
"""Monitor the thread handling the IAP subprocess until it is 'up'"""
while self.process is not None and self.process.poll() is None:
rlist, _, _ = select.select([self.master_fd], [], [], 0.1)
if rlist is not None:
try:
output = os.read(self.master_fd, 1024).decode("utf-8")
if output:
for line in output.splitlines():
self.output.append(line)
if line.startswith("Listening on port"):
m = PORT_REGEX.search(line)
if m is not None:
self.local_port = int(m.group())
self.up = True
D.vvv(
f"IAP: LOCAL PORT {self.local_port}",
host=self.host,
)
except OSError: # pty is closed
break
if self.up: # no need to monitor if already up
break
if not self.ready.is_set():
self.ready.set()
os.close(self.master_fd)
def terminate(self) -> None:
"""Gracefully terminate the IAP subprocess"""
D.vvv("IAP: STOPPING TUNNEL", host=self.host)
if self.process is not None and self.process.poll() is None:
try:
self.process.terminate()
self.process.wait(timeout=5) # wait up to 5 seconds to terminate IAP
except subprocess.TimeoutExpired:
self.process.kill()
D.vvvvv("terminated/killed IAP", host=self.host)
if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=1) # joining thread back should be quick
class Connection(sshconn.Connection):
"""
This is pretty much the same as the upstream ssh plugin, just overloads
the connection handling to start/stop the IAP tunnel with gcloud as appropriate
"""
iaps: dict[str, IAP] = {}
lock: threading.Lock = threading.Lock()
gcloud_executable: T.Optional[str] = None
ssh_config: str
transport = "gcloud-iap" # type: ignore[override]
def __init__(self, *args: T.Any, **kwargs: T.Any) -> None:
super(Connection, self).__init__(*args, **kwargs)
# If the gcloud binary isn't found/configured, bail out immediately
exec: T.Optional[str] = self.get_option("gcloud_executable")
if exec is None:
self.gcloud_executable = DEFAULT_GCLOUD
else:
self.gcloud_executable = exec
if self.gcloud_executable is None:
raise errors.AnsiblePluginError(
"Plugin Error: no gcloud binary found in $PATH and "
"no executable defined in ansible config"
)
# have to trick SSH to connect to localhost instead of the instances
fd, self.ssh_config = tempfile.mkstemp(
suffix="ssh_config", prefix="ansible_gcloud", text=True
)
with open(fd, "w") as fp:
fp.write("Host *\n")
fp.write(" HostName localhost\n")
def _connect(self) -> Connection:
"""Upstream ssh is empty, overload with the stuff starting the IAP tunnel"""
host: T.Optional[str] = self.get_option("host")
project: T.Optional[str] = self.get_option("gcloud_project")
account: T.Optional[str] = self.get_option("gcloud_account")
zone: T.Optional[str] = self.get_option("gcloud_zone")
token_file: T.Optional[str] = self.get_option("gcloud_access_token_file")
config: T.Optional[str] = self.get_option("gcloud_configuration")
port: T.Optional[int] = self.get_option("port")
# this shouldn't happen, but still.
if host is None:
raise errors.AnsibleAssertionError("No host defined")
with self.lock:
if host not in self.iaps:
self.iaps[host] = IAP(
str(self.gcloud_executable),
host=host,
remote_port=int(port or DEFAULT_SSH_PORT),
project=project,
zone=zone,
account=account,
config=config,
token_file=token_file,
)
success = self.iaps[host].ready.wait(timeout=5)
is_up: bool = False
for _ in range(3):
is_up = self.iaps[host].up
if success and is_up:
D.vvv("IAP: TUNNEL IS UP", host=host)
is_up = True
break
else:
time.sleep(0.5)
if not is_up:
D.vvv("IAP: TUNNEL FAILURE", host=host)
for line in self.iaps[host].output:
D.vvvvv(line, host=host)
raise errors.AnsibleRuntimeError("Failure when starting IAP tunnel")
# override port with the random IAP port
self.set_option("port", self.iaps[host].local_port)
# disable host_key_checking because it's impossible to know ports beforehand
self.set_option("host_key_checking", False)
# prepend our generated tiny ssh config to all ssh_args if not already present
if self.ssh_config not in str(self.get_option("ssh_args")):
self.set_option(
"ssh_args", f"-F {self.ssh_config} " + str(self.get_option("ssh_args"))
)
self._connected = True
return self
def close(self) -> None:
"""
Upstream only marks the connection as closed, we have to terminate
all IAP tunnels as well
"""
# Terminate IAP
with self.lock:
for iap in self.iaps.values():
iap.terminate()
self.iaps.clear()
# remove ssh config
os.unlink(self.ssh_config)
self._connected = False