203 lines
6.9 KiB
Python
203 lines
6.9 KiB
Python
import collections
|
|
import time
|
|
import logging
|
|
from base64 import b64encode
|
|
|
|
from django.conf import settings
|
|
from kubernetes import client, config
|
|
from django.utils.functional import cached_property
|
|
|
|
from awx.main.utils.common import parse_yaml_or_json
|
|
|
|
logger = logging.getLogger('awx.main.scheduler')
|
|
|
|
|
|
def deepmerge(a, b):
    """
    Merge dict structures and return the result.

    When both ``a`` and ``b`` are dicts they are merged key by key, and
    values present on both sides are merged recursively. In every other
    case ``b`` wins, unless ``b`` is ``None``, in which case ``a`` is
    returned. A ``None`` value in ``b`` therefore never overrides a value
    in ``a``.

    Neither input is modified. Non-dict values are not copied, so the
    result may share them with the inputs. Keys of the result are ordered
    deterministically: the keys of ``a`` first, then the keys that only
    exist in ``b``.

    >>> a = {'first': {'all_rows': {'pass': 'dog', 'number': '1'}}}
    >>> b = {'first': {'all_rows': {'fail': 'cat', 'number': '5'}}}
    >>> import pprint; pprint.pprint(deepmerge(a, b))
    {'first': {'all_rows': {'fail': 'cat', 'number': '5', 'pass': 'dog'}}}
    """
    if isinstance(a, dict) and isinstance(b, dict):
        # Keys of ``a``: merge with whatever ``b`` holds (None if missing).
        merged = {key: deepmerge(value, b.get(key)) for key, value in a.items()}
        # Keys only in ``b``: there is nothing to merge with, take them as-is.
        for key, value in b.items():
            if key not in a:
                merged[key] = value
        return merged
    elif b is None:
        return a
    else:
        return b
|
|
|
|
|
|
class PodManager(object):
    """Create, inspect and delete the Kubernetes pod that backs a task.

    The Kubernetes credential and the optional pod spec override are read
    from ``task.instance_group``; the pod name and job label are derived
    from ``task.id``.
    """

    def __init__(self, task=None):
        """Remember ``task``; it is expected to expose ``id`` and ``instance_group``."""
        self.task = task

    def deploy(self):
        """Create the pod and poll its status until it leaves ``Pending``.

        The pod is read up to ``AWX_CONTAINER_GROUP_POD_LAUNCH_RETRIES``
        times (at least once), sleeping
        ``AWX_CONTAINER_GROUP_POD_LAUNCH_RETRY_DELAY`` between reads.

        Returns:
            The pod object returned by the Kubernetes API when its phase is
            ``Running``; ``None`` otherwise (a warning is logged).

        Raises:
            RuntimeError: if the instance group's credential is not a
                Kubernetes credential.
        """
        if not self.credential.kubernetes:
            raise RuntimeError('Pod deployment cannot occur without a Kubernetes credential')

        api_timeout = settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT
        # ``namespace`` and ``pod_name`` are recomputed on every access, so
        # resolve them once for the whole deployment.
        namespace = self.namespace
        pod_name = self.pod_name

        self.kube_api.create_namespaced_pod(body=self.pod_definition,
                                            namespace=namespace,
                                            _request_timeout=api_timeout)

        # Always read the pod at least once so ``pod`` is bound below, even
        # when the retry setting is zero or one.
        num_retries = max(settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRIES, 1)
        for attempt in range(1, num_retries + 1):
            logger.debug(f"Checking for pod {pod_name}. Attempt {attempt} of {num_retries}")
            pod = self.kube_api.read_namespaced_pod(name=pod_name,
                                                    namespace=namespace,
                                                    _request_timeout=api_timeout)
            if pod.status.phase != 'Pending':
                break

            logger.debug(f"Pod {pod_name} is Pending.")
            # Only wait when another read follows; sleeping after the final
            # read would delay the caller and then act on a stale status.
            if attempt < num_retries:
                time.sleep(settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRY_DELAY)

        if pod.status.phase == 'Running':
            logger.debug(f"Pod {pod_name} is online.")
            return pod

        logger.warning(f"Pod {pod_name} did not start. Status is {pod.status.phase}.")
        return None

    @classmethod
    def list_active_jobs(cls, instance_group):
        """Yield the job ids of this install's pods in the group's namespace.

        Pods are selected by the ``ansible-awx=<INSTALL_UUID>`` label and
        the job id is taken from each pod's ``ansible-awx-job-id`` label.
        Pods without that label, or with a non-integer value, are skipped.

        This is a generator: any error raised while listing pods is logged
        and ends the iteration instead of propagating to the caller.
        """
        # A stand-in task is enough to resolve the credential and namespace
        # of the instance group; no real job is involved.
        task = collections.namedtuple('Task', 'id instance_group')(
            id='',
            instance_group=instance_group
        )
        pm = cls(task)
        try:
            listing = pm.kube_api.list_namespaced_pod(
                pm.namespace,
                label_selector='ansible-awx={}'.format(settings.INSTALL_UUID)
            ).to_dict()
            for pod in listing.get('items') or []:
                # ``labels`` may be present but None; treat that as "no labels"
                # rather than aborting the whole listing.
                labels = pod['metadata'].get('labels') or {}
                job = labels.get('ansible-awx-job-id')
                if job:
                    try:
                        yield int(job)
                    except ValueError:
                        # Deliberately ignore labels that are not job ids.
                        pass
        except Exception:
            logger.exception('Failed to list pods for container group {}'.format(instance_group))

    def delete(self):
        """Delete this task's pod and return the Kubernetes API response."""
        return self.kube_api.delete_namespaced_pod(name=self.pod_name,
                                                   namespace=self.namespace,
                                                   _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)

    @property
    def namespace(self):
        """Namespace declared in the (possibly overridden) pod definition."""
        return self.pod_definition['metadata']['namespace']

    @property
    def credential(self):
        """Credential attached to the task's instance group."""
        return self.task.instance_group.credential

    @cached_property
    def kube_config(self):
        """In-memory kubeconfig dict for this credential and namespace."""
        return generate_tmp_kube_config(self.credential, self.namespace)

    @cached_property
    def kube_api(self):
        """``CoreV1Api`` client configured from :attr:`kube_config`."""
        # this feels a little janky, but it's what k8s' own code does
        # internally when it reads kube config files from disk:
        # https://github.com/kubernetes-client/python-base/blob/0b208334ef0247aad9afcaae8003954423b61a0d/config/kube_config.py#L643
        loader = config.kube_config.KubeConfigLoader(
            config_dict=self.kube_config
        )
        # ``type.__call__`` instantiates Configuration directly, bypassing
        # any ``__call__`` defined by a custom metaclass.
        cfg = type.__call__(client.Configuration)
        loader.load_and_set(cfg)
        return client.CoreV1Api(api_client=client.ApiClient(
            configuration=cfg
        ))

    @property
    def pod_name(self):
        """Name of the pod (and of its container) for this task."""
        return f"awx-job-{self.task.id}"

    @property
    def pod_definition(self):
        """Pod manifest: defaults, merged with the instance group override.

        The override (YAML or JSON text on the instance group) is merged
        recursively into the defaults, so a partial override such as
        ``metadata: {labels: ...}`` keeps the default namespace. Non-dict
        values, including the ``containers`` list, are replaced wholesale
        by the override. When a task is set, the pod name, the AWX labels
        and the first container's name are filled in afterwards.

        A new dict is built on every access.
        """
        default_pod_spec = {
            "apiVersion": "v1",
            "kind": "Pod",
            "metadata": {
                "namespace": settings.AWX_CONTAINER_GROUP_DEFAULT_NAMESPACE
            },
            "spec": {
                "containers": [{
                    "image": settings.AWX_CONTAINER_GROUP_DEFAULT_IMAGE,
                    "tty": True,
                    "stdin": True,
                    "imagePullPolicy": "Always",
                    "args": [
                        'sleep', 'infinity'
                    ]
                }]
            }
        }

        pod_spec_override = {}
        if self.task and self.task.instance_group.pod_spec_override:
            pod_spec_override = parse_yaml_or_json(
                self.task.instance_group.pod_spec_override)
        # A shallow ``{**default, **override}`` would drop the default
        # namespace/containers whenever the override supplies only part of
        # ``metadata`` or ``spec``, which later surfaces as a KeyError.
        pod_spec = deepmerge(default_pod_spec, pod_spec_override)

        if self.task:
            pod_spec['metadata'] = deepmerge(
                pod_spec.get('metadata', {}),
                dict(name=self.pod_name,
                     labels={
                         'ansible-awx': settings.INSTALL_UUID,
                         'ansible-awx-job-id': str(self.task.id)
                     }))
            pod_spec['spec']['containers'][0]['name'] = self.pod_name

        return pod_spec
|
|
|
|
|
|
def generate_tmp_kube_config(credential, namespace):
    """Build an in-memory kubeconfig dict from a credential.

    The credential's ``host`` input is used as the server URL and also as
    the name of the single cluster, user and context; ``bearer_token``
    becomes the user's token and ``namespace`` the context's namespace.

    TLS handling: when ``verify_ssl`` is truthy and an ``ssl_ca_cert``
    input exists, the certificate is embedded base64-encoded as
    ``certificate-authority-data``. In every other case
    ``insecure-skip-tls-verify`` is set to ``True``.

    NOTE(review): this means ``verify_ssl`` without ``ssl_ca_cert`` also
    disables TLS verification -- confirm that this is intended.
    """
    host = credential.get_input('host')

    # Kept as a separate name so the TLS settings can be added below
    # without indexing back into the nested structure.
    cluster = {"server": host}
    kube_config = {
        "apiVersion": "v1",
        "kind": "Config",
        "preferences": {},
        "clusters": [{"name": host, "cluster": cluster}],
        "users": [{
            "name": host,
            "user": {"token": credential.get_input('bearer_token')},
        }],
        "contexts": [{
            "name": host,
            "context": {"cluster": host, "user": host, "namespace": namespace},
        }],
        "current-context": host,
    }

    use_custom_ca = credential.get_input('verify_ssl') and 'ssl_ca_cert' in credential.inputs
    if not use_custom_ca:
        cluster["insecure-skip-tls-verify"] = True
        return kube_config

    # str -> bytes for base64, then back to str for the config value.
    ca_bytes = credential.get_input('ssl_ca_cert').encode()
    cluster["certificate-authority-data"] = b64encode(ca_bytes).decode()
    return kube_config
|