docker.images/ansible.awx/awx-17.1.0/awx/main/scheduler/kubernetes.py

203 lines
6.9 KiB
Python

import collections
import time
import logging
from base64 import b64encode
from django.conf import settings
from kubernetes import client, config
from django.utils.functional import cached_property
from awx.main.utils.common import parse_yaml_or_json
logger = logging.getLogger('awx.main.scheduler')
def deepmerge(a, b):
"""
Merge dict structures and return the result.
>>> a = {'first': {'all_rows': {'pass': 'dog', 'number': '1'}}}
>>> b = {'first': {'all_rows': {'fail': 'cat', 'number': '5'}}}
>>> import pprint; pprint.pprint(deepmerge(a, b))
{'first': {'all_rows': {'fail': 'cat', 'number': '5', 'pass': 'dog'}}}
"""
if isinstance(a, dict) and isinstance(b, dict):
return dict([(k, deepmerge(a.get(k), b.get(k)))
for k in set(a.keys()).union(b.keys())])
elif b is None:
return a
else:
return b
class PodManager(object):
def __init__(self, task=None):
self.task = task
def deploy(self):
if not self.credential.kubernetes:
raise RuntimeError('Pod deployment cannot occur without a Kubernetes credential')
self.kube_api.create_namespaced_pod(body=self.pod_definition,
namespace=self.namespace,
_request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
num_retries = settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRIES
for retry_attempt in range(num_retries - 1):
logger.debug(f"Checking for pod {self.pod_name}. Attempt {retry_attempt + 1} of {num_retries}")
pod = self.kube_api.read_namespaced_pod(name=self.pod_name,
namespace=self.namespace,
_request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
if pod.status.phase != 'Pending':
break
else:
logger.debug(f"Pod {self.pod_name} is Pending.")
time.sleep(settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRY_DELAY)
continue
if pod.status.phase == 'Running':
logger.debug(f"Pod {self.pod_name} is online.")
return pod
else:
logger.warn(f"Pod {self.pod_name} did not start. Status is {pod.status.phase}.")
@classmethod
def list_active_jobs(self, instance_group):
task = collections.namedtuple('Task', 'id instance_group')(
id='',
instance_group=instance_group
)
pm = PodManager(task)
try:
for pod in pm.kube_api.list_namespaced_pod(
pm.namespace,
label_selector='ansible-awx={}'.format(settings.INSTALL_UUID)
).to_dict().get('items', []):
job = pod['metadata'].get('labels', {}).get('ansible-awx-job-id')
if job:
try:
yield int(job)
except ValueError:
pass
except Exception:
logger.exception('Failed to list pods for container group {}'.format(instance_group))
def delete(self):
return self.kube_api.delete_namespaced_pod(name=self.pod_name,
namespace=self.namespace,
_request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
@property
def namespace(self):
return self.pod_definition['metadata']['namespace']
@property
def credential(self):
return self.task.instance_group.credential
@cached_property
def kube_config(self):
return generate_tmp_kube_config(self.credential, self.namespace)
@cached_property
def kube_api(self):
# this feels a little janky, but it's what k8s' own code does
# internally when it reads kube config files from disk:
# https://github.com/kubernetes-client/python-base/blob/0b208334ef0247aad9afcaae8003954423b61a0d/config/kube_config.py#L643
loader = config.kube_config.KubeConfigLoader(
config_dict=self.kube_config
)
cfg = type.__call__(client.Configuration)
loader.load_and_set(cfg)
return client.CoreV1Api(api_client=client.ApiClient(
configuration=cfg
))
@property
def pod_name(self):
return f"awx-job-{self.task.id}"
@property
def pod_definition(self):
default_pod_spec = {
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"namespace": settings.AWX_CONTAINER_GROUP_DEFAULT_NAMESPACE
},
"spec": {
"containers": [{
"image": settings.AWX_CONTAINER_GROUP_DEFAULT_IMAGE,
"tty": True,
"stdin": True,
"imagePullPolicy": "Always",
"args": [
'sleep', 'infinity'
]
}]
}
}
pod_spec_override = {}
if self.task and self.task.instance_group.pod_spec_override:
pod_spec_override = parse_yaml_or_json(
self.task.instance_group.pod_spec_override)
pod_spec = {**default_pod_spec, **pod_spec_override}
if self.task:
pod_spec['metadata'] = deepmerge(
pod_spec.get('metadata', {}),
dict(name=self.pod_name,
labels={
'ansible-awx': settings.INSTALL_UUID,
'ansible-awx-job-id': str(self.task.id)
}))
pod_spec['spec']['containers'][0]['name'] = self.pod_name
return pod_spec
def generate_tmp_kube_config(credential, namespace):
host_input = credential.get_input('host')
config = {
"apiVersion": "v1",
"kind": "Config",
"preferences": {},
"clusters": [
{
"name": host_input,
"cluster": {
"server": host_input
}
}
],
"users": [
{
"name": host_input,
"user": {
"token": credential.get_input('bearer_token')
}
}
],
"contexts": [
{
"name": host_input,
"context": {
"cluster": host_input,
"user": host_input,
"namespace": namespace
}
}
],
"current-context": host_input
}
if credential.get_input('verify_ssl') and 'ssl_ca_cert' in credential.inputs:
config["clusters"][0]["cluster"]["certificate-authority-data"] = b64encode(
credential.get_input('ssl_ca_cert').encode() # encode to bytes
).decode() # decode the base64 data into a str
else:
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
return config