docker.images/ansible.awx/awx-17.1.0/awx/main/analytics/collectors.py

import io
import os
import os.path
import platform

from django.db import connection
from django.db.models import Count
from django.conf import settings
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _

from awx.conf.license import get_license
from awx.main.utils import (get_awx_version, get_ansible_version,
                            get_custom_venv_choices, camelcase_to_underscore)
from awx.main import models
from django.contrib.sessions.models import Session
from awx.main.analytics import register

'''
This module is used to define metrics collected by awx.main.analytics.gather()
Each function is decorated with a key name, and should return a data
structure that can be serialized to JSON

@register('something', '1.0')
def something(since):
    # the generated archive will contain a `something.json` w/ this JSON
    return {'some': 'json'}

All functions - when called - will be passed a datetime.datetime object,
`since`, which represents the last time analytics were gathered (some metrics
functions - like those that return metadata about playbook runs, may return
data _since_ the last report date - i.e., new data in the last 24 hours)
'''


@register('config', '1.2', description=_('General platform configuration.'))
def config(since, **kwargs):
    license_info = get_license()
    install_type = 'traditional'
    if os.environ.get('container') == 'oci':
        install_type = 'openshift'
    elif 'KUBERNETES_SERVICE_PORT' in os.environ:
        install_type = 'k8s'
    return {
        'platform': {
            'system': platform.system(),
            'dist': platform.dist(),
            'release': platform.release(),
            'type': install_type,
        },
        'install_uuid': settings.INSTALL_UUID,
        'instance_uuid': settings.SYSTEM_UUID,
        'tower_url_base': settings.TOWER_URL_BASE,
        'tower_version': get_awx_version(),
        'ansible_version': get_ansible_version(),
        'license_type': license_info.get('license_type', 'UNLICENSED'),
        'free_instances': license_info.get('free_instances', 0),
        'total_licensed_instances': license_info.get('instance_count', 0),
        'license_expiry': license_info.get('time_remaining', 0),
        'pendo_tracking': settings.PENDO_TRACKING_STATE,
        'authentication_backends': settings.AUTHENTICATION_BACKENDS,
        'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS,
        'external_logger_enabled': settings.LOG_AGGREGATOR_ENABLED,
        'external_logger_type': getattr(settings, 'LOG_AGGREGATOR_TYPE', None),
    }


@register('counts', '1.0', description=_('Counts of objects such as organizations, inventories, and projects'))
def counts(since, **kwargs):
    counts = {}
    for cls in (models.Organization, models.Team, models.User,
                models.Inventory, models.Credential, models.Project,
                models.JobTemplate, models.WorkflowJobTemplate,
                models.Host, models.Schedule, models.CustomInventoryScript,
                models.NotificationTemplate):
        counts[camelcase_to_underscore(cls.__name__)] = cls.objects.count()

    venvs = get_custom_venv_choices()
    counts['custom_virtualenvs'] = len([
        v for v in venvs
        if os.path.basename(v.rstrip('/')) != 'ansible'
    ])

    inv_counts = dict(models.Inventory.objects.order_by().values_list('kind').annotate(Count('kind')))
    inv_counts['normal'] = inv_counts.get('', 0)
    inv_counts.pop('', None)
    inv_counts['smart'] = inv_counts.get('smart', 0)
    counts['inventories'] = inv_counts

    counts['unified_job'] = models.UnifiedJob.objects.exclude(launch_type='sync').count() # excludes implicit project_updates
    counts['active_host_count'] = models.Host.objects.active_count()
    active_sessions = Session.objects.filter(expire_date__gte=now()).count()
    active_user_sessions = models.UserSessionMembership.objects.select_related('session').filter(session__expire_date__gte=now()).count()
    active_anonymous_sessions = active_sessions - active_user_sessions
    counts['active_sessions'] = active_sessions
    counts['active_user_sessions'] = active_user_sessions
    counts['active_anonymous_sessions'] = active_anonymous_sessions
    counts['running_jobs'] = models.UnifiedJob.objects.exclude(launch_type='sync').filter(status__in=('running', 'waiting',)).count()
    counts['pending_jobs'] = models.UnifiedJob.objects.exclude(launch_type='sync').filter(status__in=('pending',)).count()
    return counts


@register('org_counts', '1.0', description=_('Counts of users and teams by organization'))
def org_counts(since, **kwargs):
    counts = {}
    for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True),
                                                    num_teams=Count('teams', distinct=True)).values('name', 'id', 'num_users', 'num_teams'):
        counts[org['id']] = {'name': org['name'],
                             'users': org['num_users'],
                             'teams': org['num_teams']
                             }
    return counts


@register('cred_type_counts', '1.0', description=_('Counts of credentials by credential type'))
def cred_type_counts(since, **kwargs):
    counts = {}
    for cred_type in models.CredentialType.objects.annotate(num_credentials=Count(
                                                            'credentials', distinct=True)).values('name', 'id', 'managed_by_tower', 'num_credentials'):
        counts[cred_type['id']] = {'name': cred_type['name'],
                                   'credential_count': cred_type['num_credentials'],
                                   'managed_by_tower': cred_type['managed_by_tower']
                                   }
    return counts


@register('inventory_counts', '1.2', description=_('Inventories, their inventory sources, and host counts'))
def inventory_counts(since, **kwargs):
    counts = {}
    for inv in models.Inventory.objects.filter(kind='').annotate(num_sources=Count('inventory_sources', distinct=True),
                                                                 num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
        source_list = []
        for source in inv.inventory_sources.filter().annotate(num_hosts=Count('hosts', distinct=True)).values('name','source', 'num_hosts'):
            source_list.append(source)
        counts[inv.id] = {'name': inv.name,
                          'kind': inv.kind,
                          'hosts': inv.num_hosts,
                          'sources': inv.num_sources,
                          'source_list': source_list
                          }

    for smart_inv in models.Inventory.objects.filter(kind='smart'):
        counts[smart_inv.id] = {'name': smart_inv.name,
                                'kind': smart_inv.kind,
                                'hosts': smart_inv.hosts.count(),
                                'sources': 0,
                                'source_list': []
                                }
    return counts


@register('projects_by_scm_type', '1.0', description=_('Counts of projects by source control type'))
def projects_by_scm_type(since, **kwargs):
    counts = dict(
        (t[0] or 'manual', 0)
        for t in models.Project.SCM_TYPE_CHOICES
    )
    for result in models.Project.objects.values('scm_type').annotate(
        count=Count('scm_type')
    ).order_by('scm_type'):
        counts[result['scm_type'] or 'manual'] = result['count']
    return counts


def _get_isolated_datetime(last_check):
    if last_check:
        return last_check.isoformat()
    return last_check


@register('instance_info', '1.0', description=_('Cluster topology and capacity'))
def instance_info(since, include_hostnames=False, **kwargs):
    info = {}
    instances = models.Instance.objects.values_list('hostname').values(
        'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check', 'enabled')
    for instance in instances:
        consumed_capacity = sum(x.task_impact for x in models.UnifiedJob.objects.filter(execution_node=instance['hostname'],
                                status__in=('running', 'waiting')))
        instance_info = {
            'uuid': instance['uuid'],
            'version': instance['version'],
            'capacity': instance['capacity'],
            'cpu': instance['cpu'],
            'memory': instance['memory'],
            'managed_by_policy': instance['managed_by_policy'],
            'last_isolated_check': _get_isolated_datetime(instance['last_isolated_check']),
            'enabled': instance['enabled'],
            'consumed_capacity': consumed_capacity,
            'remaining_capacity': instance['capacity'] - consumed_capacity
        }
        if include_hostnames is True:
            instance_info['hostname'] = instance['hostname']
        info[instance['uuid']] = instance_info
    return info


def job_counts(since, **kwargs):
    counts = {}
    counts['total_jobs'] = models.UnifiedJob.objects.exclude(launch_type='sync').count()
    counts['status'] = dict(models.UnifiedJob.objects.exclude(launch_type='sync').values_list('status').annotate(Count('status')).order_by())
    counts['launch_type'] = dict(models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
        'launch_type').annotate(Count('launch_type')).order_by())
    return counts


def job_instance_counts(since, **kwargs):
    counts = {}
    job_types = models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
        'execution_node', 'launch_type').annotate(job_launch_type=Count('launch_type')).order_by()
    for job in job_types:
        counts.setdefault(job[0], {}).setdefault('launch_type', {})[job[1]] = job[2]

    job_statuses = models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
        'execution_node', 'status').annotate(job_status=Count('status')).order_by()
    for job in job_statuses:
        counts.setdefault(job[0], {}).setdefault('status', {})[job[1]] = job[2]
    return counts


@register('query_info', '1.0', description=_('Metadata about the analytics collected'))
def query_info(since, collection_type, until, **kwargs):
    query_info = {}
    query_info['last_run'] = str(since)
    query_info['current_time'] = str(until)
    query_info['collection_type'] = collection_type
    return query_info


'''
The event table can be *very* large, and we have a 100MB upload limit.

Split large table dumps at dump time into a series of files.
'''
MAX_TABLE_SIZE = 200 * 1048576


class FileSplitter(io.StringIO):
    def __init__(self, filespec=None, *args, **kwargs):
        self.filespec = filespec
        self.files = []
        self.currentfile = None
        self.header = None
        self.counter = 0
        self.cycle_file()

    def cycle_file(self):
        if self.currentfile:
            self.currentfile.close()
        self.counter = 0
        fname = '{}_split{}'.format(self.filespec, len(self.files))
        self.currentfile = open(fname, 'w', encoding='utf-8')
        self.files.append(fname)
        if self.header:
            self.currentfile.write('{}\n'.format(self.header))

    def file_list(self):
        self.currentfile.close()
        # Check for an empty dump
        if len(self.header) + 1 == self.counter:
            os.remove(self.files[-1])
            self.files = self.files[:-1]
        # If we only have one file, remove the suffix
        if len(self.files) == 1:
            os.rename(self.files[0],self.files[0].replace('_split0',''))
        return self.files

    def write(self, s):
        if not self.header:
            self.header = s[0:s.index('\n')]
        self.counter += self.currentfile.write(s)
        if self.counter >= MAX_TABLE_SIZE:
            self.cycle_file()


def _copy_table(table, query, path):
    file_path = os.path.join(path, table + '_table.csv')
    file = FileSplitter(filespec=file_path)
    with connection.cursor() as cursor:
        cursor.copy_expert(query, file)
    return file.file_list()


@register('events_table', '1.2', format='csv', description=_('Automation task records'), expensive=True)
def events_table(since, full_path, until, **kwargs):
    events_query = '''COPY (SELECT main_jobevent.id,
                              main_jobevent.created,
                              main_jobevent.modified,
                              main_jobevent.uuid,
                              main_jobevent.parent_uuid,
                              main_jobevent.event,
                              main_jobevent.event_data::json->'task_action' AS task_action,
                              (CASE WHEN event = 'playbook_on_stats' THEN event_data END) as playbook_on_stats,
                              main_jobevent.failed,
                              main_jobevent.changed,
                              main_jobevent.playbook,
                              main_jobevent.play,
                              main_jobevent.task,
                              main_jobevent.role,
                              main_jobevent.job_id,
                              main_jobevent.host_id,
                              main_jobevent.host_name
                              , CAST(main_jobevent.event_data::json->>'start' AS TIMESTAMP WITH TIME ZONE) AS start,
                              CAST(main_jobevent.event_data::json->>'end' AS TIMESTAMP WITH TIME ZONE) AS end,
                              main_jobevent.event_data::json->'duration' AS duration,
                              main_jobevent.event_data::json->'res'->'warnings' AS warnings,
                              main_jobevent.event_data::json->'res'->'deprecations' AS deprecations
                              FROM main_jobevent
                              WHERE (main_jobevent.created > '{}' AND main_jobevent.created <= '{}')
                              ORDER BY main_jobevent.id ASC) TO STDOUT WITH CSV HEADER
                   '''.format(since.isoformat(),until.isoformat())
    return _copy_table(table='events', query=events_query, path=full_path)


@register('unified_jobs_table', '1.1', format='csv', description=_('Data on jobs run'), expensive=True)
def unified_jobs_table(since, full_path, until, **kwargs):
    unified_job_query = '''COPY (SELECT main_unifiedjob.id,
                                 main_unifiedjob.polymorphic_ctype_id,
                                 django_content_type.model,
                                 main_unifiedjob.organization_id,
                                 main_organization.name as organization_name,
                                 main_job.inventory_id,
                                 main_inventory.name as inventory_name,
                                 main_unifiedjob.created,
                                 main_unifiedjob.name,
                                 main_unifiedjob.unified_job_template_id,
                                 main_unifiedjob.launch_type,
                                 main_unifiedjob.schedule_id,
                                 main_unifiedjob.execution_node,
                                 main_unifiedjob.controller_node,
                                 main_unifiedjob.cancel_flag,
                                 main_unifiedjob.status,
                                 main_unifiedjob.failed,
                                 main_unifiedjob.started,
                                 main_unifiedjob.finished,
                                 main_unifiedjob.elapsed,
                                 main_unifiedjob.job_explanation,
                                 main_unifiedjob.instance_group_id
                                 FROM main_unifiedjob
                                 JOIN django_content_type ON main_unifiedjob.polymorphic_ctype_id = django_content_type.id
                                 LEFT JOIN main_job ON main_unifiedjob.id = main_job.unifiedjob_ptr_id
                                 LEFT JOIN main_inventory ON main_job.inventory_id = main_inventory.id
                                 LEFT JOIN main_organization ON main_organization.id = main_unifiedjob.organization_id
                                 WHERE ((main_unifiedjob.created > '{0}' AND main_unifiedjob.created <= '{1}')
                                       OR (main_unifiedjob.finished > '{0}' AND main_unifiedjob.finished <= '{1}'))
                                       AND main_unifiedjob.launch_type != 'sync'
                                 ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER
                        '''.format(since.isoformat(),until.isoformat())
    return _copy_table(table='unified_jobs', query=unified_job_query, path=full_path)


@register('unified_job_template_table', '1.0', format='csv', description=_('Data on job templates'))
def unified_job_template_table(since, full_path, **kwargs):
    unified_job_template_query = '''COPY (SELECT main_unifiedjobtemplate.id,
                                 main_unifiedjobtemplate.polymorphic_ctype_id,
                                 django_content_type.model,
                                 main_unifiedjobtemplate.created,
                                 main_unifiedjobtemplate.modified,
                                 main_unifiedjobtemplate.created_by_id,
                                 main_unifiedjobtemplate.modified_by_id,
                                 main_unifiedjobtemplate.name,
                                 main_unifiedjobtemplate.current_job_id,
                                 main_unifiedjobtemplate.last_job_id,
                                 main_unifiedjobtemplate.last_job_failed,
                                 main_unifiedjobtemplate.last_job_run,
                                 main_unifiedjobtemplate.next_job_run,
                                 main_unifiedjobtemplate.next_schedule_id,
                                 main_unifiedjobtemplate.status
                                 FROM main_unifiedjobtemplate, django_content_type
                                 WHERE main_unifiedjobtemplate.polymorphic_ctype_id = django_content_type.id
                                 ORDER BY main_unifiedjobtemplate.id ASC) TO STDOUT WITH CSV HEADER'''
    return _copy_table(table='unified_job_template', query=unified_job_template_query, path=full_path)


@register('workflow_job_node_table', '1.0', format='csv', description=_('Data on workflow runs'), expensive=True)
def workflow_job_node_table(since, full_path, until, **kwargs):
    workflow_job_node_query = '''COPY (SELECT main_workflowjobnode.id,
                                 main_workflowjobnode.created,
                                 main_workflowjobnode.modified,
                                 main_workflowjobnode.job_id,
                                 main_workflowjobnode.unified_job_template_id,
                                 main_workflowjobnode.workflow_job_id,
                                 main_workflowjobnode.inventory_id,
                                 success_nodes.nodes AS success_nodes,
                                 failure_nodes.nodes AS failure_nodes,
                                 always_nodes.nodes AS always_nodes,
                                 main_workflowjobnode.do_not_run,
                                 main_workflowjobnode.all_parents_must_converge
                                 FROM main_workflowjobnode
                                 LEFT JOIN (
                                     SELECT from_workflowjobnode_id, ARRAY_AGG(to_workflowjobnode_id) AS nodes
                                     FROM main_workflowjobnode_success_nodes
                                     GROUP BY from_workflowjobnode_id
                                 ) success_nodes ON main_workflowjobnode.id = success_nodes.from_workflowjobnode_id
                                 LEFT JOIN (
                                     SELECT from_workflowjobnode_id, ARRAY_AGG(to_workflowjobnode_id) AS nodes
                                     FROM main_workflowjobnode_failure_nodes
                                     GROUP BY from_workflowjobnode_id
                                 ) failure_nodes ON main_workflowjobnode.id = failure_nodes.from_workflowjobnode_id
                                 LEFT JOIN (
                                     SELECT from_workflowjobnode_id, ARRAY_AGG(to_workflowjobnode_id) AS nodes
                                     FROM main_workflowjobnode_always_nodes
                                     GROUP BY from_workflowjobnode_id
                                 ) always_nodes ON main_workflowjobnode.id = always_nodes.from_workflowjobnode_id
                                 WHERE (main_workflowjobnode.modified > '{}' AND main_workflowjobnode.modified <= '{}')
                                 ORDER BY main_workflowjobnode.id ASC) TO STDOUT WITH CSV HEADER
                              '''.format(since.isoformat(),until.isoformat())
    return _copy_table(table='workflow_job_node', query=workflow_job_node_query, path=full_path)


@register('workflow_job_template_node_table', '1.0', format='csv', description=_('Data on workflows'))
def workflow_job_template_node_table(since, full_path, **kwargs):
    workflow_job_template_node_query = '''COPY (SELECT main_workflowjobtemplatenode.id,
                                 main_workflowjobtemplatenode.created,
                                 main_workflowjobtemplatenode.modified,
                                 main_workflowjobtemplatenode.unified_job_template_id,
                                 main_workflowjobtemplatenode.workflow_job_template_id,
                                 main_workflowjobtemplatenode.inventory_id,
                                 success_nodes.nodes AS success_nodes,
                                 failure_nodes.nodes AS failure_nodes,
                                 always_nodes.nodes AS always_nodes,
                                 main_workflowjobtemplatenode.all_parents_must_converge
                                 FROM main_workflowjobtemplatenode
                                 LEFT JOIN (
                                     SELECT from_workflowjobtemplatenode_id, ARRAY_AGG(to_workflowjobtemplatenode_id) AS nodes
                                     FROM main_workflowjobtemplatenode_success_nodes
                                     GROUP BY from_workflowjobtemplatenode_id
                                 ) success_nodes ON main_workflowjobtemplatenode.id = success_nodes.from_workflowjobtemplatenode_id
                                 LEFT JOIN (
                                     SELECT from_workflowjobtemplatenode_id, ARRAY_AGG(to_workflowjobtemplatenode_id) AS nodes
                                     FROM main_workflowjobtemplatenode_failure_nodes
                                     GROUP BY from_workflowjobtemplatenode_id
                                 ) failure_nodes ON main_workflowjobtemplatenode.id = failure_nodes.from_workflowjobtemplatenode_id
                                 LEFT JOIN (
                                     SELECT from_workflowjobtemplatenode_id, ARRAY_AGG(to_workflowjobtemplatenode_id) AS nodes
                                     FROM main_workflowjobtemplatenode_always_nodes
                                     GROUP BY from_workflowjobtemplatenode_id
                                 ) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id
                                 ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER'''
    return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path)