# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.

# Python
from io import StringIO
import datetime
import codecs
import json
import logging
import os
import re
import socket
import subprocess
import tempfile
from collections import OrderedDict

# Django
from django.conf import settings
from django.db import models, connection
from django.core.exceptions import NON_FIELD_ERRORS
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import now
from django.utils.encoding import smart_text
from django.contrib.contenttypes.models import ContentType

# REST Framework
from rest_framework.exceptions import ParseError

# Django-Polymorphic
from polymorphic.models import PolymorphicModel

# AWX
from awx.main.models.base import (
    CommonModelNameNotUnique,
    PasswordFieldsModel,
    NotificationFieldsModel,
    prevent_search
)
from awx.main.dispatch import get_local_queuename
from awx.main.dispatch.control import Control as ControlDispatcher
from awx.main.registrar import activity_stream_registrar
from awx.main.models.mixins import ResourceMixin, TaskManagerUnifiedJobMixin
from awx.main.utils import (
    camelcase_to_underscore, get_model_for_type,
    encrypt_dict, decrypt_field, _inventory_updates,
    copy_model_by_class, copy_m2m_relationships,
    get_type_for_model, parse_yaml_or_json, getattr_dne,
    polymorphic, schedule_task_manager
)
from awx.main.constants import ACTIVE_STATES, CAN_CANCEL
from awx.main.redact import UriCleaner, REPLACE_STR
from awx.main.consumers import emit_channel_notification
from awx.main.fields import JSONField, AskForField, OrderedManyToManyField


__all__ = ['UnifiedJobTemplate', 'UnifiedJob', 'StdoutMaxBytesExceeded']

logger = logging.getLogger('awx.main.models.unified_jobs')

# NOTE: ACTIVE_STATES moved to constants because it is used by parent modules


class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, NotificationFieldsModel):
    '''
    Concrete base class for unified job templates.
    '''

    # status inherits from related jobs. Thus, status must be able to be set
    # to any status that a job status is settable to.
    JOB_STATUS_CHOICES = [
        ('new', _('New')),                  # Job has been created, but not started.
        ('pending', _('Pending')),          # Job is pending Task Manager processing (blocked by dependency req, capacity or a concurrent job)
        ('waiting', _('Waiting')),          # Job has been assigned to run on a specific node (and is about to run).
        ('running', _('Running')),          # Job is currently running.
        ('successful', _('Successful')),    # Job completed successfully.
        ('failed', _('Failed')),            # Job completed, but with failures.
        ('error', _('Error')),              # The job was unable to run.
        ('canceled', _('Canceled')),        # The job was canceled before completion.
    ]

    COMMON_STATUS_CHOICES = JOB_STATUS_CHOICES + [
        ('never updated', _('Never Updated')),  # A job has never been run using this template.
    ]

    PROJECT_STATUS_CHOICES = COMMON_STATUS_CHOICES + [
        ('ok', _('OK')),                    # Project is not configured for SCM and path exists.
        ('missing', _('Missing')),          # Project path does not exist.
    ]

    INVENTORY_SOURCE_STATUS_CHOICES = COMMON_STATUS_CHOICES + [
        ('none', _('No External Source')),  # Inventory source is not configured to update from an external source.
    ]

    JOB_TEMPLATE_STATUS_CHOICES = COMMON_STATUS_CHOICES

    DEPRECATED_STATUS_CHOICES = [
        # No longer used for Project / Inventory Source:
        ('updating', _('Updating')),        # Same as running.
    ]

    ALL_STATUS_CHOICES = OrderedDict(
        PROJECT_STATUS_CHOICES + INVENTORY_SOURCE_STATUS_CHOICES +
        JOB_TEMPLATE_STATUS_CHOICES + DEPRECATED_STATUS_CHOICES
    ).items()

    class Meta:
        app_label = 'main'
        ordering = ('name',)
        # unique_together here is intentionally commented out. Please make sure sub-classes of this model
        # contain at least this uniqueness restriction: SOFT_UNIQUE_TOGETHER = [('polymorphic_ctype', 'name')]
        #unique_together = [('polymorphic_ctype', 'name', 'organization')]

    old_pk = models.PositiveIntegerField(
        null=True,
        default=None,
        editable=False,
    )
    current_job = models.ForeignKey(
        'UnifiedJob',
        null=True,
        default=None,
        editable=False,
        related_name='%(class)s_as_current_job+',
        on_delete=models.SET_NULL,
    )
    last_job = models.ForeignKey(
        'UnifiedJob',
        null=True,
        default=None,
        editable=False,
        related_name='%(class)s_as_last_job+',
        on_delete=models.SET_NULL,
    )
    last_job_failed = models.BooleanField(
        default=False,
        editable=False,
    )
    last_job_run = models.DateTimeField(
        null=True,
        default=None,
        editable=False,
    )
    #on_missed_schedule = models.CharField(
    #    max_length=32,
    #    choices=[],
    #)
    next_job_run = models.DateTimeField(
        null=True,
        default=None,
        editable=False,
    )
    next_schedule = models.ForeignKey(  # Schedule entry responsible for next_job_run.
        'Schedule',
        null=True,
        default=None,
        editable=False,
        related_name='%(class)s_as_next_schedule+',
        on_delete=polymorphic.SET_NULL,
    )
    status = models.CharField(
        max_length=32,
        choices=ALL_STATUS_CHOICES,
        default='ok',
        editable=False,
    )
    organization = models.ForeignKey(
        'Organization',
        blank=True,
        null=True,
        on_delete=polymorphic.SET_NULL,
        related_name='%(class)ss',
        help_text=_('The organization used to determine access to this template.'),
    )
    credentials = models.ManyToManyField(
        'Credential',
        related_name='%(class)ss',
    )
    labels = models.ManyToManyField(
        "Label",
        blank=True,
        related_name='%(class)s_labels'
    )
    instance_groups = OrderedManyToManyField(
        'InstanceGroup',
        blank=True,
        through='UnifiedJobTemplateInstanceGroupMembership'
    )

    def get_absolute_url(self, request=None):
        real_instance = self.get_real_instance()
        if real_instance != self:
            return real_instance.get_absolute_url(request=request)
        else:
            return ''

    def unique_error_message(self, model_class, unique_check):
        # If polymorphic_ctype is part of a unique check, return a list of the
        # remaining fields instead of the error message.
        if len(unique_check) >= 2 and 'polymorphic_ctype' in unique_check:
            return [x for x in unique_check if x != 'polymorphic_ctype']
        else:
            return super(UnifiedJobTemplate, self).unique_error_message(model_class, unique_check)

    @classmethod
    def _submodels_with_roles(cls):
        ujt_classes = [c for c in cls.__subclasses__()
                       if c._meta.model_name not in ['inventorysource', 'systemjobtemplate']]
        ct_dict = ContentType.objects.get_for_models(*ujt_classes)
        return [ct.id for ct in ct_dict.values()]

    @classmethod
    def accessible_pk_qs(cls, accessor, role_field):
        '''
        A re-implementation of accessible pk queryset for the "normal" unified JTs.
        Does not return inventory sources or system JTs, these should be
        handled inside of get_queryset where it is utilized.
        '''
        # do not use this if in a subclass
        if cls != UnifiedJobTemplate:
            return super(UnifiedJobTemplate, cls).accessible_pk_qs(accessor, role_field)
        return ResourceMixin._accessible_pk_qs(
            cls, accessor, role_field,
            content_types=cls._submodels_with_roles())

    def _perform_unique_checks(self, unique_checks):
        # Handle the list of unique fields returned above.
        # Replace with an appropriate error message for the remaining
        # field(s) in the unique check and clean up the errors dictionary.
        errors = super(UnifiedJobTemplate, self)._perform_unique_checks(unique_checks)
        # Iterate over a copy of the items: adding or deleting keys while
        # iterating over a dict raises RuntimeError on Python 3.
        for key, msgs in list(errors.items()):
            if key != NON_FIELD_ERRORS:
                continue
            for msg in msgs:
                if isinstance(msg, (list, tuple)):
                    if len(msg) == 1:
                        new_key = msg[0]
                    else:
                        new_key = NON_FIELD_ERRORS
                    model_class = self.get_real_concrete_instance_class()
                    errors.setdefault(new_key, []).append(self.unique_error_message(model_class, msg))
            errors[key] = [x for x in msgs if not isinstance(x, (list, tuple))]
        for key in list(errors.keys()):
            if not errors[key]:
                del errors[key]
        return errors

    def validate_unique(self, exclude=None):
        # Make sure we set the polymorphic_ctype before validating, and omit
        # it from the list of excluded fields.
        self.pre_save_polymorphic()
        if exclude and 'polymorphic_ctype' in exclude:
            exclude = [x for x in exclude if x != 'polymorphic_ctype']
        return super(UnifiedJobTemplate, self).validate_unique(exclude)

    @property  # Alias for backwards compatibility.
    def current_update(self):
        return self.current_job

    @property  # Alias for backwards compatibility.
    def last_update(self):
        return self.last_job

    @property  # Alias for backwards compatibility.
    def last_update_failed(self):
        return self.last_job_failed

    @property  # Alias for backwards compatibility.
    def last_updated(self):
        return self.last_job_run

    def update_computed_fields(self):
        related_schedules = self.schedules.filter(enabled=True, next_run__isnull=False).order_by('-next_run')
        new_next_schedule = related_schedules.first()
        if new_next_schedule:
            if new_next_schedule.pk == self.next_schedule_id and new_next_schedule.next_run == self.next_job_run:
                return  # no-op, common for infrequent schedules
            self.next_schedule = new_next_schedule
            self.next_job_run = new_next_schedule.next_run
            self.save(update_fields=['next_schedule', 'next_job_run'])

    def save(self, *args, **kwargs):
        # If update_fields has been specified, add our field names to it,
        # if it hasn't been specified, then we're just doing a normal save.
        update_fields = kwargs.get('update_fields', [])
        # Update status and last_updated fields.
        if not getattr(_inventory_updates, 'is_updating', False):
            updated_fields = self._set_status_and_last_job_run(save=False)
            for field in updated_fields:
                if field not in update_fields:
                    update_fields.append(field)
        # Do the actual save.
        super(UnifiedJobTemplate, self).save(*args, **kwargs)

    def _get_current_status(self):
        # Override in subclasses as needed.
        if self.current_job and self.current_job.status:
            return self.current_job.status
        elif not self.last_job:
            return 'never updated'
        elif self.last_job_failed:
            return 'failed'
        else:
            return 'successful'

    def _get_last_job_run(self):
        # Override in subclasses as needed.
        if self.last_job:
            return self.last_job.finished

    def _set_status_and_last_job_run(self, save=True):
        status = self._get_current_status()
        last_job_run = self._get_last_job_run()
        return self.update_fields(status=status, last_job_run=last_job_run,
                                  save=save)

    def _can_update(self):
        # Override in subclasses as needed.
        return False

    @property
    def can_update(self):
        return self._can_update()

    def update(self, **kwargs):
        if self.can_update:
            unified_job = self.create_unified_job()
            unified_job.signal_start(**kwargs)
            return unified_job

    @classmethod
    def _get_unified_job_class(cls):
        '''
        Return subclass of UnifiedJob that is created from this template.
        '''
        raise NotImplementedError  # Implement in subclass.
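
    # Illustrative sketch (comments only, not part of the upstream file):
    # how the update lifecycle above is typically driven from code. `Project`
    # is one concrete UnifiedJobTemplate subclass; the lookup values are
    # hypothetical. _can_update() on the subclass decides whether update()
    # actually spawns anything.
    #
    #     project = Project.objects.get(name='demo')
    #     if project.can_update:
    #         update = project.update()  # create_unified_job() + signal_start()
    #         # update.status is now 'pending'; the task manager picks it up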

    @property
    def notification_templates(self):
        '''
        Return notification_templates relevant to this Unified Job Template
        '''
        # NOTE: Derived classes should implement
        from awx.main.models.notifications import NotificationTemplate
        return NotificationTemplate.objects.none()

    def create_unified_job(self, **kwargs):
        '''
        Create a new unified job based on this unified job template.
        '''
        new_job_passwords = kwargs.pop('survey_passwords', {})
        eager_fields = kwargs.pop('_eager_fields', None)

        # automatically encrypt survey fields
        if hasattr(self, 'survey_spec') and getattr(self, 'survey_enabled', False):
            password_list = self.survey_password_variables()
            encrypt_dict(kwargs.get('extra_vars', {}), password_list)

        unified_job_class = self._get_unified_job_class()
        fields = self._get_unified_job_field_names()
        parent_field_name = None
        if "_unified_job_class" in kwargs:
            # Special case where spawned job is different type than usual
            # Only used for slice jobs
            unified_job_class = kwargs.pop("_unified_job_class")
            fields = unified_job_class._get_unified_job_field_names() & fields
            parent_field_name = kwargs.pop('_parent_field_name')

        unallowed_fields = set(kwargs.keys()) - set(fields)
        validated_kwargs = kwargs.copy()
        if unallowed_fields:
            if parent_field_name is None:
                logger.warning('Fields {} are not allowed as overrides to spawn from {}.'.format(
                    ', '.join(unallowed_fields), self
                ))
            for f in unallowed_fields:
                validated_kwargs.pop(f)

        unified_job = copy_model_by_class(self, unified_job_class, fields, validated_kwargs)

        if eager_fields:
            for fd, val in eager_fields.items():
                setattr(unified_job, fd, val)

        # NOTE: slice workflow jobs _get_parent_field_name method
        # is not correct until this is set
        if not parent_field_name:
            parent_field_name = unified_job._get_parent_field_name()
        setattr(unified_job, parent_field_name, self)

        # For JobTemplate-based jobs with surveys, add passwords to list for perma-redaction
        if hasattr(self, 'survey_spec') and getattr(self, 'survey_enabled', False):
            for password in self.survey_password_variables():
                new_job_passwords[password] = REPLACE_STR
        if new_job_passwords:
            unified_job.survey_passwords = new_job_passwords
            kwargs['survey_passwords'] = new_job_passwords  # saved in config object for relaunch

        from awx.main.signals import disable_activity_stream, activity_stream_create
        with disable_activity_stream():
            # Don't emit the activity stream record here for creation,
            # because we haven't attached important M2M relations yet, like
            # credentials and labels
            unified_job.save()

        # Labels and credentials copied here
        if validated_kwargs.get('credentials'):
            Credential = UnifiedJob._meta.get_field('credentials').related_model
            cred_dict = Credential.unique_dict(self.credentials.all())
            prompted_dict = Credential.unique_dict(validated_kwargs['credentials'])
            # combine prompted credentials with JT
            cred_dict.update(prompted_dict)
            validated_kwargs['credentials'] = [cred for cred in cred_dict.values()]
            kwargs['credentials'] = validated_kwargs['credentials']

        with disable_activity_stream():
            copy_m2m_relationships(self, unified_job, fields, kwargs=validated_kwargs)

        if 'extra_vars' in validated_kwargs:
            unified_job.handle_extra_data(validated_kwargs['extra_vars'])

        # Create record of provided prompts for relaunch and rescheduling
        unified_job.create_config_from_prompts(kwargs, parent=self)

        # manually issue the create activity stream entry _after_ M2M relations
        # have been associated to the UJ
        if unified_job.__class__ in activity_stream_registrar.models:
            activity_stream_create(None, unified_job, True)

        return unified_job

    @classmethod
    def get_ask_mapping(cls):
        '''
        Creates dictionary that maps the unified job field (keys)
        to the field that enables prompting for the field (values)
        '''
        mapping = {}
        for field in cls._meta.fields:
            if isinstance(field, AskForField):
                mapping[field.allows_field] = field.name
        return mapping

    @classmethod
    def _get_unified_jt_copy_names(cls):
        return cls._get_unified_job_field_names()

    def copy_unified_jt(self):
        '''
        Returns saved object, including related fields.
        Create a copy of this unified job template.
        '''
        unified_jt_class = self.__class__
        fields = self._get_unified_jt_copy_names()
        unified_jt = copy_model_by_class(self, unified_jt_class, fields, {})

        time_now = now()
        unified_jt.name = unified_jt.name.split('@', 1)[0] + ' @ ' + time_now.strftime('%I:%M:%S %p')

        unified_jt.save()
        copy_m2m_relationships(self, unified_jt, fields)
        return unified_jt

    def _accept_or_ignore_job_kwargs(self, _exclude_errors=(), **kwargs):
        '''
        Override in subclass if template accepts _any_ prompted params
        '''
        errors = {}
        if kwargs:
            for field_name in kwargs.keys():
                errors[field_name] = [_("Field is not allowed on launch.")]
        return ({}, kwargs, errors)

    def accept_or_ignore_variables(self, data, errors=None, _exclude_errors=(), extra_passwords=None):
        '''
        If subclasses accept any `variables` or `extra_vars`, they should
        define _accept_or_ignore_variables to place those variables in the
        accepted dict, according to the acceptance rules of the template.
        '''
        if errors is None:
            errors = {}
        if not isinstance(data, dict):
            try:
                data = parse_yaml_or_json(data, silent_failure=False)
            except ParseError as exc:
                errors['extra_vars'] = [str(exc)]
                return ({}, data, errors)
        if hasattr(self, '_accept_or_ignore_variables'):
            # SurveyJobTemplateMixin cannot override any methods because of
            # resolution order, forced by how metaclass processes fields,
            # thus the need for hasattr check
            if extra_passwords:
                return self._accept_or_ignore_variables(
                    data, errors, _exclude_errors=_exclude_errors,
                    extra_passwords=extra_passwords)
            else:
                return self._accept_or_ignore_variables(data, errors, _exclude_errors=_exclude_errors)
        elif data:
            errors['extra_vars'] = [
                _('Variables {list_of_keys} provided, but this template cannot accept variables.'.format(
                    list_of_keys=', '.join(data.keys())))]
        return ({}, data, errors)


class UnifiedJobTypeStringMixin(object):

    @classmethod
    def get_instance_by_type(cls, job_type, job_id):
        model = get_model_for_type(job_type)
        if not model:
            return None
        return model.objects.get(id=job_id)

    def model_to_str(self):
        return camelcase_to_underscore(self.__class__.__name__)


class UnifiedJobDeprecatedStdout(models.Model):

    class Meta:
        managed = False
        db_table = 'main_unifiedjob'

    result_stdout_text = models.TextField(
        null=True,
        editable=False,
    )


class StdoutMaxBytesExceeded(Exception):

    def __init__(self, total, supported):
        self.total = total
        self.supported = supported


class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique,
                 UnifiedJobTypeStringMixin, TaskManagerUnifiedJobMixin):
    '''
    Concrete base class for unified job run by the task engine.
    '''

    STATUS_CHOICES = UnifiedJobTemplate.JOB_STATUS_CHOICES

    LAUNCH_TYPE_CHOICES = [
        ('manual', _('Manual')),            # Job was started manually by a user.
        ('relaunch', _('Relaunch')),        # Job was started via relaunch.
        ('callback', _('Callback')),        # Job was started via host callback.
        ('scheduled', _('Scheduled')),      # Job was started from a schedule.
        ('dependency', _('Dependency')),    # Job was started as a dependency of another job.
        ('workflow', _('Workflow')),        # Job was started from a workflow job.
        ('webhook', _('Webhook')),          # Job was started from a webhook event.
        ('sync', _('Sync')),                # Job was started from a project sync.
        ('scm', _('SCM Update'))            # Job was created as an Inventory SCM sync.
    ]

    PASSWORD_FIELDS = ('start_args',)

    class Meta:
        app_label = 'main'
        ordering = ('id',)

    old_pk = models.PositiveIntegerField(
        null=True,
        default=None,
        editable=False,
    )
    emitted_events = models.PositiveIntegerField(
        default=0,
        editable=False,
    )
    unified_job_template = models.ForeignKey(
        'UnifiedJobTemplate',
        null=True,  # Some jobs can be run without a template.
        default=None,
        editable=False,
        related_name='%(class)s_unified_jobs',
        on_delete=polymorphic.SET_NULL,
    )
    created = models.DateTimeField(
        default=None,
        editable=False,
        db_index=True,  # add an index, this is a commonly queried field
    )
    launch_type = models.CharField(
        max_length=20,
        choices=LAUNCH_TYPE_CHOICES,
        default='manual',
        editable=False,
        db_index=True
    )
    schedule = models.ForeignKey(  # Which schedule entry was responsible for starting this job.
        'Schedule',
        null=True,
        default=None,
        editable=False,
        on_delete=polymorphic.SET_NULL,
    )
    dependent_jobs = models.ManyToManyField(
        'self',
        editable=False,
        related_name='%(class)s_blocked_jobs+',
    )
    execution_node = models.TextField(
        blank=True,
        default='',
        editable=False,
        help_text=_("The node the job executed on."),
    )
    controller_node = models.TextField(
        blank=True,
        default='',
        editable=False,
        help_text=_("The instance that managed the isolated execution environment."),
    )
    notifications = models.ManyToManyField(
        'Notification',
        editable=False,
        related_name='%(class)s_notifications',
    )
    cancel_flag = models.BooleanField(
        blank=True,
        default=False,
        editable=False,
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='new',
        editable=False,
        db_index=True,
    )
    failed = models.BooleanField(
        default=False,
        editable=False,
    )
    started = models.DateTimeField(
        null=True,
        default=None,
        editable=False,
        help_text=_("The date and time the job was queued for starting."),
    )
    dependencies_processed = models.BooleanField(
        default=False,
        editable=False,
        help_text=_("If True, the task manager has already processed potential dependencies for this job.")
    )
    finished = models.DateTimeField(
        null=True,
        default=None,
        editable=False,
        help_text=_("The date and time the job finished execution."),
        db_index=True,
    )
    canceled_on = models.DateTimeField(
        null=True,
        default=None,
        editable=False,
        help_text=_("The date and time when the cancel request was sent."),
        db_index=True,
    )
    elapsed = models.DecimalField(
        max_digits=12,
        decimal_places=3,
        editable=False,
        help_text=_("Elapsed time in seconds that the job ran."),
    )
    job_args = prevent_search(models.TextField(
        blank=True,
        default='',
        editable=False,
    ))
    job_cwd = models.CharField(
        max_length=1024,
        blank=True,
        default='',
        editable=False,
    )
    job_env = prevent_search(JSONField(
        blank=True,
        default=dict,
        editable=False,
    ))
    job_explanation = models.TextField(
        blank=True,
        default='',
        editable=False,
        help_text=_("A status field to indicate the state of the job if it wasn't able to run and capture stdout"),
    )
    start_args = prevent_search(models.TextField(
        blank=True,
        default='',
        editable=False,
    ))
    result_traceback = models.TextField(
        blank=True,
        default='',
        editable=False,
    )
    celery_task_id = models.CharField(
        max_length=100,
        blank=True,
        default='',
        editable=False,
    )
    labels = models.ManyToManyField(
        "Label",
        blank=True,
        related_name='%(class)s_labels'
    )
    instance_group = models.ForeignKey(
        'InstanceGroup',
        blank=True,
        null=True,
        default=None,
        on_delete=polymorphic.SET_NULL,
        help_text=_('The instance group the job was run under'),
    )
    organization = models.ForeignKey(
        'Organization',
        blank=True,
        null=True,
        on_delete=polymorphic.SET_NULL,
        related_name='%(class)ss',
        help_text=_('The organization used to determine access to this unified job.'),
    )
    credentials = models.ManyToManyField(
        'Credential',
        related_name='%(class)ss',
    )

    def get_absolute_url(self, request=None):
        RealClass = self.get_real_instance_class()
        if RealClass != UnifiedJob:
            return RealClass.get_absolute_url(RealClass(pk=self.pk), request=request)
        else:
            return ''

    def get_ui_url(self):
        real_instance = self.get_real_instance()
        if real_instance != self:
            return real_instance.get_ui_url()
        else:
            return ''

    @classmethod
    def _get_task_class(cls):
        raise NotImplementedError  # Implement in subclasses.

    @classmethod
    def supports_isolation(cls):
        return False

    @property
    def can_run_containerized(self):
        return False

    def _get_parent_field_name(self):
        return 'unified_job_template'  # Override in subclasses.

    @classmethod
    def _get_unified_job_template_class(cls):
        '''
        Return subclass of UnifiedJobTemplate that applies to this unified job.
        '''
        raise NotImplementedError  # Implement in subclass.

    def _global_timeout_setting(self):
        "Override in child classes, None value indicates this is not configurable"
        return None

    def _resources_sufficient_for_launch(self):
        return True

    def __str__(self):
        return u'%s-%s-%s' % (self.created, self.id, self.status)

    @property
    def log_format(self):
        return '{} {} ({})'.format(get_type_for_model(type(self)), self.id, self.status)

    def _get_parent_instance(self):
        return getattr(self, self._get_parent_field_name(), None)

    def _update_parent_instance_no_save(self, parent_instance, update_fields=None):
        if update_fields is None:
            update_fields = []

        def parent_instance_set(key, val):
            setattr(parent_instance, key, val)
            if key not in update_fields:
                update_fields.append(key)

        if parent_instance:
            if self.status in ('pending', 'waiting', 'running'):
                if parent_instance.current_job != self:
                    parent_instance_set('current_job', self)
                # Update parent with all the 'good' states of its child
                if parent_instance.status != self.status:
                    parent_instance_set('status', self.status)
            elif self.status in ('successful', 'failed', 'error', 'canceled'):
                if parent_instance.current_job == self:
                    parent_instance_set('current_job', None)
                parent_instance_set('last_job', self)
                parent_instance_set('last_job_failed', self.failed)

        return update_fields

    def _update_parent_instance(self):
        parent_instance = self._get_parent_instance()
        if parent_instance:
            update_fields = self._update_parent_instance_no_save(parent_instance)
            parent_instance.save(update_fields=update_fields)

    def save(self, *args, **kwargs):
        """Save the job, with current status, to the database.
        Ensure that all data is consistent before doing so.
        """
        # If update_fields has been specified, add our field names to it,
        # if it hasn't been specified, then we're just doing a normal save.
        update_fields = kwargs.get('update_fields', [])

        # Get status before save...
        status_before = self.status or 'new'

        # If this job already exists in the database, retrieve a copy of
        # the job in its prior state.
        if self.pk:
            self_before = self.__class__.objects.get(pk=self.pk)
            if self_before.status != self.status:
                status_before = self_before.status

        # Sanity check: Is this a failure? Ensure that the failure value
        # matches the status.
        failed = bool(self.status in ('failed', 'error', 'canceled'))
        if self.failed != failed:
            self.failed = failed
            if 'failed' not in update_fields:
                update_fields.append('failed')

        # Sanity check: Has the job just started? If so, mark down its start
        # time.
        if self.status == 'running' and not self.started:
            self.started = now()
            if 'started' not in update_fields:
                update_fields.append('started')

        # Sanity check: Has the job just completed? If so, mark down its
        # completion time, and record its output to the database.
        if self.status in ('successful', 'failed', 'error', 'canceled') and not self.finished:
            # Record the `finished` time.
            self.finished = now()
            if 'finished' not in update_fields:
                update_fields.append('finished')

        # If we have a start and finished time, and haven't already calculated
        # out the time that elapsed, do so.
        if self.started and self.finished and not self.elapsed:
            td = self.finished - self.started
            elapsed = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / (10 ** 6 * 1.0)
        else:
            elapsed = 0.0
        if self.elapsed != elapsed:
            self.elapsed = str(elapsed)
            if 'elapsed' not in update_fields:
                update_fields.append('elapsed')

        # Ensure that the job template information is current.
        if self.unified_job_template != self._get_parent_instance():
            self.unified_job_template = self._get_parent_instance()
            if 'unified_job_template' not in update_fields:
                update_fields.append('unified_job_template')

        if self.cancel_flag and not self.canceled_on:
            # Record the 'canceled' time.
            self.canceled_on = now()
            if 'canceled_on' not in update_fields:
                update_fields.append('canceled_on')

        # Okay; we're done. Perform the actual save.
        result = super(UnifiedJob, self).save(*args, **kwargs)

        # If status changed, update the parent instance.
        if self.status != status_before:
            # Update parent outside of the transaction for Job w/ allow_simultaneous=True
            # This dodges lock contention at the expense of the foreign key not being
            # completely correct.
            if getattr(self, 'allow_simultaneous', False):
                connection.on_commit(self._update_parent_instance)
            else:
                self._update_parent_instance()

        # Done.
        return result

    def copy_unified_job(self, _eager_fields=None, **new_prompts):
        '''
        Returns saved object, including related fields.
        Create a copy of this unified job for the purpose of relaunch
        '''
        unified_job_class = self.__class__
        unified_jt_class = self._get_unified_job_template_class()
        parent_field_name = self._get_parent_field_name()
        fields = unified_jt_class._get_unified_job_field_names() | set([parent_field_name])

        create_data = {}
        if _eager_fields:
            create_data = _eager_fields.copy()
        create_data["launch_type"] = "relaunch"

        prompts = self.launch_prompts()
        if self.unified_job_template and (prompts is not None):
            prompts.update(new_prompts)
            prompts['_eager_fields'] = create_data
            unified_job = self.unified_job_template.create_unified_job(**prompts)
        else:
            unified_job = copy_model_by_class(self, unified_job_class, fields, {})
            for fd, val in create_data.items():
                setattr(unified_job, fd, val)
            unified_job.save()

            # Labels copied here
            from awx.main.signals import disable_activity_stream
            with disable_activity_stream():
                copy_m2m_relationships(self, unified_job, fields)

        return unified_job

    def launch_prompts(self):
        '''
        Return dictionary of prompts job was launched with
        returns None if unknown
        '''
        JobLaunchConfig = self._meta.get_field('launch_config').related_model
        try:
            config = self.launch_config
            return config.prompts_dict()
        except JobLaunchConfig.DoesNotExist:
            return None

    def create_config_from_prompts(self, kwargs, parent=None):
        '''
        Create a launch configuration entry for this job, given prompts
        returns None if it can not be created
        '''
        JobLaunchConfig = self._meta.get_field('launch_config').related_model
        config = JobLaunchConfig(job=self)
        if parent is None:
            parent = getattr(self, self._get_parent_field_name())
        if parent is None:
            return
        valid_fields = list(parent.get_ask_mapping().keys())
        # Special cases allowed for workflows
        if hasattr(self, 'extra_vars'):
            valid_fields.extend(['survey_passwords', 'extra_vars'])
        else:
            kwargs.pop('survey_passwords', None)
        for field_name, value in kwargs.items():
            if field_name not in valid_fields:
                raise Exception('Unrecognized launch config field {}.'.format(field_name))
            if field_name == 'credentials':
                continue
            key = field_name
            if key == 'extra_vars':
                key = 'extra_data'
            setattr(config, key, value)
        config.save()

        job_creds = set(kwargs.get('credentials', []))
        if 'credentials' in [field.name for field in parent._meta.get_fields()]:
            job_creds = job_creds - set(parent.credentials.all())
        if job_creds:
            config.credentials.add(*job_creds)
        return config

    @property
    def event_class(self):
        raise NotImplementedError()

    @property
    def job_type_name(self):
        return self.get_real_instance_class()._meta.verbose_name.replace(' ', '_')

    @property
    def result_stdout_text(self):
        related = UnifiedJobDeprecatedStdout.objects.get(pk=self.pk)
        return related.result_stdout_text or ''

    @result_stdout_text.setter
    def result_stdout_text(self, value):
        # TODO: remove this method once all stdout is based on jobevents
        # (because it won't be used for writing anymore)
        related = UnifiedJobDeprecatedStdout.objects.get(pk=self.pk)
        related.result_stdout_text = value
        related.save()

    @property
    def event_parent_key(self):
        tablename = self._meta.db_table
        return {
            'main_job': 'job_id',
            'main_adhoccommand': 'ad_hoc_command_id',
            'main_projectupdate': 'project_update_id',
            'main_inventoryupdate': 'inventory_update_id',
            'main_systemjob': 'system_job_id',
        }[tablename]

    def get_event_queryset(self):
        return self.event_class.objects.filter(**{self.event_parent_key: self.id})

    @property
    def event_processing_finished(self):
        '''
        Returns True / False, whether all events from job have been saved
        '''
        if self.status in ACTIVE_STATES:
            return False  # tally of events is only available at end of run
        try:
            event_qs = self.get_event_queryset()
        except NotImplementedError:
            return True  # Model without events, such as WFJT
        return self.emitted_events == event_qs.count()

    def result_stdout_raw_handle(self, enforce_max_bytes=True):
        """
        This method returns a file-like object ready to be read which
        contains all stdout for the UnifiedJob.

        If the size of the file is greater than
        `settings.STDOUT_MAX_BYTES_DISPLAY`, a StdoutMaxBytesExceeded
        exception will be raised.
        """
        max_supported = settings.STDOUT_MAX_BYTES_DISPLAY

        if enforce_max_bytes:
            # If enforce_max_bytes is True, we're not grabbing the whole file,
            # just the first settings.STDOUT_MAX_BYTES_DISPLAY bytes;
            # in this scenario, it's probably safe to use a StringIO.
            fd = StringIO()
        else:
            # If enforce_max_bytes = False, that means they're downloading
            # the entire file. To avoid ballooning memory, let's write the
            # stdout content to a temporary disk location
            if not os.path.exists(settings.JOBOUTPUT_ROOT):
                os.makedirs(settings.JOBOUTPUT_ROOT)
            fd = tempfile.NamedTemporaryFile(
                mode='w',
                prefix='{}-{}-'.format(self.model_to_str(), self.pk),
                suffix='.out',
                dir=settings.JOBOUTPUT_ROOT,
                encoding='utf-8'
            )
            from awx.main.tasks import purge_old_stdout_files  # circular import
            purge_old_stdout_files.apply_async()

        # Before the addition of event-based stdout, older versions of
        # awx stored stdout as raw text blobs in a certain database column
        # (`main_unifiedjob.result_stdout_text`)
        # For older installs, this data still exists in the database; check for
        # it and use if it exists
        legacy_stdout_text = self.result_stdout_text
        if legacy_stdout_text:
            if enforce_max_bytes and len(legacy_stdout_text) > max_supported:
                raise StdoutMaxBytesExceeded(len(legacy_stdout_text), max_supported)
            fd.write(legacy_stdout_text)
            if hasattr(fd, 'name'):
                fd.flush()
                return codecs.open(fd.name, 'r', encoding='utf-8')
            else:
                # we just wrote to this StringIO, so rewind it
                fd.seek(0)
                return fd
        else:
            # Note: the code in this block _intentionally_ does not use the
            # Django ORM because of the potential size (many MB+) of
            # `main_jobevent.stdout`; we *do not* want to generate queries
            # here that construct model objects by fetching large gobs of
            # data (and potentially ballooning memory usage); instead, we
            # just want to write concatenated values of a certain column
            # (`stdout`) directly to a file
            with connection.cursor() as cursor:
                if enforce_max_bytes:
                    # detect the length of all stdout for this UnifiedJob, and
                    # if it exceeds settings.STDOUT_MAX_BYTES_DISPLAY bytes,
                    # don't bother actually fetching the data
                    total = self.get_event_queryset().aggregate(
                        total=models.Sum(models.Func(models.F('stdout'), function='LENGTH'))
                    )['total'] or 0
                    if total > max_supported:
                        raise StdoutMaxBytesExceeded(total, max_supported)

                # psycopg2's copy_expert writes bytes, but callers of this
                # function assume a str-based fd will be returned; decode
                # .write() calls on the fly to maintain this interface
                _write = fd.write
                fd.write = lambda s: _write(smart_text(s))

                cursor.copy_expert(
                    "copy (select stdout from {} where {}={} and stdout != '' order by start_line) to stdout".format(
                        self._meta.db_table + 'event',
                        self.event_parent_key,
                        self.id
                    ),
                    fd
                )

            if hasattr(fd, 'name'):
                # If we're dealing with a physical file, use `sed` to clean
                # up escaped line sequences
                fd.flush()
                subprocess.Popen("sed -i 's/\\\\r\\\\n/\\n/g' {}".format(fd.name), shell=True).wait()
                return codecs.open(fd.name, 'r', encoding='utf-8')
            else:
                # If we're dealing with an in-memory string buffer, use
                # string.replace()
                fd = StringIO(fd.getvalue().replace('\\r\\n', '\n'))
                return fd

    def _escape_ascii(self, content):
        # Remove ANSI escape sequences used to embed event data.
        content = re.sub(r'\x1b\[K(?:[A-Za-z0-9+/=]+\x1b\[\d+D)+\x1b\[K', '', content)
        # Remove ANSI color escape sequences.
        content = re.sub(r'\x1b[^m]*m', '', content)
        return content

    def _result_stdout_raw(self, redact_sensitive=False, escape_ascii=False):
        content = self.result_stdout_raw_handle().read()
        if redact_sensitive:
            content = UriCleaner.remove_sensitive(content)
        if escape_ascii:
            content = self._escape_ascii(content)
        return content

    @property
    def result_stdout_raw(self):
        return self._result_stdout_raw()

    @property
    def result_stdout(self):
        return self._result_stdout_raw(escape_ascii=True)

    def _result_stdout_raw_limited(self, start_line=0, end_line=None, redact_sensitive=True, escape_ascii=False):
        return_buffer = StringIO()
        if end_line is not None:
            end_line = int(end_line)
        stdout_lines = self.result_stdout_raw_handle().readlines()
        absolute_end = len(stdout_lines)
        for line in stdout_lines[int(start_line):end_line]:
            return_buffer.write(line)
        if int(start_line) < 0:
            start_actual = len(stdout_lines) + int(start_line)
            end_actual = len(stdout_lines)
        else:
            start_actual = int(start_line)
            if end_line is not None:
                end_actual = min(int(end_line), len(stdout_lines))
            else:
                end_actual = len(stdout_lines)

        return_buffer = return_buffer.getvalue()
        if redact_sensitive:
            return_buffer = UriCleaner.remove_sensitive(return_buffer)
        if escape_ascii:
            return_buffer = self._escape_ascii(return_buffer)

        return return_buffer, start_actual, end_actual, absolute_end

    def result_stdout_raw_limited(self, start_line=0, end_line=None, redact_sensitive=False):
        return self._result_stdout_raw_limited(start_line, end_line, redact_sensitive)

    def result_stdout_limited(self, start_line=0, end_line=None, redact_sensitive=False):
        return self._result_stdout_raw_limited(start_line, end_line, redact_sensitive, escape_ascii=True)

    @property
    def workflow_job_id(self):
        workflow_job = self.get_workflow_job()
        if workflow_job:
            return workflow_job.pk
        return None

    @property
    def spawned_by_workflow(self):
        return self.launch_type == 'workflow'

    def get_workflow_job(self):
        if self.spawned_by_workflow:
            try:
                return self.unified_job_node.workflow_job
            except UnifiedJob.unified_job_node.RelatedObjectDoesNotExist:
                pass
        return None

    @property
    def workflow_node_id(self):
        if self.spawned_by_workflow:
            try:
                return self.unified_job_node.pk
            except UnifiedJob.unified_job_node.RelatedObjectDoesNotExist:
                pass
        return None

    def get_passwords_needed_to_start(self):
        return []

    def handle_extra_data(self, extra_data):
        if hasattr(self, 'extra_vars') and extra_data:
            extra_data_dict = {}
            try:
                extra_data_dict = parse_yaml_or_json(extra_data, silent_failure=False)
            except Exception as e:
                logger.warning("Exception deserializing extra vars: " + str(e))
            evars = self.extra_vars_dict
            evars.update(extra_data_dict)
            self.update_fields(extra_vars=json.dumps(evars))

    @property
    def can_start(self):
        return bool(self.status in ('new', 'waiting'))

    @property
    def can_schedule(self):
        if getattr(self, 'passwords_needed_to_start', None):
            return False
        if getattr(self, 'inventory', None) is None:
            return False
        JobLaunchConfig = self._meta.get_field('launch_config').related_model
        try:
            self.launch_config
            if self.unified_job_template is None:
                return False
            return True
        except JobLaunchConfig.DoesNotExist:
            return False

    @property
    def task_impact(self):
        raise NotImplementedError  # Implement in subclass.
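
    # Illustrative sketch (comments only, not part of the upstream file):
    # reading job output through result_stdout_raw_handle() above while
    # honoring the display-size cap; `job` is a hypothetical saved
    # UnifiedJob instance.
    #
    #     try:
    #         fd = job.result_stdout_raw_handle()
    #         text = fd.read()
    #     except StdoutMaxBytesExceeded as e:
    #         text = 'Output too large: {} > {} bytes'.format(e.total, e.supported)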

    def websocket_emit_data(self):
        '''
        Return extra data that should be included when submitting data to the
        browser over the websocket connection
        '''
        websocket_data = dict(type=self.job_type_name)
        if self.spawned_by_workflow:
            websocket_data.update(dict(workflow_job_id=self.workflow_job_id,
                                       workflow_node_id=self.workflow_node_id))
        return websocket_data

    def _websocket_emit_status(self, status):
        try:
            status_data = dict(unified_job_id=self.id, status=status)
            if status == 'waiting':
                if self.instance_group:
                    status_data['instance_group_name'] = self.instance_group.name
                else:
                    status_data['instance_group_name'] = None
            elif status in ['successful', 'failed', 'canceled'] and self.finished:
                status_data['finished'] = datetime.datetime.strftime(self.finished, "%Y-%m-%dT%H:%M:%S.%fZ")
            status_data.update(self.websocket_emit_data())
            status_data['group_name'] = 'jobs'
            if getattr(self, 'unified_job_template_id', None):
                status_data['unified_job_template_id'] = self.unified_job_template_id
            emit_channel_notification('jobs-status_changed', status_data)
            if self.spawned_by_workflow:
                status_data['group_name'] = "workflow_events"
                status_data['workflow_job_template_id'] = self.unified_job_template.id
                emit_channel_notification('workflow_events-' + str(self.workflow_job_id), status_data)
        except IOError:  # includes socket errors
            logger.exception('%s failed to emit channel msg about status change', self.log_format)

    def websocket_emit_status(self, status):
        connection.on_commit(lambda: self._websocket_emit_status(status))
        if hasattr(self, 'update_webhook_status'):
            connection.on_commit(lambda: self.update_webhook_status(status))

    def notification_data(self):
        return dict(id=self.id,
                    name=self.name,
                    url=self.get_ui_url(),
                    created_by=smart_text(self.created_by),
                    started=self.started.isoformat() if self.started is not None else None,
                    finished=self.finished.isoformat() if self.finished is not None else None,
                    status=self.status,
                    traceback=self.result_traceback)

    def pre_start(self, **kwargs):
        if not self.can_start:
            self.job_explanation = u'%s is not in a startable state: %s, expecting one of %s' % (
                self._meta.verbose_name, self.status, str(('new', 'waiting')))
            self.save(update_fields=['job_explanation'])
            return (False, None)

        # verify that any associated credentials aren't missing required field data
        missing_credential_inputs = []
        for credential in self.credentials.all():
            defined_fields = credential.credential_type.defined_fields
            for required in credential.credential_type.inputs.get('required', []):
                if required in defined_fields and not credential.has_input(required):
                    missing_credential_inputs.append(required)
            if missing_credential_inputs:
                self.job_explanation = '{} cannot start because Credential {} does not provide one or more required fields ({}).'.format(
                    self._meta.verbose_name.title(),
                    credential.name,
                    ', '.join(sorted(missing_credential_inputs))
                )
                self.save(update_fields=['job_explanation'])
                return (False, None)

        needed = self.get_passwords_needed_to_start()
        try:
            start_args = json.loads(decrypt_field(self, 'start_args'))
        except Exception:
            start_args = None

        if start_args in (None, ''):
            start_args = kwargs

        opts = dict([(field, start_args.get(field, '')) for field in needed])

        if not all(opts.values()):
            missing_fields = ', '.join([k for k, v in opts.items() if not v])
            self.job_explanation = u'Missing needed fields: %s.' % missing_fields
            self.save(update_fields=['job_explanation'])
            return (False, None)

        if 'extra_vars' in kwargs:
            self.handle_extra_data(kwargs['extra_vars'])

        return (True, opts)

    def signal_start(self, **kwargs):
        """Notify the task runner system to begin work on this task."""
        # Sanity check: Are we able to start the job? If not, do not attempt
        # to do so.
        if not self.can_start:
            return False

        # Get any passwords or other data that are prerequisites to running
        # the job.
        needed = self.get_passwords_needed_to_start()
        opts = dict([(field, kwargs.get(field, '')) for field in needed])
        if not all(opts.values()):
            return False

        # Save the pending status, and inform the SocketIO listener.
        self.update_fields(start_args=json.dumps(kwargs), status='pending')
        self.websocket_emit_status("pending")
        schedule_task_manager()

        # Each type of unified job has a different Task class; get the
        # appropriate one.
        # task_type = get_type_for_model(self)

        # Actually tell the task runner to run this task.
        # FIXME: This will deadlock the task runner
        #from awx.main.tasks import notify_task_runner
        #notify_task_runner.delay({'id': self.id, 'metadata': kwargs,
        #                          'task_type': task_type})

        # Done!
        return True

    @property
    def actually_running(self):
        # returns True if the job is running in the appropriate dispatcher process
        running = False
        if all([
            self.status == 'running',
            self.celery_task_id,
            self.execution_node
        ]):
            # If the job is marked as running, but the dispatcher
            # doesn't know about it (or the dispatcher doesn't reply),
            # then cancel the job
            timeout = 5
            try:
                running = self.celery_task_id in ControlDispatcher(
                    'dispatcher', self.controller_node or self.execution_node
                ).running(timeout=timeout)
            except (socket.timeout, RuntimeError):
                logger.error('could not reach dispatcher on {} within {}s'.format(
                    self.execution_node, timeout
                ))
                running = False
        return running

    @property
    def can_cancel(self):
        return bool(self.status in CAN_CANCEL)

    def _build_job_explanation(self):
        if not self.job_explanation:
            return 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % \
                   (self.model_to_str(), self.name, self.id)
        return None

    def cancel(self, job_explanation=None, is_chain=False):
        if self.can_cancel:
            if not is_chain:
                for x in self.get_jobs_fail_chain():
                    x.cancel(job_explanation=self._build_job_explanation(), is_chain=True)

            if not self.cancel_flag:
                self.cancel_flag = True
                self.start_args = ''  # blank field to remove encrypted passwords
                cancel_fields = ['cancel_flag', 'start_args']
                if self.status in ('pending', 'waiting', 'new'):
                    self.status = 'canceled'
                    cancel_fields.append('status')
                if self.status == 'running' and not self.actually_running:
                    self.status = 'canceled'
                    cancel_fields.append('status')
                if job_explanation is not None:
                    self.job_explanation = job_explanation
                    cancel_fields.append('job_explanation')
                self.save(update_fields=cancel_fields)
                self.websocket_emit_status("canceled")
        return self.cancel_flag

    @property
    def preferred_instance_groups(self):
        '''
        Return Instance/Rampart Groups preferred by this unified job's template
        '''
        if not self.unified_job_template:
            return []
        template_groups = [x for x in self.unified_job_template.instance_groups.all()]
        return template_groups

    @property
    def global_instance_groups(self):
        from awx.main.models.ha import InstanceGroup
        default_instance_group = InstanceGroup.objects.filter(name='tower')
        if default_instance_group.exists():
            return [default_instance_group.first()]
        return []

    def awx_meta_vars(self):
        '''
        The result of this method is used as extra_vars of a job launched
        by AWX, for purposes
        of client playbook hooks
        '''
        r = {}
        for name in ('awx', 'tower'):
            r['{}_job_id'.format(name)] = self.pk
            r['{}_job_launch_type'.format(name)] = self.launch_type

        created_by = getattr_dne(self, 'created_by')

        wj = self.get_workflow_job()
        if wj:
            schedule = getattr_dne(wj, 'schedule')
            for name in ('awx', 'tower'):
                r['{}_workflow_job_id'.format(name)] = wj.pk
                r['{}_workflow_job_name'.format(name)] = wj.name
                if schedule:
                    r['{}_parent_job_schedule_id'.format(name)] = schedule.pk
                    r['{}_parent_job_schedule_name'.format(name)] = schedule.name

        if not created_by:
            schedule = getattr_dne(self, 'schedule')
            if schedule:
                for name in ('awx', 'tower'):
                    r['{}_schedule_id'.format(name)] = schedule.pk
                    r['{}_schedule_name'.format(name)] = schedule.name

        if created_by:
            for name in ('awx', 'tower'):
                r['{}_user_id'.format(name)] = created_by.pk
                r['{}_user_name'.format(name)] = created_by.username
                r['{}_user_email'.format(name)] = created_by.email
                r['{}_user_first_name'.format(name)] = created_by.first_name
                r['{}_user_last_name'.format(name)] = created_by.last_name

        inventory = getattr_dne(self, 'inventory')
        if inventory:
            for name in ('awx', 'tower'):
                r['{}_inventory_id'.format(name)] = inventory.pk
                r['{}_inventory_name'.format(name)] = inventory.name

        return r

    def get_queue_name(self):
        return self.controller_node or self.execution_node or get_local_queuename()

    def is_isolated(self):
        return bool(self.controller_node)

    @property
    def is_containerized(self):
        return False
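
# Illustrative sketch (comments only, not part of the upstream file): the
# approximate shape of awx_meta_vars() output for a manually launched job.
# Values are hypothetical; each key is mirrored under both the `awx_` and
# `tower_` prefixes.
#
#     {
#         'awx_job_id': 42, 'tower_job_id': 42,
#         'awx_job_launch_type': 'manual', 'tower_job_launch_type': 'manual',
#         'awx_user_id': 1, 'awx_user_name': 'admin', ...
#     }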