71 lines
2.7 KiB
Python
71 lines
2.7 KiB
Python
import re
|
|
import urllib.parse as urlparse
|
|
|
|
REPLACE_STR = '$encrypted$'
|
|
|
|
|
|
class UriCleaner(object):
|
|
REPLACE_STR = REPLACE_STR
|
|
SENSITIVE_URI_PATTERN = re.compile(r'(\w{1,20}:(\/?\/?)[^\s]+)', re.MULTILINE) # NOQA
|
|
|
|
@staticmethod
|
|
def remove_sensitive(cleartext):
|
|
redactedtext = cleartext
|
|
text_index = 0
|
|
while True:
|
|
match = UriCleaner.SENSITIVE_URI_PATTERN.search(redactedtext, text_index)
|
|
if not match:
|
|
break
|
|
uri_str = match.group(1)
|
|
try:
|
|
# May raise a ValueError if invalid URI for one reason or another
|
|
o = urlparse.urlsplit(uri_str)
|
|
|
|
if not o.username and not o.password:
|
|
if o.netloc and ":" in o.netloc:
|
|
# Handle the special case url http://username:password that can appear in SCM url
|
|
# on account of a bug? in ansible redaction
|
|
(username, password) = o.netloc.split(':')
|
|
else:
|
|
text_index += len(match.group(1))
|
|
continue
|
|
else:
|
|
username = o.username
|
|
password = o.password
|
|
|
|
# Given a python MatchObject, with respect to redactedtext, find and
|
|
# replace the first occurance of username and the first and second
|
|
# occurance of password
|
|
|
|
uri_str = redactedtext[match.start():match.end()]
|
|
if username:
|
|
uri_str = uri_str.replace(username, UriCleaner.REPLACE_STR, 1)
|
|
# 2, just in case the password is $encrypted$
|
|
if password:
|
|
uri_str = uri_str.replace(password, UriCleaner.REPLACE_STR, 2)
|
|
|
|
t = redactedtext[:match.start()] + uri_str
|
|
text_index = len(t)
|
|
if (match.end() < len(redactedtext)):
|
|
t += redactedtext[match.end():]
|
|
|
|
redactedtext = t
|
|
if text_index >= len(redactedtext):
|
|
text_index = len(redactedtext) - 1
|
|
except ValueError:
|
|
# Invalid URI, redact the whole URI to be safe
|
|
redactedtext = redactedtext[:match.start()] + UriCleaner.REPLACE_STR + redactedtext[match.end():]
|
|
text_index = match.start() + len(UriCleaner.REPLACE_STR)
|
|
|
|
return redactedtext
|
|
|
|
|
|
class PlainTextCleaner(object):
|
|
REPLACE_STR = REPLACE_STR
|
|
|
|
@staticmethod
|
|
def remove_sensitive(cleartext, sensitive):
|
|
if sensitive == '':
|
|
return cleartext
|
|
return re.sub(r'%s' % re.escape(sensitive), '$encrypted$', cleartext)
|