From a87e66758731a9e76c9c394dc2190b9882ddbceb Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Fri, 21 Jul 2023 19:51:23 -0700 Subject: clean pii from telemetry --- .../src/continuedev/libs/util/commonregex.py | 138 +++++++++++++++++++++ continuedev/src/continuedev/libs/util/telemetry.py | 7 +- 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 continuedev/src/continuedev/libs/util/commonregex.py (limited to 'continuedev') diff --git a/continuedev/src/continuedev/libs/util/commonregex.py b/continuedev/src/continuedev/libs/util/commonregex.py new file mode 100644 index 00000000..55da7fc0 --- /dev/null +++ b/continuedev/src/continuedev/libs/util/commonregex.py @@ -0,0 +1,138 @@ +# coding: utf-8 +import json +import re +from typing import Any, Dict + +date = re.compile( + '(?:(?]+[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)', re.IGNORECASE) +email = re.compile( + "([a-z0-9!#$%&'*+\/=?^_`{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)", re.IGNORECASE) +ip = re.compile('(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', re.IGNORECASE) +ipv6 = re.compile( + '\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?", + "unix_absolute_filepath": "", + "dates": "", + "times": "