summaryrefslogtreecommitdiff
path: root/test-chill/testchill/_extract.py
diff options
context:
space:
mode:
authorDerick Huth <derickhuth@gmail.com>2016-01-18 15:43:52 -0700
committerDerick Huth <derickhuth@gmail.com>2016-01-18 15:43:52 -0700
commit983749787ee0dc1beb1107873e8a13ebdaeba576 (patch)
treee9bff337b4d5582b87ad2edc25baa4d3b0c163fa /test-chill/testchill/_extract.py
parent0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff)
downloadchill-983749787ee0dc1beb1107873e8a13ebdaeba576.tar.gz
chill-983749787ee0dc1beb1107873e8a13ebdaeba576.tar.bz2
chill-983749787ee0dc1beb1107873e8a13ebdaeba576.zip
restore test suite
Diffstat (limited to 'test-chill/testchill/_extract.py')
-rw-r--r--test-chill/testchill/_extract.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/test-chill/testchill/_extract.py b/test-chill/testchill/_extract.py
new file mode 100644
index 0000000..f6984ac
--- /dev/null
+++ b/test-chill/testchill/_extract.py
@@ -0,0 +1,98 @@
+import collections
+import os
+import os.path
+import itertools
+import re
+
+from . import util
+
+if util.python_version_major == 2:
+ from HTMLParser import HTMLParser
+else:
+ from html.parser import HTMLParser
+
+class _TagExtractor(HTMLParser):
+ _comment_style_expr = {
+ 'c': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'cc': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'cpp': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'h': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'hh': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'hpp': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+ 'py': [('#+',r'[\n]'),('\'\'\'',),('"""',)],
+ 'script': [('#+',r'[\n]')],
+ 'lua': [(r'--\[\[',r'\]\]--')]
+ }
+
+ def __init__(self, tagname):
+ HTMLParser.__init__(self)
+ self.tagname = tagname
+ self._readin = False
+ self._value = ''
+
+ def handle_starttag(self, tag, attrs):
+ if tag == self.tagname:
+ self._readin = True
+ self._attrs = dict(attrs)
+
+ def handle_endtag(self, tag):
+ if tag == self.tagname:
+ self._readin = False
+ self._tag_list.append((self._value, self._attrs))
+ self._value = ''
+
+ def handle_data(self, txt):
+ if self._readin:
+ self._value += txt
+
+ @classmethod
+ def _parse(cls, tagname, txt):
+ reader = cls(tagname)
+ reader._readin = False
+ reader._value = ''
+ reader._tag_list = []
+ reader.feed(txt)
+ return reader._tag_list
+
+ @classmethod
+ def _get_commentstyles(cls, ext):
+ for comment_style in cls._comment_style_expr[ext]:
+ if len(comment_style) == 1:
+ start_expr = comment_style[0]
+ end_expr = comment_style[0]
+ elif len(comment_style) == 2:
+ start_expr = comment_style[0]
+ end_expr = comment_style[1]
+ yield start_expr, end_expr
+
+ @classmethod
+ def _commented(cls, txt, ext):
+ comment_spans = list()
+ for start_expr, end_expr in cls._get_commentstyles(ext):
+ pos = 0
+ while pos < len(txt):
+ start_match = re.search(start_expr, txt[pos:])
+ if start_match:
+ start_pos = pos + start_match.end()
+ end_match = re.search(end_expr, txt[start_pos:])
+ if end_match:
+ end_pos = start_pos + end_match.start()
+ pos = start_pos + end_match.end()
+ else:
+ end_pos = len(txt)
+ pos = end_pos
+ comment_spans.append((start_pos, end_pos))
+ else:
+ break
+ for span in sorted(comment_spans, key=lambda s: s[0]):
+ yield txt[span[0]:span[1]]
+
+ @classmethod
+ def extract_tag(cls, tagname, filename, wd=os.getcwd()):
+ with open(os.path.join(wd, filename), 'r') as f:
+ txt = f.read()
+ ext = filename.split('.')[-1]
+ return cls._parse(tagname, '\n'.join(cls._commented(txt, ext)))
+
+extract_tag = _TagExtractor.extract_tag
+