diff options
Diffstat (limited to 'test-chill/testchill/_extract.py')
-rw-r--r-- | test-chill/testchill/_extract.py | 98 |
1 files changed, 0 insertions, 98 deletions
diff --git a/test-chill/testchill/_extract.py b/test-chill/testchill/_extract.py deleted file mode 100644 index f6984ac..0000000 --- a/test-chill/testchill/_extract.py +++ /dev/null @@ -1,98 +0,0 @@ -import collections -import os -import os.path -import itertools -import re - -from . import util - -if util.python_version_major == 2: - from HTMLParser import HTMLParser -else: - from html.parser import HTMLParser - -class _TagExtractor(HTMLParser): - _comment_style_expr = { - 'c': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'cc': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'cpp': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'h': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'hh': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'hpp': [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], - 'py': [('#+',r'[\n]'),('\'\'\'',),('"""',)], - 'script': [('#+',r'[\n]')], - 'lua': [(r'--\[\[',r'\]\]--')] - } - - def __init__(self, tagname): - HTMLParser.__init__(self) - self.tagname = tagname - self._readin = False - self._value = '' - - def handle_starttag(self, tag, attrs): - if tag == self.tagname: - self._readin = True - self._attrs = dict(attrs) - - def handle_endtag(self, tag): - if tag == self.tagname: - self._readin = False - self._tag_list.append((self._value, self._attrs)) - self._value = '' - - def handle_data(self, txt): - if self._readin: - self._value += txt - - @classmethod - def _parse(cls, tagname, txt): - reader = cls(tagname) - reader._readin = False - reader._value = '' - reader._tag_list = [] - reader.feed(txt) - return reader._tag_list - - @classmethod - def _get_commentstyles(cls, ext): - for comment_style in cls._comment_style_expr[ext]: - if len(comment_style) == 1: - start_expr = comment_style[0] - end_expr = comment_style[0] - elif len(comment_style) == 2: - start_expr = comment_style[0] - end_expr = comment_style[1] - yield start_expr, end_expr - - @classmethod - def _commented(cls, txt, ext): - comment_spans = list() - for start_expr, end_expr in cls._get_commentstyles(ext): - pos = 0 - while pos < len(txt): - start_match = re.search(start_expr, txt[pos:]) - if start_match: - start_pos = pos + start_match.end() - end_match = re.search(end_expr, txt[start_pos:]) - if end_match: - end_pos = start_pos + end_match.start() - pos = start_pos + end_match.end() - else: - end_pos = len(txt) - pos = end_pos - comment_spans.append((start_pos, end_pos)) - else: - break - for span in sorted(comment_spans, key=lambda s: s[0]): - yield txt[span[0]:span[1]] - - @classmethod - def extract_tag(cls, tagname, filename, wd=os.getcwd()): - with open(os.path.join(wd, filename), 'r') as f: - txt = f.read() - ext = filename.split('.')[-1] - return cls._parse(tagname, '\n'.join(cls._commented(txt, ext))) - -extract_tag = _TagExtractor.extract_tag - |