diff options
Diffstat (limited to 'test-chill/testchill/_extract.py')
| -rw-r--r-- | test-chill/testchill/_extract.py | 98 | 
1 files changed, 0 insertions, 98 deletions
| diff --git a/test-chill/testchill/_extract.py b/test-chill/testchill/_extract.py deleted file mode 100644 index f6984ac..0000000 --- a/test-chill/testchill/_extract.py +++ /dev/null @@ -1,98 +0,0 @@ -import collections -import os -import os.path -import itertools -import re - -from . import util - -if util.python_version_major == 2: -    from HTMLParser import HTMLParser -else: -    from html.parser import HTMLParser - -class _TagExtractor(HTMLParser): -    _comment_style_expr = { -            'c':      [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'cc':     [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'cpp':    [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'h':      [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'hh':     [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'hpp':    [('/(/)+',r'[\n]'),(r'/\*',r'\*/')], -            'py':     [('#+',r'[\n]'),('\'\'\'',),('"""',)], -            'script': [('#+',r'[\n]')], -            'lua':    [(r'--\[\[',r'\]\]--')] -        } -     -    def __init__(self, tagname): -        HTMLParser.__init__(self) -        self.tagname = tagname -        self._readin = False -        self._value = '' -     -    def handle_starttag(self, tag, attrs): -        if tag == self.tagname: -            self._readin = True -            self._attrs = dict(attrs) -     -    def handle_endtag(self, tag): -        if tag == self.tagname: -            self._readin = False -            self._tag_list.append((self._value, self._attrs)) -            self._value = '' -     -    def handle_data(self, txt): -        if self._readin: -            self._value += txt -     -    @classmethod -    def _parse(cls, tagname, txt): -        reader = cls(tagname) -        reader._readin = False -        reader._value = '' -        reader._tag_list = [] -        reader.feed(txt) -        return reader._tag_list -     -    @classmethod -    def _get_commentstyles(cls, ext): -        for comment_style in cls._comment_style_expr[ext]: -            if len(comment_style) == 1: -                start_expr = comment_style[0] -                end_expr = comment_style[0] -            elif len(comment_style) == 2: -                start_expr = comment_style[0] -                end_expr = comment_style[1] -            yield start_expr, end_expr -     -    @classmethod -    def _commented(cls, txt, ext): -        comment_spans = list() -        for start_expr, end_expr in cls._get_commentstyles(ext): -            pos = 0 -            while pos < len(txt): -                start_match = re.search(start_expr, txt[pos:]) -                if start_match: -                    start_pos = pos + start_match.end() -                    end_match = re.search(end_expr, txt[start_pos:]) -                    if end_match: -                        end_pos = start_pos + end_match.start() -                        pos = start_pos + end_match.end() -                    else: -                        end_pos = len(txt) -                        pos = end_pos -                    comment_spans.append((start_pos, end_pos)) -                else: -                    break -        for span in sorted(comment_spans, key=lambda s: s[0]): -            yield txt[span[0]:span[1]] -     -    @classmethod -    def extract_tag(cls, tagname, filename, wd=os.getcwd()): -        with open(os.path.join(wd, filename), 'r') as f: -            txt = f.read() -        ext = filename.split('.')[-1] -        return cls._parse(tagname, '\n'.join(cls._commented(txt, ext))) - -extract_tag = _TagExtractor.extract_tag - | 
