-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpyml.py
144 lines (125 loc) · 3.93 KB
/
pyml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from collections import defaultdict
from re import VERBOSE
from funcparserlib.lexer import make_tokenizer, Spec
from funcparserlib.parser import many, eof, skip
from funcparserlib.contrib.common import sometok
# Encoding name exposed as a module constant; nothing in the visible part of
# this file reads it.
ENCODING = 'utf-8'

# Verbose-mode regex fragments describing the inside of a double-quoted
# string literal.  They are non-raw unicode literals so that the upper range
# bound is the real character U+FFFF on both Python 2 and Python 3; every
# backslash meant for the regex engine is therefore doubled.
regexps = {
    'escaped': u'''
        \\\\                                  # Escape
        ((?P<standard>["\\\\/bfnrt])          # Standard escapes
        | (u(?P<unicode>[0-9A-Fa-f]{4})))     # uXXXX
        ''',
    'unescaped': u'''
        [\\x20-\\x21\\x23-\\x5b\\x5d-\uffff]  # Unescaped: avoid ["\\\\]
        ''',
}

# Token specifications handed to funcparserlib.
# NOTE(review): funcparserlib presumably tries the specs in list order --
# confirm against the installed version.
specs = [
    Spec('eol', r'[\r\n]+'),
    # Horizontal whitespace only.  The previous pattern (\s+) also matched
    # CR/LF, so trailing blanks at the end of a line swallowed the line break
    # together with the next line's indentation.
    Spec('space', r'[^\S\r\n]+'),
    Spec('string', u'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE),
    Spec('name', r'[A-Za-z_][A-Za-z_0-9]*'),
    Spec('class', r'\.[A-Za-z_][A-Za-z_0-9]*'),
    Spec('id', r'#[A-Za-z_][A-Za-z_0-9]*'),
    Spec('eq', r'='),
    Spec('>', '>'),
    Spec('<', '<'),
]

# Callable that turns source text into a token stream.
tokenizer = make_tokenizer(specs)
class Eol(object):
    """Marker object for an end-of-line token; it stores nothing."""

    def __init__(self, data):
        """Accept the matched text *data* and discard it."""
class Spaces(object):
    """A run of whitespace, reduced to its width."""

    def __init__(self, s):
        """Store the number of characters in *s* as ``self.len``."""
        width = len(s)
        self.len = width
class Tag(object):
    """An element: a tag name plus the modifiers that followed it."""

    def __init__(self, data):
        """Build a tag from a ``(name, modifiers)`` pair.

        ``data[0]`` is the tag name.  ``data[1]`` is an iterable of modifier
        objects; each one is filed in ``self.data`` under the name of its
        class (e.g. ``'Class'``, ``'Id'``, ``'Attribute'``).
        """
        self.name = data[0]
        self.data = defaultdict(list)
        for tok in data[1]:
            self.data[tok.__class__.__name__].append(tok)

    def render(self):
        """Return the opening tag.

        The ``id`` and ``class`` attributes are written only when at least
        one modifier of that kind is present, so a bare tag renders as
        ``<name>`` instead of ``<name id="" class="" >``.  Modifiers filed
        under any other class name are ignored here.
        """
        def join_all(kind):
            # .get() keeps rendering from adding empty lists to self.data.
            return ' '.join(t.render() for t in self.data.get(kind, ()))

        pieces = [self.name]
        ids = join_all('Id')
        if ids:
            pieces.append('id="{0}"'.format(ids))
        classes = join_all('Class')
        if classes:
            pieces.append('class="{0}"'.format(classes))
        attrs = join_all('Attribute')
        if attrs:
            pieces.append(attrs)
        return '<{0}>'.format(' '.join(pieces))

    def close(self):
        """Return the closing tag."""
        return '</{0}>'.format(self.name)
class Class(object):
    """CSS class modifier built from a ``.name`` token."""

    def __init__(self, name):
        """Keep *name* with every leading ``.`` removed."""
        bare = name.lstrip('.')
        self.name = bare

    def render(self):
        """Return the stored class name."""
        return self.name
class Id(object):
    """Element id modifier built from a ``#name`` token."""

    def __init__(self, name):
        """Keep *name* with every leading ``#`` removed."""
        bare = name.lstrip('#')
        self.name = bare

    def render(self):
        """Return the stored id."""
        return self.name
class Attribute(object):
    """A ``name="value"`` attribute of a tag."""

    def __init__(self, pair):
        """Build the attribute from a two-item ``(name, value)`` sequence.

        The pair is unpacked in the body rather than in the signature:
        tuple parameters are a syntax error on Python 3 (PEP 3113).
        Callers still pass a single positional argument.
        """
        name, value = pair
        self.name = name
        self.value = value

    def render(self):
        """Return ``name="value"``.

        NOTE: the value is inserted verbatim; no HTML escaping is applied.
        """
        return '{0}="{1}"'.format(self.name, self.value)
# --- Token-level parsers: each wraps one token type in its model object. ---
eol = sometok('eol') >> Eol
space = sometok('space') >> Spaces
# Drop the first and last character of the matched string token.
string = sometok('string') >> (lambda quoted: quoted[1:-1])
# Identity mapping: a name token is passed through unchanged.
name = sometok('name') >> (lambda text: text)
cls = sometok('class') >> Class
identificator = sometok('id') >> Id

# --- Composite parsers. `+` binds tighter than `>>`; parentheses make the
# grouping explicit. ---
# name '=' string; the '=' token itself is skipped.
attr = (name + skip(sometok('eq')) + string) >> Attribute
# A tag is a name followed by any mix of classes, ids, attributes and
# (skipped) whitespace.
tag = (name + many(cls | identificator | attr | skip(space))) >> Tag
# Whole document: any sequence of line ends, tags, strings and whitespace,
# followed by end of input.
complete = many(eol | tag | string | space) + eof
try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3


def _close_level(level_tags, indent, spaces):
    """Yield the closing markup for the tags opened on one indentation level."""
    if not level_tags:
        return
    if spaces:
        yield ' ' * indent
    # The tag opened last on the line is the innermost one: close it first.
    for tag in reversed(level_tags):
        yield tag.close()
    if spaces:
        yield '\n'


def compile(pyml_text, spaces=False):
    """Translate indentation-structured markup into HTML fragments.

    Nesting follows indentation: a line indented deeper than the previous
    one is a child of it, a line at the same indentation is a sibling, and a
    line indented less closes every deeper level before becoming a sibling
    of the level it returns to.  A line without leading whitespace has
    indentation 0.  Lines containing only whitespace do not affect nesting.

    Args:
        pyml_text: source text to tokenize and parse.
        spaces: when true, also yield newlines and indentation so the joined
            output is laid out roughly like the input.

    Yields:
        String fragments; join them to obtain the document.

    Errors raised by the tokenizer or the parser on invalid input propagate
    to the caller on first iteration.
    """
    parsed = complete.parse(list(tokenizer(pyml_text)))[0]

    lens = [0]    # indentation width of every currently open level
    tags = [[]]   # tags opened on each level, in opening order
    at_line_start = True
    indent = 0    # indentation of the line being read

    for d in parsed:
        if isinstance(d, Eol):
            at_line_start = True
            indent = 0
            if spaces:
                yield '\n'
            continue

        if isinstance(d, Spaces):
            # Only whitespace that opens a line carries meaning; it is
            # applied once the line turns out to have real content.
            if at_line_start:
                indent = d.len
            continue

        if at_line_start:
            # First content token of a line: settle the nesting first.
            at_line_start = False
            # Close every level that is deeper than this line.
            while lens[-1] > indent:
                for chunk in _close_level(tags.pop(), lens.pop(), spaces):
                    yield chunk
            if lens[-1] == indent:
                # Sibling of what is open on this level: close those tags.
                for chunk in _close_level(tags[-1], indent, spaces):
                    yield chunk
                tags[-1] = []
            else:
                # Deeper than anything still open: start a new level.
                lens.append(indent)
                tags.append([])
            if spaces and indent:
                yield ' ' * indent

        if isinstance(d, Tag):
            yield d.render()
            tags[-1].append(d)
        elif isinstance(d, _STRING_TYPES):
            yield d

    # End of input: close whatever is still open, innermost level first.
    while tags:
        for chunk in _close_level(tags.pop(), lens.pop(), spaces):
            yield chunk
if __name__ == '__main__':
    # Demo: compile a small nested document and print it with layout
    # whitespace enabled.
    # NOTE(review): the nesting indentation inside this literal was
    # reconstructed (4 spaces per level) -- confirm against the intended demo.
    test = """
div.big .orange
    form method="POST" #main_form
        table tr
            td.first "Hello World!"
            td.second "Hello World!"
            td.third "Hello World!"
"""
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(''.join(compile(test, True)))