tokenizer.py
"""Fortran namelist tokenizer.

:copyright: Copyright 2017 Marshall Ward, see AUTHORS for details.
:license: Apache License, Version 2.0, see LICENSE for details.
"""
import string
import itertools


class Tokenizer(object):
    """Fortran namelist tokenizer."""

    # I don't use these two
    special_chars = ' =+-*/\\()[]{},.:;!"%&~<>?\'`|$#@'    # Table 3.1
    lexical_tokens = '=+-*/()[],.:;%&<>'                   # Meaningful?

    # I only use this one
    punctuation = '=+-*/\\()[]{},:;%&~<>?`|$#@'    # Unhandled Table 3.1 tokens

    def __init__(self):
        """Initialise the tokenizer."""
        self.characters = None
        self.prior_char = None
        self.char = None
        self.idx = None
        self.whitespace = string.whitespace.replace('\n', '')
        self.prior_delim = None

        # Set to true if inside a namelist group
        self.group_token = None
    def parse(self, line):
        """Tokenize a line of Fortran source."""
        tokens = []

        self.idx = -1   # Bogus value to ensure idx = 0 after first iteration
        self.characters = iter(line)
        self.update_chars()

        while self.char != '\n':

            # Update namelist group status
            if self.char in ('&', '$'):
                self.group_token = self.char

            if self.group_token and (
                    (self.group_token, self.char) in (('&', '/'), ('$', '$'))):
                self.group_token = False

            word = ''
            if self.char in self.whitespace:
                while self.char in self.whitespace:
                    word += self.char
                    self.update_chars()

            elif self.char in ('!', '#') or self.group_token is None:
                # Abort the iteration and build the comment token
                word = line[self.idx:-1]
                self.char = '\n'

            elif self.char in '"\'' or self.prior_delim:
                word = self.parse_string()

            elif self.char.isalpha():
                word = self.parse_name(line)

            elif self.char.isdigit() or self.char == '-':
                word = self.parse_numeric()

            elif self.char == '.':
                self.update_chars()
                if self.char.isdigit():
                    frac = self.parse_numeric()
                    word = '.' + frac
                else:
                    word = '.'
                    while self.char.isalpha():
                        word += self.char
                        self.update_chars()
                    if self.char == '.':
                        word += self.char
                        self.update_chars()

            elif self.char in Tokenizer.punctuation:
                word = self.char
                self.update_chars()

            else:
                # This should never happen
                raise ValueError

            tokens.append(word)

        return tokens
    def parse_name(self, line):
        """Tokenize a Fortran name, such as a variable or subroutine."""
        end = self.idx
        for char in line[self.idx:]:
            if not char.isalnum() and char not in '\'"_':
                break
            end += 1

        word = line[self.idx:end]

        self.idx = end - 1
        # Update iterator, minus first character which was already read
        self.characters = itertools.islice(self.characters, len(word) - 1,
                                           None)
        self.update_chars()

        return word
    def parse_string(self):
        """Tokenize a Fortran string."""
        word = ''

        if self.prior_delim:
            delim = self.prior_delim
            self.prior_delim = None
        else:
            delim = self.char
            word += self.char
            self.update_chars()

        while True:
            if self.char == delim:
                # Check for escaped delimiters
                self.update_chars()
                if self.char == delim:
                    word += 2 * delim
                    self.update_chars()
                else:
                    word += delim
                    break
            elif self.char == '\n':
                self.prior_delim = delim
                break
            else:
                word += self.char
                self.update_chars()

        return word
    def parse_numeric(self):
        """Tokenize a Fortran numerical value."""
        word = ''
        frac = False

        if self.char == '-':
            word += self.char
            self.update_chars()

        while self.char.isdigit() or (self.char == '.' and not frac):
            # Only allow one decimal point
            if self.char == '.':
                frac = True
            word += self.char
            self.update_chars()

        # Check for float exponent
        if self.char in 'eEdD':
            word += self.char
            self.update_chars()

        if self.char in '+-':
            word += self.char
            self.update_chars()
        while self.char.isdigit():
            word += self.char
            self.update_chars()

        return word
    def update_chars(self):
        """Update the current characters in the tokenizer."""
        # NOTE: We spoof non-Unix files by returning '\n' on StopIteration
        self.prior_char, self.char = self.char, next(self.characters, '\n')
        self.idx += 1