ast.py 56 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2007 Neal Norwitz
  4. # Portions Copyright 2007 Google Inc.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. """Generate an Abstract Syntax Tree (AST) for C++."""
  18. # FIXME:
  19. # * Tokens should never be exported, need to convert to Nodes
  20. # (return types, parameters, etc.)
  21. # * Handle static class data for templatized classes
  22. # * Handle casts (both C++ and C-style)
  23. # * Handle conditions and loops (if/else, switch, for, while/do)
  24. #
  25. # TODO much, much later:
  26. # * Handle #define
  27. # * exceptions
  28. try:
  29. # Python 3.x
  30. import builtins
  31. except ImportError:
  32. # Python 2.x
  33. import __builtin__ as builtins
  34. import sys
  35. import traceback
  36. from cpp import keywords
  37. from cpp import tokenize
  38. from cpp import utils
  39. if not hasattr(builtins, 'reversed'):
  40. # Support Python 2.3 and earlier.
  41. def reversed(seq):
  42. for i in range(len(seq)-1, -1, -1):
  43. yield seq[i]
  44. if not hasattr(builtins, 'next'):
  45. # Support Python 2.5 and earlier.
  46. def next(obj):
  47. return obj.next()
  48. VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)
  49. FUNCTION_NONE = 0x00
  50. FUNCTION_CONST = 0x01
  51. FUNCTION_VIRTUAL = 0x02
  52. FUNCTION_PURE_VIRTUAL = 0x04
  53. FUNCTION_CTOR = 0x08
  54. FUNCTION_DTOR = 0x10
  55. FUNCTION_ATTRIBUTE = 0x20
  56. FUNCTION_UNKNOWN_ANNOTATION = 0x40
  57. FUNCTION_THROW = 0x80
  58. FUNCTION_OVERRIDE = 0x100
  59. """
  60. These are currently unused. Should really handle these properly at some point.
  61. TYPE_MODIFIER_INLINE = 0x010000
  62. TYPE_MODIFIER_EXTERN = 0x020000
  63. TYPE_MODIFIER_STATIC = 0x040000
  64. TYPE_MODIFIER_CONST = 0x080000
  65. TYPE_MODIFIER_REGISTER = 0x100000
  66. TYPE_MODIFIER_VOLATILE = 0x200000
  67. TYPE_MODIFIER_MUTABLE = 0x400000
  68. TYPE_MODIFIER_MAP = {
  69. 'inline': TYPE_MODIFIER_INLINE,
  70. 'extern': TYPE_MODIFIER_EXTERN,
  71. 'static': TYPE_MODIFIER_STATIC,
  72. 'const': TYPE_MODIFIER_CONST,
  73. 'register': TYPE_MODIFIER_REGISTER,
  74. 'volatile': TYPE_MODIFIER_VOLATILE,
  75. 'mutable': TYPE_MODIFIER_MUTABLE,
  76. }
  77. """
  78. _INTERNAL_TOKEN = 'internal'
  79. _NAMESPACE_POP = 'ns-pop'
  80. # TODO(nnorwitz): use this as a singleton for templated_types, etc
  81. # where we don't want to create a new empty dict each time. It is also const.
  82. class _NullDict(object):
  83. __contains__ = lambda self: False
  84. keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
  85. # TODO(nnorwitz): move AST nodes into a separate module.
  86. class Node(object):
  87. """Base AST node."""
  88. def __init__(self, start, end):
  89. self.start = start
  90. self.end = end
  91. def IsDeclaration(self):
  92. """Returns bool if this node is a declaration."""
  93. return False
  94. def IsDefinition(self):
  95. """Returns bool if this node is a definition."""
  96. return False
  97. def IsExportable(self):
  98. """Returns bool if this node exportable from a header file."""
  99. return False
  100. def Requires(self, node):
  101. """Does this AST node require the definition of the node passed in?"""
  102. return False
  103. def XXX__str__(self):
  104. return self._StringHelper(self.__class__.__name__, '')
  105. def _StringHelper(self, name, suffix):
  106. if not utils.DEBUG:
  107. return '%s(%s)' % (name, suffix)
  108. return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
  109. def __repr__(self):
  110. return str(self)
  111. class Define(Node):
  112. def __init__(self, start, end, name, definition):
  113. Node.__init__(self, start, end)
  114. self.name = name
  115. self.definition = definition
  116. def __str__(self):
  117. value = '%s %s' % (self.name, self.definition)
  118. return self._StringHelper(self.__class__.__name__, value)
  119. class Include(Node):
  120. def __init__(self, start, end, filename, system):
  121. Node.__init__(self, start, end)
  122. self.filename = filename
  123. self.system = system
  124. def __str__(self):
  125. fmt = '"%s"'
  126. if self.system:
  127. fmt = '<%s>'
  128. return self._StringHelper(self.__class__.__name__, fmt % self.filename)
  129. class Goto(Node):
  130. def __init__(self, start, end, label):
  131. Node.__init__(self, start, end)
  132. self.label = label
  133. def __str__(self):
  134. return self._StringHelper(self.__class__.__name__, str(self.label))
  135. class Expr(Node):
  136. def __init__(self, start, end, expr):
  137. Node.__init__(self, start, end)
  138. self.expr = expr
  139. def Requires(self, node):
  140. # TODO(nnorwitz): impl.
  141. return False
  142. def __str__(self):
  143. return self._StringHelper(self.__class__.__name__, str(self.expr))
  144. class Return(Expr):
  145. pass
  146. class Delete(Expr):
  147. pass
  148. class Friend(Expr):
  149. def __init__(self, start, end, expr, namespace):
  150. Expr.__init__(self, start, end, expr)
  151. self.namespace = namespace[:]
  152. class Using(Node):
  153. def __init__(self, start, end, names):
  154. Node.__init__(self, start, end)
  155. self.names = names
  156. def __str__(self):
  157. return self._StringHelper(self.__class__.__name__, str(self.names))
  158. class Parameter(Node):
  159. def __init__(self, start, end, name, parameter_type, default):
  160. Node.__init__(self, start, end)
  161. self.name = name
  162. self.type = parameter_type
  163. self.default = default
  164. def Requires(self, node):
  165. # TODO(nnorwitz): handle namespaces, etc.
  166. return self.type.name == node.name
  167. def __str__(self):
  168. name = str(self.type)
  169. suffix = '%s %s' % (name, self.name)
  170. if self.default:
  171. suffix += ' = ' + ''.join([d.name for d in self.default])
  172. return self._StringHelper(self.__class__.__name__, suffix)
  173. class _GenericDeclaration(Node):
  174. def __init__(self, start, end, name, namespace):
  175. Node.__init__(self, start, end)
  176. self.name = name
  177. self.namespace = namespace[:]
  178. def FullName(self):
  179. prefix = ''
  180. if self.namespace and self.namespace[-1]:
  181. prefix = '::'.join(self.namespace) + '::'
  182. return prefix + self.name
  183. def _TypeStringHelper(self, suffix):
  184. if self.namespace:
  185. names = [n or '<anonymous>' for n in self.namespace]
  186. suffix += ' in ' + '::'.join(names)
  187. return self._StringHelper(self.__class__.__name__, suffix)
  188. # TODO(nnorwitz): merge with Parameter in some way?
  189. class VariableDeclaration(_GenericDeclaration):
  190. def __init__(self, start, end, name, var_type, initial_value, namespace):
  191. _GenericDeclaration.__init__(self, start, end, name, namespace)
  192. self.type = var_type
  193. self.initial_value = initial_value
  194. def Requires(self, node):
  195. # TODO(nnorwitz): handle namespaces, etc.
  196. return self.type.name == node.name
  197. def ToString(self):
  198. """Return a string that tries to reconstitute the variable decl."""
  199. suffix = '%s %s' % (self.type, self.name)
  200. if self.initial_value:
  201. suffix += ' = ' + self.initial_value
  202. return suffix
  203. def __str__(self):
  204. return self._StringHelper(self.__class__.__name__, self.ToString())
  205. class Typedef(_GenericDeclaration):
  206. def __init__(self, start, end, name, alias, namespace):
  207. _GenericDeclaration.__init__(self, start, end, name, namespace)
  208. self.alias = alias
  209. def IsDefinition(self):
  210. return True
  211. def IsExportable(self):
  212. return True
  213. def Requires(self, node):
  214. # TODO(nnorwitz): handle namespaces, etc.
  215. name = node.name
  216. for token in self.alias:
  217. if token is not None and name == token.name:
  218. return True
  219. return False
  220. def __str__(self):
  221. suffix = '%s, %s' % (self.name, self.alias)
  222. return self._TypeStringHelper(suffix)
  223. class _NestedType(_GenericDeclaration):
  224. def __init__(self, start, end, name, fields, namespace):
  225. _GenericDeclaration.__init__(self, start, end, name, namespace)
  226. self.fields = fields
  227. def IsDefinition(self):
  228. return True
  229. def IsExportable(self):
  230. return True
  231. def __str__(self):
  232. suffix = '%s, {%s}' % (self.name, self.fields)
  233. return self._TypeStringHelper(suffix)
  234. class Union(_NestedType):
  235. pass
  236. class Enum(_NestedType):
  237. pass
  238. class Class(_GenericDeclaration):
  239. def __init__(self, start, end, name, bases, templated_types, body, namespace):
  240. _GenericDeclaration.__init__(self, start, end, name, namespace)
  241. self.bases = bases
  242. self.body = body
  243. self.templated_types = templated_types
  244. def IsDeclaration(self):
  245. return self.bases is None and self.body is None
  246. def IsDefinition(self):
  247. return not self.IsDeclaration()
  248. def IsExportable(self):
  249. return not self.IsDeclaration()
  250. def Requires(self, node):
  251. # TODO(nnorwitz): handle namespaces, etc.
  252. if self.bases:
  253. for token_list in self.bases:
  254. # TODO(nnorwitz): bases are tokens, do name comparision.
  255. for token in token_list:
  256. if token.name == node.name:
  257. return True
  258. # TODO(nnorwitz): search in body too.
  259. return False
  260. def __str__(self):
  261. name = self.name
  262. if self.templated_types:
  263. name += '<%s>' % self.templated_types
  264. suffix = '%s, %s, %s' % (name, self.bases, self.body)
  265. return self._TypeStringHelper(suffix)
  266. class Struct(Class):
  267. pass
  268. class Function(_GenericDeclaration):
  269. def __init__(self, start, end, name, return_type, parameters,
  270. modifiers, templated_types, body, namespace):
  271. _GenericDeclaration.__init__(self, start, end, name, namespace)
  272. converter = TypeConverter(namespace)
  273. self.return_type = converter.CreateReturnType(return_type)
  274. self.parameters = converter.ToParameters(parameters)
  275. self.modifiers = modifiers
  276. self.body = body
  277. self.templated_types = templated_types
  278. def IsDeclaration(self):
  279. return self.body is None
  280. def IsDefinition(self):
  281. return self.body is not None
  282. def IsExportable(self):
  283. if self.return_type and 'static' in self.return_type.modifiers:
  284. return False
  285. return None not in self.namespace
  286. def Requires(self, node):
  287. if self.parameters:
  288. # TODO(nnorwitz): parameters are tokens, do name comparision.
  289. for p in self.parameters:
  290. if p.name == node.name:
  291. return True
  292. # TODO(nnorwitz): search in body too.
  293. return False
  294. def __str__(self):
  295. # TODO(nnorwitz): add templated_types.
  296. suffix = ('%s %s(%s), 0x%02x, %s' %
  297. (self.return_type, self.name, self.parameters,
  298. self.modifiers, self.body))
  299. return self._TypeStringHelper(suffix)
  300. class Method(Function):
  301. def __init__(self, start, end, name, in_class, return_type, parameters,
  302. modifiers, templated_types, body, namespace):
  303. Function.__init__(self, start, end, name, return_type, parameters,
  304. modifiers, templated_types, body, namespace)
  305. # TODO(nnorwitz): in_class could also be a namespace which can
  306. # mess up finding functions properly.
  307. self.in_class = in_class
  308. class Type(_GenericDeclaration):
  309. """Type used for any variable (eg class, primitive, struct, etc)."""
  310. def __init__(self, start, end, name, templated_types, modifiers,
  311. reference, pointer, array):
  312. """
  313. Args:
  314. name: str name of main type
  315. templated_types: [Class (Type?)] template type info between <>
  316. modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
  317. reference, pointer, array: bools
  318. """
  319. _GenericDeclaration.__init__(self, start, end, name, [])
  320. self.templated_types = templated_types
  321. if not name and modifiers:
  322. self.name = modifiers.pop()
  323. self.modifiers = modifiers
  324. self.reference = reference
  325. self.pointer = pointer
  326. self.array = array
  327. def __str__(self):
  328. prefix = ''
  329. if self.modifiers:
  330. prefix = ' '.join(self.modifiers) + ' '
  331. name = str(self.name)
  332. if self.templated_types:
  333. name += '<%s>' % self.templated_types
  334. suffix = prefix + name
  335. if self.reference:
  336. suffix += '&'
  337. if self.pointer:
  338. suffix += '*'
  339. if self.array:
  340. suffix += '[]'
  341. return self._TypeStringHelper(suffix)
  342. # By definition, Is* are always False. A Type can only exist in
  343. # some sort of variable declaration, parameter, or return value.
  344. def IsDeclaration(self):
  345. return False
  346. def IsDefinition(self):
  347. return False
  348. def IsExportable(self):
  349. return False
  350. class TypeConverter(object):
  351. def __init__(self, namespace_stack):
  352. self.namespace_stack = namespace_stack
  353. def _GetTemplateEnd(self, tokens, start):
  354. count = 1
  355. end = start
  356. while 1:
  357. token = tokens[end]
  358. end += 1
  359. if token.name == '<':
  360. count += 1
  361. elif token.name == '>':
  362. count -= 1
  363. if count == 0:
  364. break
  365. return tokens[start:end-1], end
  366. def ToType(self, tokens):
  367. """Convert [Token,...] to [Class(...), ] useful for base classes.
  368. For example, code like class Foo : public Bar<x, y> { ... };
  369. the "Bar<x, y>" portion gets converted to an AST.
  370. Returns:
  371. [Class(...), ...]
  372. """
  373. result = []
  374. name_tokens = []
  375. reference = pointer = array = False
  376. def AddType(templated_types):
  377. # Partition tokens into name and modifier tokens.
  378. names = []
  379. modifiers = []
  380. for t in name_tokens:
  381. if keywords.IsKeyword(t.name):
  382. modifiers.append(t.name)
  383. else:
  384. names.append(t.name)
  385. name = ''.join(names)
  386. if name_tokens:
  387. result.append(Type(name_tokens[0].start, name_tokens[-1].end,
  388. name, templated_types, modifiers,
  389. reference, pointer, array))
  390. del name_tokens[:]
  391. i = 0
  392. end = len(tokens)
  393. while i < end:
  394. token = tokens[i]
  395. if token.name == '<':
  396. new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
  397. AddType(self.ToType(new_tokens))
  398. # If there is a comma after the template, we need to consume
  399. # that here otherwise it becomes part of the name.
  400. i = new_end
  401. reference = pointer = array = False
  402. elif token.name == ',':
  403. AddType([])
  404. reference = pointer = array = False
  405. elif token.name == '*':
  406. pointer = True
  407. elif token.name == '&':
  408. reference = True
  409. elif token.name == '[':
  410. pointer = True
  411. elif token.name == ']':
  412. pass
  413. else:
  414. name_tokens.append(token)
  415. i += 1
  416. if name_tokens:
  417. # No '<' in the tokens, just a simple name and no template.
  418. AddType([])
  419. return result
  420. def DeclarationToParts(self, parts, needs_name_removed):
  421. name = None
  422. default = []
  423. if needs_name_removed:
  424. # Handle default (initial) values properly.
  425. for i, t in enumerate(parts):
  426. if t.name == '=':
  427. default = parts[i+1:]
  428. name = parts[i-1].name
  429. if name == ']' and parts[i-2].name == '[':
  430. name = parts[i-3].name
  431. i -= 1
  432. parts = parts[:i-1]
  433. break
  434. else:
  435. if parts[-1].token_type == tokenize.NAME:
  436. name = parts.pop().name
  437. else:
  438. # TODO(nnorwitz): this is a hack that happens for code like
  439. # Register(Foo<T>); where it thinks this is a function call
  440. # but it's actually a declaration.
  441. name = '???'
  442. modifiers = []
  443. type_name = []
  444. other_tokens = []
  445. templated_types = []
  446. i = 0
  447. end = len(parts)
  448. while i < end:
  449. p = parts[i]
  450. if keywords.IsKeyword(p.name):
  451. modifiers.append(p.name)
  452. elif p.name == '<':
  453. templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
  454. templated_types = self.ToType(templated_tokens)
  455. i = new_end - 1
  456. # Don't add a spurious :: to data members being initialized.
  457. next_index = i + 1
  458. if next_index < end and parts[next_index].name == '::':
  459. i += 1
  460. elif p.name in ('[', ']', '='):
  461. # These are handled elsewhere.
  462. other_tokens.append(p)
  463. elif p.name not in ('*', '&', '>'):
  464. # Ensure that names have a space between them.
  465. if (type_name and type_name[-1].token_type == tokenize.NAME and
  466. p.token_type == tokenize.NAME):
  467. type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
  468. type_name.append(p)
  469. else:
  470. other_tokens.append(p)
  471. i += 1
  472. type_name = ''.join([t.name for t in type_name])
  473. return name, type_name, templated_types, modifiers, default, other_tokens
  474. def ToParameters(self, tokens):
  475. if not tokens:
  476. return []
  477. result = []
  478. name = type_name = ''
  479. type_modifiers = []
  480. pointer = reference = array = False
  481. first_token = None
  482. default = []
  483. def AddParameter(end):
  484. if default:
  485. del default[0] # Remove flag.
  486. parts = self.DeclarationToParts(type_modifiers, True)
  487. (name, type_name, templated_types, modifiers,
  488. unused_default, unused_other_tokens) = parts
  489. parameter_type = Type(first_token.start, first_token.end,
  490. type_name, templated_types, modifiers,
  491. reference, pointer, array)
  492. p = Parameter(first_token.start, end, name,
  493. parameter_type, default)
  494. result.append(p)
  495. template_count = 0
  496. brace_count = 0
  497. for s in tokens:
  498. if not first_token:
  499. first_token = s
  500. # Check for braces before templates, as we can have unmatched '<>'
  501. # inside default arguments.
  502. if s.name == '{':
  503. brace_count += 1
  504. elif s.name == '}':
  505. brace_count -= 1
  506. if brace_count > 0:
  507. type_modifiers.append(s)
  508. continue
  509. if s.name == '<':
  510. template_count += 1
  511. elif s.name == '>':
  512. template_count -= 1
  513. if template_count > 0:
  514. type_modifiers.append(s)
  515. continue
  516. if s.name == ',':
  517. AddParameter(s.start)
  518. name = type_name = ''
  519. type_modifiers = []
  520. pointer = reference = array = False
  521. first_token = None
  522. default = []
  523. elif s.name == '*':
  524. pointer = True
  525. elif s.name == '&':
  526. reference = True
  527. elif s.name == '[':
  528. array = True
  529. elif s.name == ']':
  530. pass # Just don't add to type_modifiers.
  531. elif s.name == '=':
  532. # Got a default value. Add any value (None) as a flag.
  533. default.append(None)
  534. elif default:
  535. default.append(s)
  536. else:
  537. type_modifiers.append(s)
  538. AddParameter(tokens[-1].end)
  539. return result
  540. def CreateReturnType(self, return_type_seq):
  541. if not return_type_seq:
  542. return None
  543. start = return_type_seq[0].start
  544. end = return_type_seq[-1].end
  545. _, name, templated_types, modifiers, default, other_tokens = \
  546. self.DeclarationToParts(return_type_seq, False)
  547. names = [n.name for n in other_tokens]
  548. reference = '&' in names
  549. pointer = '*' in names
  550. array = '[' in names
  551. return Type(start, end, name, templated_types, modifiers,
  552. reference, pointer, array)
  553. def GetTemplateIndices(self, names):
  554. # names is a list of strings.
  555. start = names.index('<')
  556. end = len(names) - 1
  557. while end > 0:
  558. if names[end] == '>':
  559. break
  560. end -= 1
  561. return start, end+1
  562. class AstBuilder(object):
  563. def __init__(self, token_stream, filename, in_class='', visibility=None,
  564. namespace_stack=[]):
  565. self.tokens = token_stream
  566. self.filename = filename
  567. # TODO(nnorwitz): use a better data structure (deque) for the queue.
  568. # Switching directions of the "queue" improved perf by about 25%.
  569. # Using a deque should be even better since we access from both sides.
  570. self.token_queue = []
  571. self.namespace_stack = namespace_stack[:]
  572. self.in_class = in_class
  573. if in_class is None:
  574. self.in_class_name_only = None
  575. else:
  576. self.in_class_name_only = in_class.split('::')[-1]
  577. self.visibility = visibility
  578. self.in_function = False
  579. self.current_token = None
  580. # Keep the state whether we are currently handling a typedef or not.
  581. self._handling_typedef = False
  582. self.converter = TypeConverter(self.namespace_stack)
  583. def HandleError(self, msg, token):
  584. printable_queue = list(reversed(self.token_queue[-20:]))
  585. sys.stderr.write('Got %s in %s @ %s %s\n' %
  586. (msg, self.filename, token, printable_queue))
  587. def Generate(self):
  588. while 1:
  589. token = self._GetNextToken()
  590. if not token:
  591. break
  592. # Get the next token.
  593. self.current_token = token
  594. # Dispatch on the next token type.
  595. if token.token_type == _INTERNAL_TOKEN:
  596. if token.name == _NAMESPACE_POP:
  597. self.namespace_stack.pop()
  598. continue
  599. try:
  600. result = self._GenerateOne(token)
  601. if result is not None:
  602. yield result
  603. except:
  604. self.HandleError('exception', token)
  605. raise
  606. def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
  607. ref_pointer_name_seq, templated_types, value=None):
  608. reference = '&' in ref_pointer_name_seq
  609. pointer = '*' in ref_pointer_name_seq
  610. array = '[' in ref_pointer_name_seq
  611. var_type = Type(pos_token.start, pos_token.end, type_name,
  612. templated_types, type_modifiers,
  613. reference, pointer, array)
  614. return VariableDeclaration(pos_token.start, pos_token.end,
  615. name, var_type, value, self.namespace_stack)
  616. def _GenerateOne(self, token):
  617. if token.token_type == tokenize.NAME:
  618. if (keywords.IsKeyword(token.name) and
  619. not keywords.IsBuiltinType(token.name)):
  620. if token.name == 'enum':
  621. # Pop the next token and only put it back if it's not
  622. # 'class'. This allows us to support the two-token
  623. # 'enum class' keyword as if it were simply 'enum'.
  624. next = self._GetNextToken()
  625. if next.name != 'class':
  626. self._AddBackToken(next)
  627. method = getattr(self, 'handle_' + token.name)
  628. return method()
  629. elif token.name == self.in_class_name_only:
  630. # The token name is the same as the class, must be a ctor if
  631. # there is a paren. Otherwise, it's the return type.
  632. # Peek ahead to get the next token to figure out which.
  633. next = self._GetNextToken()
  634. self._AddBackToken(next)
  635. if next.token_type == tokenize.SYNTAX and next.name == '(':
  636. return self._GetMethod([token], FUNCTION_CTOR, None, True)
  637. # Fall through--handle like any other method.
  638. # Handle data or function declaration/definition.
  639. syntax = tokenize.SYNTAX
  640. temp_tokens, last_token = \
  641. self._GetVarTokensUpToIgnoringTemplates(syntax,
  642. '(', ';', '{', '[')
  643. temp_tokens.insert(0, token)
  644. if last_token.name == '(':
  645. # If there is an assignment before the paren,
  646. # this is an expression, not a method.
  647. expr = bool([e for e in temp_tokens if e.name == '='])
  648. if expr:
  649. new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  650. temp_tokens.append(last_token)
  651. temp_tokens.extend(new_temp)
  652. last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
  653. if last_token.name == '[':
  654. # Handle array, this isn't a method, unless it's an operator.
  655. # TODO(nnorwitz): keep the size somewhere.
  656. # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
  657. temp_tokens.append(last_token)
  658. if temp_tokens[-2].name == 'operator':
  659. temp_tokens.append(self._GetNextToken())
  660. else:
  661. temp_tokens2, last_token = \
  662. self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
  663. temp_tokens.extend(temp_tokens2)
  664. if last_token.name == ';':
  665. # Handle data, this isn't a method.
  666. parts = self.converter.DeclarationToParts(temp_tokens, True)
  667. (name, type_name, templated_types, modifiers, default,
  668. unused_other_tokens) = parts
  669. t0 = temp_tokens[0]
  670. names = [t.name for t in temp_tokens]
  671. if templated_types:
  672. start, end = self.converter.GetTemplateIndices(names)
  673. names = names[:start] + names[end:]
  674. default = ''.join([t.name for t in default])
  675. return self._CreateVariable(t0, name, type_name, modifiers,
  676. names, templated_types, default)
  677. if last_token.name == '{':
  678. self._AddBackTokens(temp_tokens[1:])
  679. self._AddBackToken(last_token)
  680. method_name = temp_tokens[0].name
  681. method = getattr(self, 'handle_' + method_name, None)
  682. if not method:
  683. # Must be declaring a variable.
  684. # TODO(nnorwitz): handle the declaration.
  685. return None
  686. return method()
  687. return self._GetMethod(temp_tokens, 0, None, False)
  688. elif token.token_type == tokenize.SYNTAX:
  689. if token.name == '~' and self.in_class:
  690. # Must be a dtor (probably not in method body).
  691. token = self._GetNextToken()
  692. # self.in_class can contain A::Name, but the dtor will only
  693. # be Name. Make sure to compare against the right value.
  694. if (token.token_type == tokenize.NAME and
  695. token.name == self.in_class_name_only):
  696. return self._GetMethod([token], FUNCTION_DTOR, None, True)
  697. # TODO(nnorwitz): handle a lot more syntax.
  698. elif token.token_type == tokenize.PREPROCESSOR:
  699. # TODO(nnorwitz): handle more preprocessor directives.
  700. # token starts with a #, so remove it and strip whitespace.
  701. name = token.name[1:].lstrip()
  702. if name.startswith('include'):
  703. # Remove "include".
  704. name = name[7:].strip()
  705. assert name
  706. # Handle #include \<newline> "header-on-second-line.h".
  707. if name.startswith('\\'):
  708. name = name[1:].strip()
  709. assert name[0] in '<"', token
  710. assert name[-1] in '>"', token
  711. system = name[0] == '<'
  712. filename = name[1:-1]
  713. return Include(token.start, token.end, filename, system)
  714. if name.startswith('define'):
  715. # Remove "define".
  716. name = name[6:].strip()
  717. assert name
  718. value = ''
  719. for i, c in enumerate(name):
  720. if c.isspace():
  721. value = name[i:].lstrip()
  722. name = name[:i]
  723. break
  724. return Define(token.start, token.end, name, value)
  725. if name.startswith('if') and name[2:3].isspace():
  726. condition = name[3:].strip()
  727. if condition.startswith('0') or condition.startswith('(0)'):
  728. self._SkipIf0Blocks()
  729. return None
  730. def _GetTokensUpTo(self, expected_token_type, expected_token):
  731. return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
  732. def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
  733. last_token = self._GetNextToken()
  734. tokens = []
  735. while (last_token.token_type != expected_token_type or
  736. last_token.name not in expected_tokens):
  737. tokens.append(last_token)
  738. last_token = self._GetNextToken()
  739. return tokens, last_token
  740. # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
  741. # expected token.
  742. def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type,
  743. *expected_tokens):
  744. last_token = self._GetNextToken()
  745. tokens = []
  746. nesting = 0
  747. while (nesting > 0 or
  748. last_token.token_type != expected_token_type or
  749. last_token.name not in expected_tokens):
  750. tokens.append(last_token)
  751. last_token = self._GetNextToken()
  752. if last_token.name == '<':
  753. nesting += 1
  754. elif last_token.name == '>':
  755. nesting -= 1
  756. return tokens, last_token
  757. # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
  758. def _IgnoreUpTo(self, token_type, token):
  759. unused_tokens = self._GetTokensUpTo(token_type, token)
  760. def _SkipIf0Blocks(self):
  761. count = 1
  762. while 1:
  763. token = self._GetNextToken()
  764. if token.token_type != tokenize.PREPROCESSOR:
  765. continue
  766. name = token.name[1:].lstrip()
  767. if name.startswith('endif'):
  768. count -= 1
  769. if count == 0:
  770. break
  771. elif name.startswith('if'):
  772. count += 1
  773. def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
  774. if GetNextToken is None:
  775. GetNextToken = self._GetNextToken
  776. # Assumes the current token is open_paren and we will consume
  777. # and return up to the close_paren.
  778. count = 1
  779. token = GetNextToken()
  780. while 1:
  781. if token.token_type == tokenize.SYNTAX:
  782. if token.name == open_paren:
  783. count += 1
  784. elif token.name == close_paren:
  785. count -= 1
  786. if count == 0:
  787. break
  788. yield token
  789. token = GetNextToken()
  790. yield token
  791. def _GetParameters(self):
  792. return self._GetMatchingChar('(', ')')
  793. def GetScope(self):
  794. return self._GetMatchingChar('{', '}')
  795. def _GetNextToken(self):
  796. if self.token_queue:
  797. return self.token_queue.pop()
  798. try:
  799. return next(self.tokens)
  800. except StopIteration:
  801. return
  802. def _AddBackToken(self, token):
  803. if token.whence == tokenize.WHENCE_STREAM:
  804. token.whence = tokenize.WHENCE_QUEUE
  805. self.token_queue.insert(0, token)
  806. else:
  807. assert token.whence == tokenize.WHENCE_QUEUE, token
  808. self.token_queue.append(token)
  809. def _AddBackTokens(self, tokens):
  810. if tokens:
  811. if tokens[-1].whence == tokenize.WHENCE_STREAM:
  812. for token in tokens:
  813. token.whence = tokenize.WHENCE_QUEUE
  814. self.token_queue[:0] = reversed(tokens)
  815. else:
  816. assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
  817. self.token_queue.extend(reversed(tokens))
  818. def GetName(self, seq=None):
  819. """Returns ([tokens], next_token_info)."""
  820. GetNextToken = self._GetNextToken
  821. if seq is not None:
  822. it = iter(seq)
  823. GetNextToken = lambda: next(it)
  824. next_token = GetNextToken()
  825. tokens = []
  826. last_token_was_name = False
  827. while (next_token.token_type == tokenize.NAME or
  828. (next_token.token_type == tokenize.SYNTAX and
  829. next_token.name in ('::', '<'))):
  830. # Two NAMEs in a row means the identifier should terminate.
  831. # It's probably some sort of variable declaration.
  832. if last_token_was_name and next_token.token_type == tokenize.NAME:
  833. break
  834. last_token_was_name = next_token.token_type == tokenize.NAME
  835. tokens.append(next_token)
  836. # Handle templated names.
  837. if next_token.name == '<':
  838. tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
  839. last_token_was_name = True
  840. next_token = GetNextToken()
  841. return tokens, next_token
  842. def GetMethod(self, modifiers, templated_types):
  843. return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
  844. assert len(return_type_and_name) >= 1
  845. return self._GetMethod(return_type_and_name, modifiers, templated_types,
  846. False)
  847. def _GetMethod(self, return_type_and_name, modifiers, templated_types,
  848. get_paren):
  849. template_portion = None
  850. if get_paren:
  851. token = self._GetNextToken()
  852. assert token.token_type == tokenize.SYNTAX, token
  853. if token.name == '<':
  854. # Handle templatized dtors.
  855. template_portion = [token]
  856. template_portion.extend(self._GetMatchingChar('<', '>'))
  857. token = self._GetNextToken()
  858. assert token.token_type == tokenize.SYNTAX, token
  859. assert token.name == '(', token
  860. name = return_type_and_name.pop()
  861. # Handle templatized ctors.
  862. if name.name == '>':
  863. index = 1
  864. while return_type_and_name[index].name != '<':
  865. index += 1
  866. template_portion = return_type_and_name[index:] + [name]
  867. del return_type_and_name[index:]
  868. name = return_type_and_name.pop()
  869. elif name.name == ']':
  870. rt = return_type_and_name
  871. assert rt[-1].name == '[', return_type_and_name
  872. assert rt[-2].name == 'operator', return_type_and_name
  873. name_seq = return_type_and_name[-2:]
  874. del return_type_and_name[-2:]
  875. name = tokenize.Token(tokenize.NAME, 'operator[]',
  876. name_seq[0].start, name.end)
  877. # Get the open paren so _GetParameters() below works.
  878. unused_open_paren = self._GetNextToken()
  879. # TODO(nnorwitz): store template_portion.
  880. return_type = return_type_and_name
  881. indices = name
  882. if return_type:
  883. indices = return_type[0]
  884. # Force ctor for templatized ctors.
  885. if name.name == self.in_class and not modifiers:
  886. modifiers |= FUNCTION_CTOR
  887. parameters = list(self._GetParameters())
  888. del parameters[-1] # Remove trailing ')'.
  889. # Handling operator() is especially weird.
  890. if name.name == 'operator' and not parameters:
  891. token = self._GetNextToken()
  892. assert token.name == '(', token
  893. parameters = list(self._GetParameters())
  894. del parameters[-1] # Remove trailing ')'.
  895. token = self._GetNextToken()
  896. while token.token_type == tokenize.NAME:
  897. modifier_token = token
  898. token = self._GetNextToken()
  899. if modifier_token.name == 'const':
  900. modifiers |= FUNCTION_CONST
  901. elif modifier_token.name == '__attribute__':
  902. # TODO(nnorwitz): handle more __attribute__ details.
  903. modifiers |= FUNCTION_ATTRIBUTE
  904. assert token.name == '(', token
  905. # Consume everything between the (parens).
  906. unused_tokens = list(self._GetMatchingChar('(', ')'))
  907. token = self._GetNextToken()
  908. elif modifier_token.name == 'throw':
  909. modifiers |= FUNCTION_THROW
  910. assert token.name == '(', token
  911. # Consume everything between the (parens).
  912. unused_tokens = list(self._GetMatchingChar('(', ')'))
  913. token = self._GetNextToken()
  914. elif modifier_token.name == 'override':
  915. modifiers |= FUNCTION_OVERRIDE
  916. elif modifier_token.name == modifier_token.name.upper():
  917. # HACK(nnorwitz): assume that all upper-case names
  918. # are some macro we aren't expanding.
  919. modifiers |= FUNCTION_UNKNOWN_ANNOTATION
  920. else:
  921. self.HandleError('unexpected token', modifier_token)
  922. assert token.token_type == tokenize.SYNTAX, token
  923. # Handle ctor initializers.
  924. if token.name == ':':
  925. # TODO(nnorwitz): anything else to handle for initializer list?
  926. while token.name != ';' and token.name != '{':
  927. token = self._GetNextToken()
  928. # Handle pointer to functions that are really data but look
  929. # like method declarations.
  930. if token.name == '(':
  931. if parameters[0].name == '*':
  932. # name contains the return type.
  933. name = parameters.pop()
  934. # parameters contains the name of the data.
  935. modifiers = [p.name for p in parameters]
  936. # Already at the ( to open the parameter list.
  937. function_parameters = list(self._GetMatchingChar('(', ')'))
  938. del function_parameters[-1] # Remove trailing ')'.
  939. # TODO(nnorwitz): store the function_parameters.
  940. token = self._GetNextToken()
  941. assert token.token_type == tokenize.SYNTAX, token
  942. assert token.name == ';', token
  943. return self._CreateVariable(indices, name.name, indices.name,
  944. modifiers, '', None)
  945. # At this point, we got something like:
  946. # return_type (type::*name_)(params);
  947. # This is a data member called name_ that is a function pointer.
  948. # With this code: void (sq_type::*field_)(string&);
  949. # We get: name=void return_type=[] parameters=sq_type ... field_
  950. # TODO(nnorwitz): is return_type always empty?
  951. # TODO(nnorwitz): this isn't even close to being correct.
  952. # Just put in something so we don't crash and can move on.
  953. real_name = parameters[-1]
  954. modifiers = [p.name for p in self._GetParameters()]
  955. del modifiers[-1] # Remove trailing ')'.
  956. return self._CreateVariable(indices, real_name.name, indices.name,
  957. modifiers, '', None)
  958. if token.name == '{':
  959. body = list(self.GetScope())
  960. del body[-1] # Remove trailing '}'.
  961. else:
  962. body = None
  963. if token.name == '=':
  964. token = self._GetNextToken()
  965. if token.name == 'default' or token.name == 'delete':
  966. # Ignore explicitly defaulted and deleted special members
  967. # in C++11.
  968. token = self._GetNextToken()
  969. else:
  970. # Handle pure-virtual declarations.
  971. assert token.token_type == tokenize.CONSTANT, token
  972. assert token.name == '0', token
  973. modifiers |= FUNCTION_PURE_VIRTUAL
  974. token = self._GetNextToken()
  975. if token.name == '[':
  976. # TODO(nnorwitz): store tokens and improve parsing.
  977. # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
  978. tokens = list(self._GetMatchingChar('[', ']'))
  979. token = self._GetNextToken()
  980. assert token.name == ';', (token, return_type_and_name, parameters)
  981. # Looks like we got a method, not a function.
  982. if len(return_type) > 2 and return_type[-1].name == '::':
  983. return_type, in_class = \
  984. self._GetReturnTypeAndClassName(return_type)
  985. return Method(indices.start, indices.end, name.name, in_class,
  986. return_type, parameters, modifiers, templated_types,
  987. body, self.namespace_stack)
  988. return Function(indices.start, indices.end, name.name, return_type,
  989. parameters, modifiers, templated_types, body,
  990. self.namespace_stack)
  991. def _GetReturnTypeAndClassName(self, token_seq):
  992. # Splitting the return type from the class name in a method
  993. # can be tricky. For example, Return::Type::Is::Hard::To::Find().
  994. # Where is the return type and where is the class name?
  995. # The heuristic used is to pull the last name as the class name.
  996. # This includes all the templated type info.
  997. # TODO(nnorwitz): if there is only One name like in the
  998. # example above, punt and assume the last bit is the class name.
  999. # Ignore a :: prefix, if exists so we can find the first real name.
  1000. i = 0
  1001. if token_seq[0].name == '::':
  1002. i = 1
  1003. # Ignore a :: suffix, if exists.
  1004. end = len(token_seq) - 1
  1005. if token_seq[end-1].name == '::':
  1006. end -= 1
  1007. # Make a copy of the sequence so we can append a sentinel
  1008. # value. This is required for GetName will has to have some
  1009. # terminating condition beyond the last name.
  1010. seq_copy = token_seq[i:end]
  1011. seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
  1012. names = []
  1013. while i < end:
  1014. # Iterate through the sequence parsing out each name.
  1015. new_name, next = self.GetName(seq_copy[i:])
  1016. assert new_name, 'Got empty new_name, next=%s' % next
  1017. # We got a pointer or ref. Add it to the name.
  1018. if next and next.token_type == tokenize.SYNTAX:
  1019. new_name.append(next)
  1020. names.append(new_name)
  1021. i += len(new_name)
  1022. # Now that we have the names, it's time to undo what we did.
  1023. # Remove the sentinel value.
  1024. names[-1].pop()
  1025. # Flatten the token sequence for the return type.
  1026. return_type = [e for seq in names[:-1] for e in seq]
  1027. # The class name is the last name.
  1028. class_name = names[-1]
  1029. return return_type, class_name
  1030. def handle_bool(self):
  1031. pass
  1032. def handle_char(self):
  1033. pass
  1034. def handle_int(self):
  1035. pass
  1036. def handle_long(self):
  1037. pass
  1038. def handle_short(self):
  1039. pass
  1040. def handle_double(self):
  1041. pass
  1042. def handle_float(self):
  1043. pass
  1044. def handle_void(self):
  1045. pass
  1046. def handle_wchar_t(self):
  1047. pass
  1048. def handle_unsigned(self):
  1049. pass
  1050. def handle_signed(self):
  1051. pass
  1052. def _GetNestedType(self, ctor):
  1053. name = None
  1054. name_tokens, token = self.GetName()
  1055. if name_tokens:
  1056. name = ''.join([t.name for t in name_tokens])
  1057. # Handle forward declarations.
  1058. if token.token_type == tokenize.SYNTAX and token.name == ';':
  1059. return ctor(token.start, token.end, name, None,
  1060. self.namespace_stack)
  1061. if token.token_type == tokenize.NAME and self._handling_typedef:
  1062. self._AddBackToken(token)
  1063. return ctor(token.start, token.end, name, None,
  1064. self.namespace_stack)
  1065. # Must be the type declaration.
  1066. fields = list(self._GetMatchingChar('{', '}'))
  1067. del fields[-1] # Remove trailing '}'.
  1068. if token.token_type == tokenize.SYNTAX and token.name == '{':
  1069. next = self._GetNextToken()
  1070. new_type = ctor(token.start, token.end, name, fields,
  1071. self.namespace_stack)
  1072. # A name means this is an anonymous type and the name
  1073. # is the variable declaration.
  1074. if next.token_type != tokenize.NAME:
  1075. return new_type
  1076. name = new_type
  1077. token = next
  1078. # Must be variable declaration using the type prefixed with keyword.
  1079. assert token.token_type == tokenize.NAME, token
  1080. return self._CreateVariable(token, token.name, name, [], '', None)
  1081. def handle_struct(self):
  1082. # Special case the handling typedef/aliasing of structs here.
  1083. # It would be a pain to handle in the class code.
  1084. name_tokens, var_token = self.GetName()
  1085. if name_tokens:
  1086. next_token = self._GetNextToken()
  1087. is_syntax = (var_token.token_type == tokenize.SYNTAX and
  1088. var_token.name[0] in '*&')
  1089. is_variable = (var_token.token_type == tokenize.NAME and
  1090. next_token.name == ';')
  1091. variable = var_token
  1092. if is_syntax and not is_variable:
  1093. variable = next_token
  1094. temp = self._GetNextToken()
  1095. if temp.token_type == tokenize.SYNTAX and temp.name == '(':
  1096. # Handle methods declared to return a struct.
  1097. t0 = name_tokens[0]
  1098. struct = tokenize.Token(tokenize.NAME, 'struct',
  1099. t0.start-7, t0.start-2)
  1100. type_and_name = [struct]
  1101. type_and_name.extend(name_tokens)
  1102. type_and_name.extend((var_token, next_token))
  1103. return self._GetMethod(type_and_name, 0, None, False)
  1104. assert temp.name == ';', (temp, name_tokens, var_token)
  1105. if is_syntax or (is_variable and not self._handling_typedef):
  1106. modifiers = ['struct']
  1107. type_name = ''.join([t.name for t in name_tokens])
  1108. position = name_tokens[0]
  1109. return self._CreateVariable(position, variable.name, type_name,
  1110. modifiers, var_token.name, None)
  1111. name_tokens.extend((var_token, next_token))
  1112. self._AddBackTokens(name_tokens)
  1113. else:
  1114. self._AddBackToken(var_token)
  1115. return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
  1116. def handle_union(self):
  1117. return self._GetNestedType(Union)
  1118. def handle_enum(self):
  1119. return self._GetNestedType(Enum)
  1120. def handle_auto(self):
  1121. # TODO(nnorwitz): warn about using auto? Probably not since it
  1122. # will be reclaimed and useful for C++0x.
  1123. pass
  1124. def handle_register(self):
  1125. pass
  1126. def handle_const(self):
  1127. pass
  1128. def handle_inline(self):
  1129. pass
  1130. def handle_extern(self):
  1131. pass
  1132. def handle_static(self):
  1133. pass
  1134. def handle_virtual(self):
  1135. # What follows must be a method.
  1136. token = token2 = self._GetNextToken()
  1137. if token.name == 'inline':
  1138. # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
  1139. token2 = self._GetNextToken()
  1140. if token2.token_type == tokenize.SYNTAX and token2.name == '~':
  1141. return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
  1142. assert token.token_type == tokenize.NAME or token.name == '::', token
  1143. return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
  1144. tokenize.SYNTAX, '(') # )
  1145. return_type_and_name.insert(0, token)
  1146. if token2 is not token:
  1147. return_type_and_name.insert(1, token2)
  1148. return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
  1149. None, False)
  1150. def handle_volatile(self):
  1151. pass
  1152. def handle_mutable(self):
  1153. pass
  1154. def handle_public(self):
  1155. assert self.in_class
  1156. self.visibility = VISIBILITY_PUBLIC
  1157. def handle_protected(self):
  1158. assert self.in_class
  1159. self.visibility = VISIBILITY_PROTECTED
  1160. def handle_private(self):
  1161. assert self.in_class
  1162. self.visibility = VISIBILITY_PRIVATE
  1163. def handle_friend(self):
  1164. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1165. assert tokens
  1166. t0 = tokens[0]
  1167. return Friend(t0.start, t0.end, tokens, self.namespace_stack)
  1168. def handle_static_cast(self):
  1169. pass
  1170. def handle_const_cast(self):
  1171. pass
  1172. def handle_dynamic_cast(self):
  1173. pass
  1174. def handle_reinterpret_cast(self):
  1175. pass
  1176. def handle_new(self):
  1177. pass
  1178. def handle_delete(self):
  1179. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1180. assert tokens
  1181. return Delete(tokens[0].start, tokens[0].end, tokens)
  1182. def handle_typedef(self):
  1183. token = self._GetNextToken()
  1184. if (token.token_type == tokenize.NAME and
  1185. keywords.IsKeyword(token.name)):
  1186. # Token must be struct/enum/union/class.
  1187. method = getattr(self, 'handle_' + token.name)
  1188. self._handling_typedef = True
  1189. tokens = [method()]
  1190. self._handling_typedef = False
  1191. else:
  1192. tokens = [token]
  1193. # Get the remainder of the typedef up to the semi-colon.
  1194. tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))
  1195. # TODO(nnorwitz): clean all this up.
  1196. assert tokens
  1197. name = tokens.pop()
  1198. indices = name
  1199. if tokens:
  1200. indices = tokens[0]
  1201. if not indices:
  1202. indices = token
  1203. if name.name == ')':
  1204. # HACK(nnorwitz): Handle pointers to functions "properly".
  1205. if (len(tokens) >= 4 and
  1206. tokens[1].name == '(' and tokens[2].name == '*'):
  1207. tokens.append(name)
  1208. name = tokens[3]
  1209. elif name.name == ']':
  1210. # HACK(nnorwitz): Handle arrays properly.
  1211. if len(tokens) >= 2:
  1212. tokens.append(name)
  1213. name = tokens[1]
  1214. new_type = tokens
  1215. if tokens and isinstance(tokens[0], tokenize.Token):
  1216. new_type = self.converter.ToType(tokens)[0]
  1217. return Typedef(indices.start, indices.end, name.name,
  1218. new_type, self.namespace_stack)
  1219. def handle_typeid(self):
  1220. pass # Not needed yet.
  1221. def handle_typename(self):
  1222. pass # Not needed yet.
  1223. def _GetTemplatedTypes(self):
  1224. result = {}
  1225. tokens = list(self._GetMatchingChar('<', '>'))
  1226. len_tokens = len(tokens) - 1 # Ignore trailing '>'.
  1227. i = 0
  1228. while i < len_tokens:
  1229. key = tokens[i].name
  1230. i += 1
  1231. if keywords.IsKeyword(key) or key == ',':
  1232. continue
  1233. type_name = default = None
  1234. if i < len_tokens:
  1235. i += 1
  1236. if tokens[i-1].name == '=':
  1237. assert i < len_tokens, '%s %s' % (i, tokens)
  1238. default, unused_next_token = self.GetName(tokens[i:])
  1239. i += len(default)
  1240. else:
  1241. if tokens[i-1].name != ',':
  1242. # We got something like: Type variable.
  1243. # Re-adjust the key (variable) and type_name (Type).
  1244. key = tokens[i-1].name
  1245. type_name = tokens[i-2]
  1246. result[key] = (type_name, default)
  1247. return result
  1248. def handle_template(self):
  1249. token = self._GetNextToken()
  1250. assert token.token_type == tokenize.SYNTAX, token
  1251. assert token.name == '<', token
  1252. templated_types = self._GetTemplatedTypes()
  1253. # TODO(nnorwitz): for now, just ignore the template params.
  1254. token = self._GetNextToken()
  1255. if token.token_type == tokenize.NAME:
  1256. if token.name == 'class':
  1257. return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
  1258. elif token.name == 'struct':
  1259. return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
  1260. elif token.name == 'friend':
  1261. return self.handle_friend()
  1262. self._AddBackToken(token)
  1263. tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
  1264. tokens.append(last)
  1265. self._AddBackTokens(tokens)
  1266. if last.name == '(':
  1267. return self.GetMethod(FUNCTION_NONE, templated_types)
  1268. # Must be a variable definition.
  1269. return None
  1270. def handle_true(self):
  1271. pass # Nothing to do.
  1272. def handle_false(self):
  1273. pass # Nothing to do.
  1274. def handle_asm(self):
  1275. pass # Not needed yet.
  1276. def handle_class(self):
  1277. return self._GetClass(Class, VISIBILITY_PRIVATE, None)
  1278. def _GetBases(self):
  1279. # Get base classes.
  1280. bases = []
  1281. while 1:
  1282. token = self._GetNextToken()
  1283. assert token.token_type == tokenize.NAME, token
  1284. # TODO(nnorwitz): store kind of inheritance...maybe.
  1285. if token.name not in ('public', 'protected', 'private'):
  1286. # If inheritance type is not specified, it is private.
  1287. # Just put the token back so we can form a name.
  1288. # TODO(nnorwitz): it would be good to warn about this.
  1289. self._AddBackToken(token)
  1290. else:
  1291. # Check for virtual inheritance.
  1292. token = self._GetNextToken()
  1293. if token.name != 'virtual':
  1294. self._AddBackToken(token)
  1295. else:
  1296. # TODO(nnorwitz): store that we got virtual for this base.
  1297. pass
  1298. base, next_token = self.GetName()
  1299. bases_ast = self.converter.ToType(base)
  1300. assert len(bases_ast) == 1, bases_ast
  1301. bases.append(bases_ast[0])
  1302. assert next_token.token_type == tokenize.SYNTAX, next_token
  1303. if next_token.name == '{':
  1304. token = next_token
  1305. break
  1306. # Support multiple inheritance.
  1307. assert next_token.name == ',', next_token
  1308. return bases, token
  1309. def _GetClass(self, class_type, visibility, templated_types):
  1310. class_name = None
  1311. class_token = self._GetNextToken()
  1312. if class_token.token_type != tokenize.NAME:
  1313. assert class_token.token_type == tokenize.SYNTAX, class_token
  1314. token = class_token
  1315. else:
  1316. # Skip any macro (e.g. storage class specifiers) after the
  1317. # 'class' keyword.
  1318. next_token = self._GetNextToken()
  1319. if next_token.token_type == tokenize.NAME:
  1320. self._AddBackToken(next_token)
  1321. else:
  1322. self._AddBackTokens([class_token, next_token])
  1323. name_tokens, token = self.GetName()
  1324. class_name = ''.join([t.name for t in name_tokens])
  1325. bases = None
  1326. if token.token_type == tokenize.SYNTAX:
  1327. if token.name == ';':
  1328. # Forward declaration.
  1329. return class_type(class_token.start, class_token.end,
  1330. class_name, None, templated_types, None,
  1331. self.namespace_stack)
  1332. if token.name in '*&':
  1333. # Inline forward declaration. Could be method or data.
  1334. name_token = self._GetNextToken()
  1335. next_token = self._GetNextToken()
  1336. if next_token.name == ';':
  1337. # Handle data
  1338. modifiers = ['class']
  1339. return self._CreateVariable(class_token, name_token.name,
  1340. class_name,
  1341. modifiers, token.name, None)
  1342. else:
  1343. # Assume this is a method.
  1344. tokens = (class_token, token, name_token, next_token)
  1345. self._AddBackTokens(tokens)
  1346. return self.GetMethod(FUNCTION_NONE, None)
  1347. if token.name == ':':
  1348. bases, token = self._GetBases()
  1349. body = None
  1350. if token.token_type == tokenize.SYNTAX and token.name == '{':
  1351. assert token.token_type == tokenize.SYNTAX, token
  1352. assert token.name == '{', token
  1353. ast = AstBuilder(self.GetScope(), self.filename, class_name,
  1354. visibility, self.namespace_stack)
  1355. body = list(ast.Generate())
  1356. if not self._handling_typedef:
  1357. token = self._GetNextToken()
  1358. if token.token_type != tokenize.NAME:
  1359. assert token.token_type == tokenize.SYNTAX, token
  1360. assert token.name == ';', token
  1361. else:
  1362. new_class = class_type(class_token.start, class_token.end,
  1363. class_name, bases, None,
  1364. body, self.namespace_stack)
  1365. modifiers = []
  1366. return self._CreateVariable(class_token,
  1367. token.name, new_class,
  1368. modifiers, token.name, None)
  1369. else:
  1370. if not self._handling_typedef:
  1371. self.HandleError('non-typedef token', token)
  1372. self._AddBackToken(token)
  1373. return class_type(class_token.start, class_token.end, class_name,
  1374. bases, templated_types, body, self.namespace_stack)
  1375. def handle_namespace(self):
  1376. token = self._GetNextToken()
  1377. # Support anonymous namespaces.
  1378. name = None
  1379. if token.token_type == tokenize.NAME:
  1380. name = token.name
  1381. token = self._GetNextToken()
  1382. self.namespace_stack.append(name)
  1383. assert token.token_type == tokenize.SYNTAX, token
  1384. # Create an internal token that denotes when the namespace is complete.
  1385. internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
  1386. None, None)
  1387. internal_token.whence = token.whence
  1388. if token.name == '=':
  1389. # TODO(nnorwitz): handle aliasing namespaces.
  1390. name, next_token = self.GetName()
  1391. assert next_token.name == ';', next_token
  1392. self._AddBackToken(internal_token)
  1393. else:
  1394. assert token.name == '{', token
  1395. tokens = list(self.GetScope())
  1396. # Replace the trailing } with the internal namespace pop token.
  1397. tokens[-1] = internal_token
  1398. # Handle namespace with nothing in it.
  1399. self._AddBackTokens(tokens)
  1400. return None
  1401. def handle_using(self):
  1402. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1403. assert tokens
  1404. return Using(tokens[0].start, tokens[0].end, tokens)
  1405. def handle_explicit(self):
  1406. assert self.in_class
  1407. # Nothing much to do.
  1408. # TODO(nnorwitz): maybe verify the method name == class name.
  1409. # This must be a ctor.
  1410. return self.GetMethod(FUNCTION_CTOR, None)
  1411. def handle_this(self):
  1412. pass # Nothing to do.
  1413. def handle_operator(self):
  1414. # Pull off the next token(s?) and make that part of the method name.
  1415. pass
  1416. def handle_sizeof(self):
  1417. pass
  1418. def handle_case(self):
  1419. pass
  1420. def handle_switch(self):
  1421. pass
  1422. def handle_default(self):
  1423. token = self._GetNextToken()
  1424. assert token.token_type == tokenize.SYNTAX
  1425. assert token.name == ':'
  1426. def handle_if(self):
  1427. pass
  1428. def handle_else(self):
  1429. pass
  1430. def handle_return(self):
  1431. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1432. if not tokens:
  1433. return Return(self.current_token.start, self.current_token.end, None)
  1434. return Return(tokens[0].start, tokens[0].end, tokens)
  1435. def handle_goto(self):
  1436. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1437. assert len(tokens) == 1, str(tokens)
  1438. return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
  1439. def handle_try(self):
  1440. pass # Not needed yet.
  1441. def handle_catch(self):
  1442. pass # Not needed yet.
  1443. def handle_throw(self):
  1444. pass # Not needed yet.
  1445. def handle_while(self):
  1446. pass
  1447. def handle_do(self):
  1448. pass
  1449. def handle_for(self):
  1450. pass
  1451. def handle_break(self):
  1452. self._IgnoreUpTo(tokenize.SYNTAX, ';')
  1453. def handle_continue(self):
  1454. self._IgnoreUpTo(tokenize.SYNTAX, ';')
  1455. def BuilderFromSource(source, filename):
  1456. """Utility method that returns an AstBuilder from source code.
  1457. Args:
  1458. source: 'C++ source code'
  1459. filename: 'file1'
  1460. Returns:
  1461. AstBuilder
  1462. """
  1463. return AstBuilder(tokenize.GetTokens(source), filename)
  1464. def PrintIndentifiers(filename, should_print):
  1465. """Prints all identifiers for a C++ source file.
  1466. Args:
  1467. filename: 'file1'
  1468. should_print: predicate with signature: bool Function(token)
  1469. """
  1470. source = utils.ReadFile(filename, False)
  1471. if source is None:
  1472. sys.stderr.write('Unable to find: %s\n' % filename)
  1473. return
  1474. #print('Processing %s' % actual_filename)
  1475. builder = BuilderFromSource(source, filename)
  1476. try:
  1477. for node in builder.Generate():
  1478. if should_print(node):
  1479. print(node.name)
  1480. except KeyboardInterrupt:
  1481. return
  1482. except:
  1483. pass
  1484. def PrintAllIndentifiers(filenames, should_print):
  1485. """Prints all identifiers for each C++ source file in filenames.
  1486. Args:
  1487. filenames: ['file1', 'file2', ...]
  1488. should_print: predicate with signature: bool Function(token)
  1489. """
  1490. for path in filenames:
  1491. PrintIndentifiers(path, should_print)
  1492. def main(argv):
  1493. for filename in argv[1:]:
  1494. source = utils.ReadFile(filename)
  1495. if source is None:
  1496. continue
  1497. print('Processing %s' % filename)
  1498. builder = BuilderFromSource(source, filename)
  1499. try:
  1500. entire_ast = filter(None, builder.Generate())
  1501. except KeyboardInterrupt:
  1502. return
  1503. except:
  1504. # Already printed a warning, print the traceback and continue.
  1505. traceback.print_exc()
  1506. else:
  1507. if utils.DEBUG:
  1508. for ast in entire_ast:
  1509. print(ast)
  1510. if __name__ == '__main__':
  1511. main(sys.argv)