cprover
mini_c_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Mini C Parser
4 
5 Author: Daniel Kroening, dkr@amazon.com
6 
7 \*******************************************************************/
8 
11 
12 #include "mini_c_parser.h"
13 
14 #include <util/exception_utils.h>
15 #include <util/invariant.h>
16 
17 #include "cscanner.h"
18 
20 {
21 public:
23  {
24  }
25 
26  c_translation_unitt parse(std::istream &);
27 
28 protected:
29  std::size_t token_index;
30  using tokenst = std::vector<ctokent>;
32 
33  bool eof() const
34  {
35  return is_eof(peek());
36  }
37 
43 
44  const ctokent &peek() const
45  {
47  return tokens[token_index];
48  }
49 
50  const ctokent &peek(std::size_t how_many) const
51  {
52  PRECONDITION(token_index + how_many < tokens.size());
53  return tokens[token_index + how_many];
54  }
55 
57  {
60  return tokens[token_index++];
61  }
62 
63  static bool is_storage_class(const ctokent &token)
64  {
65  return token == "auto" || token == "extern" || token == "static" ||
66  token == "register" || token == "_Thread_local";
67  }
68 
69  static bool is_type_qualifier(const ctokent &token)
70  {
71  return token == "const" || token == "volatile" || token == "restrict" ||
72  token == "_Atomic";
73  }
74 
75  void skip_ws(tokenst &);
76  void parse_brackets(char open, char close, tokenst &dest);
77 };
78 
79 std::ostream &operator<<(std::ostream &out, const c_declarationt &declaration)
80 {
81  for(const auto &t : declaration.pre_declarator)
82  out << t.text;
83 
84  for(const auto &t : declaration.declarator)
85  out << t.text;
86 
87  for(const auto &t : declaration.post_declarator)
88  out << t.text;
89 
90  for(const auto &t : declaration.initializer)
91  out << t.text;
92 
93  return out;
94 }
95 
96 void c_declarationt::print(std::ostream &out) const
97 {
98  if(!declarator.empty())
99  {
100  out << "DECLARATOR: ";
101  for(const auto &t : declarator)
102  out << t.text;
103  out << '\n';
104  }
105 }
106 
108 {
109  return !post_declarator.empty() && post_declarator.front() == '(';
110 }
111 
113 {
114  return !initializer.empty() && initializer.front() == '{';
115 }
116 
118 {
119  for(auto &t : declarator)
120  if(is_identifier(t))
121  return t;
122  return {};
123 }
124 
126 {
127  if(eof())
128  return;
129 
130  while(is_ws(peek()) || is_comment(peek()) ||
132  {
133  dest.push_back(consume_token());
134  }
135 }
136 
137 void mini_c_parsert::parse_brackets(char open, char close, tokenst &dest)
138 {
139  if(eof() || peek() != open)
140  return;
141 
142  std::size_t bracket_count = 0;
143  while(true)
144  {
145  if(eof())
146  throw invalid_source_file_exceptiont("expected " + std::string(1, close));
147 
148  auto &token = consume_token();
149  dest.push_back(token);
150  if(token == open)
151  bracket_count++;
152  else if(token == close)
153  {
154  bracket_count--;
155  if(bracket_count == 0)
156  break; // done
157  }
158  }
159 }
160 
162 {
163  // type qualifier
164  // storage class
165  // type
166  // '*'
167  tokenst result;
168 
169  while(true)
170  {
171  skip_ws(result);
172 
173  if(eof())
174  return result;
175 
176  auto &token = peek();
177 
178  if(
179  is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
180  token == "int" || token == "signed" || token.text == "unsigned" ||
181  token == "char" || token == "short" || token == "long" ||
182  token == "float" || token == "double" || token == "inline" ||
183  token == "typedef")
184  {
185  result.push_back(consume_token());
186  }
187  else if(token == "enum" || token == "struct" || token == "union")
188  {
189  result.push_back(consume_token());
190 
191  skip_ws(result);
192 
193  // may be followed by a tag
194  if(!eof() && is_identifier(peek()))
195  result.push_back(consume_token());
196 
197  skip_ws(result);
198 
199  // may be followed by a body {...}
200  parse_brackets('{', '}', result);
201  }
202  else if(token == "__attribute__")
203  {
204  result.push_back(consume_token());
205  skip_ws(result);
206  // followed by (( ... ))
207  parse_brackets('(', ')', result);
208  }
209  else if(is_identifier(token))
210  {
211  // Might be typedef or the declarator.
212  // We look ahead for the next non-WS token to tell the difference.
213  std::size_t index = 1;
214  while(true)
215  {
216  const auto &next_token = peek(index);
217  if(
218  is_ws(next_token) || is_preprocessor_directive(next_token) ||
219  is_comment(next_token))
220  index++;
221  else
222  break;
223  }
224 
225  auto &next_token = peek(index);
226  if(!is_identifier(next_token) && next_token != '*')
227  {
228  // 'token' is the declarator
229  return result;
230  }
231  else
232  result.push_back(consume_token()); // it's a type
233  }
234  else if(token == ';')
235  return result;
236  else if(token == '(') // function type, part of declarator
237  return result;
238  else
240  "expected a declaration but got '" + token.text + "'");
241  }
242 }
243 
245 {
246  // symbol
247  // ((...* symbol ...))
248 
249  if(eof())
250  return {};
251 
252  if(peek() == ';')
253  return {};
254 
255  if(peek() == '(')
256  {
257  tokenst result;
258  parse_brackets('(', ')', result);
259  return result;
260  }
261  else if(is_identifier(peek()))
262  {
263  return {consume_token()};
264  }
265  else
266  throw invalid_source_file_exceptiont("expected an identifier");
267 }
268 
270 {
271  // consume everything until we see one of the following:
272  // 1) ';' (end of declaration)
273  // 2) '{' (function body)
274  // 3) '=' (initializer)
275 
276  tokenst result;
277 
278  while(true)
279  {
280  if(eof())
281  return result;
282 
283  if(peek() == ';' || peek() == '{' || peek() == '=')
284  return result;
285 
286  result.push_back(consume_token());
287  }
288 }
289 
291 {
292  if(eof())
293  return {};
294  else if(peek() == '=')
295  {
296  tokenst result;
297  while(true)
298  {
299  if(eof())
300  throw invalid_source_file_exceptiont("expected an initializer");
301  auto &token = consume_token();
302  result.push_back(token);
303  if(token == ';')
304  return result;
305  }
306  }
307  else if(peek() == ';')
308  {
309  // done
310  return {consume_token()};
311  }
312  else if(peek() == '{')
313  {
314  // function body
315  tokenst result;
316  std::size_t bracket_count = 0;
317  while(true)
318  {
319  if(eof())
320  throw invalid_source_file_exceptiont("eof in function body");
321  auto &token = consume_token();
322  result.push_back(token);
323  if(token == '{')
324  bracket_count++;
325  else if(token == '}')
326  {
327  bracket_count--;
328  if(bracket_count == 0)
329  return result;
330  }
331  }
332  }
333  else
334  PRECONDITION(false);
335 }
336 
338 {
339  c_declarationt result;
340 
342  result.declarator = parse_declarator();
344  result.initializer = parse_initializer();
345 
346  return result;
347 }
348 
350 {
351  cscannert cscanner(in);
352  cscanner.return_WS_and_comments = true;
353  tokens = cscanner.get_tokens();
354  token_index = 0;
355 
356  if(tokens.empty())
357  return {};
358 
359  DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");
360 
361  c_translation_unitt result;
362 
363  while(!eof())
364  result.push_back(parse_declaration());
365 
366  return result;
367 }
368 
369 c_translation_unitt parse_c(std::istream &in)
370 {
371  return mini_c_parsert().parse(in);
372 }
bool return_WS_and_comments
Definition: cscanner.h:31
std::vector< ctokent > get_tokens()
Definition: cscanner.cpp:41
Definition: ctoken.h:19
Thrown when we can't handle something in an input source file.
tokenst parse_declarator()
void parse_brackets(char open, char close, tokenst &dest)
c_translation_unitt parse(std::istream &)
void skip_ws(tokenst &)
std::size_t token_index
static bool is_storage_class(const ctokent &token)
tokenst parse_post_declarator()
tokenst parse_pre_declarator()
const ctokent & peek(std::size_t how_many) const
std::vector< ctokent > tokenst
bool eof() const
const ctokent & peek() const
c_declarationt parse_declaration()
tokenst parse_initializer()
const ctokent & consume_token()
static bool is_type_qualifier(const ctokent &token)
cscanner
static bool is_identifier(const ctokent &t)
Definition: ctoken.h:68
static bool is_comment(const ctokent &t)
Definition: ctoken.h:93
static bool is_preprocessor_directive(const ctokent &t)
Definition: ctoken.h:98
static bool is_ws(const ctokent &t)
Definition: ctoken.h:83
static bool is_eof(const ctokent &t)
Definition: ctoken.h:88
std::ostream & operator<<(std::ostream &out, const c_declarationt &declaration)
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
std::vector< c_declarationt > c_translation_unitt
Definition: mini_c_parser.h:38
nonstd::optional< T > optionalt
Definition: optional.h:35
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document assumptions that are made on goto_functions,...
Definition: invariant.h:510
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
bool has_body() const
bool is_function() const
void print(std::ostream &) const
tokenst post_declarator
Definition: mini_c_parser.h:29
tokenst initializer
Definition: mini_c_parser.h:30
optionalt< ctokent > declared_identifier() const
tokenst declarator
Definition: mini_c_parser.h:28
tokenst pre_declarator
Definition: mini_c_parser.h:27