cprover
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
21  const array_string_exprt &res,
22  const exprt &code_point)
23 {
24  const typet &char_type = res.content().type().subtype();
25  const typet &type=code_point.type();
26  PRECONDITION(type.id()==ID_signedbv);
27 
28  // We add axioms:
29  // a1 : code_point<0x010000 => |res|=1
30  // a2 : code_point>=0x010000 => |res|=2
31  // a3 : code_point<0x010000 => res[0]=code_point
32  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
33  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
34  // For more explenations about this conversion, see:
35  // https://en.wikipedia.org/wiki/UTF-16
36 
37  exprt hex010000=from_integer(0x010000, type);
38  exprt hexD800=from_integer(0xD800, type);
39  exprt hexDC00=from_integer(0xDC00, type);
40  exprt hex0400=from_integer(0x0400, type);
41 
42  binary_relation_exprt small(code_point, ID_lt, hex010000);
43  implies_exprt a1(small, res.axiom_for_has_length(1));
44  lemmas.push_back(a1);
45 
46  implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2));
47  lemmas.push_back(a2);
48 
49  typecast_exprt code_point_as_char(code_point, char_type);
50  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
51  lemmas.push_back(a3);
52 
53  plus_exprt first_char(
54  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
55  implies_exprt a4(
56  not_exprt(small),
57  equal_exprt(res[0], typecast_exprt(first_char, char_type)));
58  lemmas.push_back(a4);
59 
60  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
61  implies_exprt a5(
62  not_exprt(small),
63  equal_exprt(res[1], typecast_exprt(second_char, char_type)));
64  lemmas.push_back(a5);
65 
67 }
68 
76 {
77  return and_exprt(
78  binary_relation_exprt(chr, ID_ge, constant_char(0xD800, chr.type())),
79  binary_relation_exprt(chr, ID_le, constant_char(0xDBFF, chr.type())));
80 }
81 
89 {
90  return and_exprt(
91  binary_relation_exprt(chr, ID_ge, constant_char(0xDC00, chr.type())),
92  binary_relation_exprt(chr, ID_le, constant_char(0xDFFF, chr.type())));
93 }
94 
104 exprt pair_value(exprt char1, exprt char2, typet return_type)
105 {
106  exprt hex010000=from_integer(0x010000, return_type);
107  exprt hex0800=from_integer(0x0800, return_type);
108  exprt hex0400=from_integer(0x0400, return_type);
109  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
110  mod_exprt m2(char2, hex0400);
111  plus_exprt pair_value(hex010000, plus_exprt(m1, m2));
112  return pair_value;
113 }
114 
121 {
122  const typet &return_type = f.type();
123  PRECONDITION(return_type.id()==ID_signedbv);
124  PRECONDITION(f.arguments().size() == 2);
125  const array_string_exprt str = get_string_expr(f.arguments()[0]);
126  const exprt &pos = f.arguments()[1];
127 
128  const symbol_exprt result = fresh_symbol("char", return_type);
129  const exprt index1 = from_integer(1, str.length().type());
130  const exprt &char1=str[pos];
131  const exprt &char2 = str[plus_exprt(pos, index1)];
132  const typecast_exprt char1_as_int(char1, return_type);
133  const typecast_exprt char2_as_int(char2, return_type);
134  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
135  const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]);
136  const and_exprt return_pair(is_high_surrogate(str[pos]), is_low);
137 
138  lemmas.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
139  lemmas.push_back(
140  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
141  return result;
142 }
143 
150 {
152  PRECONDITION(args.size()==2);
153  typet return_type=f.type();
154  PRECONDITION(return_type.id()==ID_signedbv);
155  symbol_exprt result=fresh_symbol("char", return_type);
156  array_string_exprt str = get_string_expr(args[0]);
157 
158  const exprt &char1=
159  str[minus_exprt(args[1], from_integer(2, str.length().type()))];
160  const exprt &char2=
161  str[minus_exprt(args[1], from_integer(1, str.length().type()))];
162  const typecast_exprt char1_as_int(char1, return_type);
163  const typecast_exprt char2_as_int(char2, return_type);
164 
165  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
166  const and_exprt return_pair(
167  is_high_surrogate(char1), is_low_surrogate(char2));
168 
169  lemmas.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
170  lemmas.push_back(
171  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
172  return result;
173 }
174 
182 {
183  PRECONDITION(f.arguments().size() == 3);
184  const array_string_exprt str = get_string_expr(f.arguments()[0]);
185  const exprt &begin = f.arguments()[1];
186  const exprt &end = f.arguments()[2];
187  const typet &return_type=f.type();
188  const symbol_exprt result = fresh_symbol("code_point_count", return_type);
189  const minus_exprt length(end, begin);
190  const div_exprt minimum(length, from_integer(2, length.type()));
191  lemmas.push_back(binary_relation_exprt(result, ID_le, length));
192  lemmas.push_back(binary_relation_exprt(result, ID_ge, minimum));
193 
194  return result;
195 }
196 
205 {
206  PRECONDITION(f.arguments().size() == 3);
207  const exprt &index = f.arguments()[1];
208  const exprt &offset = f.arguments()[2];
209  const typet &return_type=f.type();
210  const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type);
211 
212  const exprt minimum = plus_exprt(index, offset);
213  const exprt maximum = plus_exprt(minimum, offset);
214  lemmas.push_back(binary_relation_exprt(result, ID_le, maximum));
215  lemmas.push_back(binary_relation_exprt(result, ID_ge, minimum));
216 
217  return result;
218 }
219 
The type of an expression.
Definition: type.h:22
Boolean negation.
Definition: std_expr.h:3228
semantic type conversion
Definition: std_expr.h:2111
Generates string constraints to link results from string functions with their arguments.
exprt add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:752
application of (mathematical) function
Definition: std_expr.h:4529
equal_exprt axiom_for_has_length(const exprt &rhs) const
Definition: string_expr.h:125
literalt pos(literalt a)
Definition: literal.h:193
argumentst & arguments()
Definition: std_expr.h:4562
exprt add_axioms_for_code_point_count(const function_application_exprt &f)
Add axioms corresponding the String.codePointCount java function.
exprt add_axioms_for_offset_by_code_point(const function_application_exprt &f)
Add axioms corresponding the String.offsetByCodePointCount java function.
typet & type()
Definition: expr.h:56
exprt minimum(const exprt &a, const exprt &b)
boolean implication
Definition: std_expr.h:2339
equality
Definition: std_expr.h:1354
const irep_idt & id() const
Definition: irep.h:259
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
division (integer and real)
Definition: std_expr.h:1075
exprt::operandst argumentst
Definition: std_expr.h:4560
boolean AND
Definition: std_expr.h:2255
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
The plus expression.
Definition: std_expr.h:893
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en...
binary multiplication
Definition: std_expr.h:1017
exprt maximum(const exprt &a, const exprt &b)
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
Base class for all expressions.
Definition: expr.h:42
exprt add_axioms_for_code_point(const array_string_exprt &res, const exprt &code_point)
add axioms for the conversion of an integer representing a java code point to a utf-16 string ...
exprt add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
binary minus
Definition: std_expr.h:959
Expression to hold a symbol (variable)
Definition: std_expr.h:90
const typet & subtype() const
Definition: type.h:33
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en...
bitvector_typet char_type()
Definition: c_types.cpp:114
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
binary modulo
Definition: std_expr.h:1133