cprover
string_refinement.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: String support via creating string constraints and progressively
4 instantiating the universal constraints as needed.
5 The procedure is described in the PASS paper at HVC'13:
6 "PASS: String Solving with Parameterized Array and Interval Automaton"
7 by Guodong Li and Indradeep Ghosh.
8
9Author: Alberto Griggio, alberto.griggio@gmail.com
10
11\*******************************************************************/
12
19
20#include "string_refinement.h"
21
23#include <stack>
24#include <unordered_set>
25
26#include <util/expr_iterator.h>
27#include <util/expr_util.h>
28#include <util/format_type.h>
29#include <util/magic.h>
30#include <util/range.h>
31#include <util/simplify_expr.h>
32
35#include "string_dependencies.h"
37
39 messaget::mstreamt &stream,
40 const namespacet &ns,
41 const string_constraintt &constraint);
42
44 const namespacet &ns,
45 const exprt &axiom,
46 const symbol_exprt &var,
47 message_handlert &message_handler);
48
65static std::pair<bool, std::vector<exprt>> check_axioms(
66 const string_axiomst &axioms,
68 const std::function<exprt(const exprt &)> &get,
69 messaget::mstreamt &stream,
70 const namespacet &ns,
71 bool use_counter_example,
72 const union_find_replacet &symbol_resolve,
73 const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
74 &not_contain_witnesses);
75
76static void initial_index_set(
77 index_set_pairt &index_set,
78 const namespacet &ns,
79 const string_constraintt &axiom);
80
81static void initial_index_set(
82 index_set_pairt &index_set,
83 const namespacet &ns,
85
86static void initial_index_set(
87 index_set_pairt &index_set,
88 const namespacet &ns,
89 const string_axiomst &axioms);
90
91exprt simplify_sum(const exprt &f);
92
93static void update_index_set(
94 index_set_pairt &index_set,
95 const namespacet &ns,
96 const std::vector<exprt> &current_constraints);
97
98static void update_index_set(
99 index_set_pairt &index_set,
100 const namespacet &ns,
101 const exprt &formula);
102
103static std::vector<exprt> instantiate(
105 const index_set_pairt &index_set,
106 const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
107 &witnesses);
108
110 const std::function<exprt(const exprt &)> &super_get,
111 const namespacet &ns,
112 messaget::mstreamt &stream,
113 const array_string_exprt &arr,
114 const array_poolt &array_pool);
115
117 const index_exprt &index_expr,
118 symbol_generatort &symbol_generator,
119 const bool left_propagate);
120
128template <typename T>
129static std::vector<T>
130fill_in_map_as_vector(const std::map<std::size_t, T> &index_value)
131{
132 std::vector<T> result;
133 if(!index_value.empty())
134 {
135 result.resize(index_value.rbegin()->first + 1);
136 for(auto it = index_value.rbegin(); it != index_value.rend(); ++it)
137 {
138 const std::size_t index = it->first;
139 const T &value = it->second;
140 const auto next = std::next(it);
141 const std::size_t leftmost_index_to_pad =
142 next != index_value.rend() ? next->first + 1 : 0;
143 for(std::size_t j = leftmost_index_to_pad; j <= index; j++)
144 result[j] = value;
145 }
146 }
147 return result;
148}
149
150static bool validate(const string_refinementt::infot &info)
151{
152 PRECONDITION(info.ns);
153 PRECONDITION(info.prop);
154 return true;
155}
156
158 : supert(info),
159 config_(info),
160 loop_bound_(info.refinement_bound),
161 generator(*info.ns)
162{
163}
164
166 : string_refinementt(info, validate(info))
167{
168}
169
171static void
173{
174 std::size_t count = 0;
175 std::size_t count_current = 0;
176 for(const auto &i : index_set.cumulative)
177 {
178 const exprt &s = i.first;
179 stream << "IS(" << format(s) << ")=={" << messaget::eom;
180
181 for(const auto &j : i.second)
182 {
183 const auto it = index_set.current.find(i.first);
184 if(
185 it != index_set.current.end() && it->second.find(j) != it->second.end())
186 {
187 count_current++;
188 stream << "**";
189 }
190 stream << " " << format(j) << ";" << messaget::eom;
191 count++;
192 }
193 stream << "}" << messaget::eom;
194 }
195 stream << count << " elements in index set (" << count_current
196 << " newly added)" << messaget::eom;
197}
198
220static std::vector<exprt> generate_instantiations(
221 const index_set_pairt &index_set,
222 const string_axiomst &axioms,
223 const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
224 &not_contain_witnesses)
225{
226 std::vector<exprt> lemmas;
227 for(const auto &i : index_set.current)
228 {
229 for(const auto &univ_axiom : axioms.universal)
230 {
231 for(const auto &j : i.second)
232 lemmas.push_back(instantiate(univ_axiom, i.first, j));
233 }
234 }
235 for(const auto &nc_axiom : axioms.not_contains)
236 {
237 for(const auto &instance :
238 instantiate(nc_axiom, index_set, not_contain_witnesses))
239 lemmas.push_back(instance);
240 }
241 return lemmas;
242}
243
249 exprt &expr)
250{
251 if(const auto equal_expr = expr_try_dynamic_cast<equal_exprt>(expr))
252 {
253 if(
254 const auto fun_app = expr_try_dynamic_cast<function_application_exprt>(
255 as_const(equal_expr->rhs())))
256 {
257 const auto new_equation =
258 generator.make_array_pointer_association(equal_expr->lhs(), *fun_app);
259 if(new_equation)
260 {
261 expr =
262 equal_exprt{from_integer(true, new_equation->type()), *new_equation};
263 }
264 }
265 }
266}
267
272static exprt
273replace_expr_copy(const union_find_replacet &symbol_resolve, exprt expr)
274{
275 symbol_resolve.replace_expr(expr);
276 return expr;
277}
278
283void string_refinementt::set_to(const exprt &expr, bool value)
284{
285 PRECONDITION(expr.type().id() == ID_bool);
287 if(!value)
288 equations.push_back(not_exprt{expr});
289 else
290 equations.push_back(expr);
291}
292
302 union_find_replacet &symbol_solver,
303 const std::vector<exprt> &equations,
304 const namespacet &ns,
305 messaget::mstreamt &stream)
306{
307 const std::string log_message =
308 "WARNING string_refinement.cpp generate_symbol_resolution_from_equations:";
309 auto equalities = make_range(equations).filter(
310 [&](const exprt &e) { return can_cast_expr<equal_exprt>(e); });
311 for(const exprt &e : equalities)
312 {
313 const equal_exprt &eq = to_equal_expr(e);
314 const exprt &lhs = to_equal_expr(eq).lhs();
315 const exprt &rhs = to_equal_expr(eq).rhs();
316 if(lhs.id() != ID_symbol)
317 {
318 stream << log_message << "non symbol lhs: " << format(lhs)
319 << " with rhs: " << format(rhs) << messaget::eom;
320 continue;
321 }
322
323 if(lhs.type() != rhs.type())
324 {
325 stream << log_message << "non equal types lhs: " << format(lhs)
326 << "\n####################### rhs: " << format(rhs)
327 << messaget::eom;
328 continue;
329 }
330
331 if(is_char_pointer_type(rhs.type()))
332 {
333 symbol_solver.make_union(lhs, simplify_expr(rhs, ns));
334 }
335 else if(rhs.id() == ID_function_application)
336 {
337 // function applications can be ignored because they will be replaced
338 // in the convert_function_application step of dec_solve
339 }
340 else if(
341 lhs.type().id() != ID_pointer && has_char_pointer_subtype(lhs.type(), ns))
342 {
343 if(rhs.type().id() == ID_struct || rhs.type().id() == ID_struct_tag)
344 {
345 const struct_typet &struct_type = to_struct_type(ns.follow(rhs.type()));
346 for(const auto &comp : struct_type.components())
347 {
348 if(is_char_pointer_type(comp.type()))
349 {
350 const member_exprt lhs_data(lhs, comp.get_name(), comp.type());
351 const exprt rhs_data = simplify_expr(
352 member_exprt(rhs, comp.get_name(), comp.type()), ns);
353 symbol_solver.make_union(lhs_data, rhs_data);
354 }
355 }
356 }
357 else
358 {
359 stream << log_message << "non struct with char pointer subexpr "
360 << format(rhs) << "\n * of type " << format(rhs.type())
361 << messaget::eom;
362 }
363 }
364 }
365}
366
373static std::vector<exprt>
375{
376 std::vector<exprt> result;
377 if(lhs.type() == string_typet())
378 result.push_back(lhs);
379 else if(lhs.type().id() == ID_struct || lhs.type().id() == ID_struct_tag)
380 {
381 const struct_typet &struct_type = to_struct_type(ns.follow(lhs.type()));
382 for(const auto &comp : struct_type.components())
383 {
384 const std::vector<exprt> strings_in_comp = extract_strings_from_lhs(
385 member_exprt(lhs, comp.get_name(), comp.type()), ns);
386 result.insert(
387 result.end(), strings_in_comp.begin(), strings_in_comp.end());
388 }
389 }
390 return result;
391}
392
398static std::vector<exprt>
399extract_strings(const exprt &expr, const namespacet &ns)
400{
401 std::vector<exprt> result;
402 for(auto it = expr.depth_begin(); it != expr.depth_end();)
403 {
404 if(it->type() == string_typet() && it->id() != ID_if)
405 {
406 result.push_back(*it);
407 it.next_sibling_or_parent();
408 }
409 else if(it->id() == ID_symbol)
410 {
411 for(const exprt &e : extract_strings_from_lhs(*it, ns))
412 result.push_back(e);
413 it.next_sibling_or_parent();
414 }
415 else
416 ++it;
417 }
418 return result;
419}
420
428 const equal_exprt &eq,
429 union_find_replacet &symbol_resolve,
430 const namespacet &ns)
431{
432 if(eq.rhs().type() == string_typet())
433 {
434 symbol_resolve.make_union(eq.lhs(), simplify_expr(eq.rhs(), ns));
435 }
436 else if(has_subtype(eq.lhs().type(), ID_string, ns))
437 {
438 if(
439 eq.rhs().type().id() == ID_struct ||
440 eq.rhs().type().id() == ID_struct_tag)
441 {
442 const struct_typet &struct_type =
443 to_struct_type(ns.follow(eq.rhs().type()));
444 for(const auto &comp : struct_type.components())
445 {
446 const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type());
447 const exprt rhs_data = simplify_expr(
448 member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns);
450 equal_exprt(lhs_data, rhs_data), symbol_resolve, ns);
451 }
452 }
453 }
454}
455
464 const std::vector<equal_exprt> &equations,
465 const namespacet &ns,
466 messaget::mstreamt &stream)
467{
468 const std::string log_message =
469 "WARNING string_refinement.cpp "
470 "string_identifiers_resolution_from_equations:";
471
472 equation_symbol_mappingt equation_map;
473
474 // Indexes of equations that need to be added to the result
475 std::unordered_set<size_t> required_equations;
476 std::stack<size_t> equations_to_treat;
477
478 for(std::size_t i = 0; i < equations.size(); ++i)
479 {
480 const equal_exprt &eq = equations[i];
481 if(eq.rhs().id() == ID_function_application)
482 {
483 if(required_equations.insert(i).second)
484 equations_to_treat.push(i);
485
486 std::vector<exprt> rhs_strings = extract_strings(eq.rhs(), ns);
487 for(const auto &expr : rhs_strings)
488 equation_map.add(i, expr);
489 }
490 else if(
491 eq.lhs().type().id() != ID_pointer &&
492 has_subtype(eq.lhs().type(), ID_string, ns))
493 {
494 std::vector<exprt> lhs_strings = extract_strings_from_lhs(eq.lhs(), ns);
495
496 for(const auto &expr : lhs_strings)
497 equation_map.add(i, expr);
498
499 if(lhs_strings.empty())
500 {
501 stream << log_message << "non struct with string subtype "
502 << format(eq.lhs()) << "\n * of type "
503 << format(eq.lhs().type()) << messaget::eom;
504 }
505
506 for(const exprt &expr : extract_strings(eq.rhs(), ns))
507 equation_map.add(i, expr);
508 }
509 }
510
511 // transitively add all equations which depend on the equations to treat
512 while(!equations_to_treat.empty())
513 {
514 const std::size_t i = equations_to_treat.top();
515 equations_to_treat.pop();
516 for(const exprt &string : equation_map.find_expressions(i))
517 {
518 for(const std::size_t j : equation_map.find_equations(string))
519 {
520 if(required_equations.insert(j).second)
521 equations_to_treat.push(j);
522 }
523 }
524 }
525
526 union_find_replacet result;
527 for(const std::size_t i : required_equations)
528 add_string_equation_to_symbol_resolution(equations[i], result, ns);
529
530 return result;
531}
532
533#ifdef DEBUG
535static void
536output_equations(std::ostream &output, const std::vector<exprt> &equations)
537{
538 for(std::size_t i = 0; i < equations.size(); ++i)
539 output << " [" << i << "] " << format(equations[i]) << std::endl;
540}
541#endif
542
553// NOLINTNEXTLINE
556// NOLINTNEXTLINE
561// NOLINTNEXTLINE
608{
609#ifdef DEBUG
610 log.debug() << "dec_solve: Initial set of equations" << messaget::eom;
611 output_equations(log.debug(), equations);
612#endif
613
614 log.debug() << "dec_solve: Build symbol solver from equations"
615 << messaget::eom;
616 // symbol_resolve is used by get and is kept between calls to dec_solve,
617 // that's why we use a class member here
620#ifdef DEBUG
621 log.debug() << "symbol resolve:" << messaget::eom;
622 for(const auto &pair : symbol_resolve.to_vector())
623 log.debug() << format(pair.first) << " --> " << format(pair.second)
624 << messaget::eom;
625#endif
626
627 const union_find_replacet string_id_symbol_resolve =
629 [&] {
630 std::vector<equal_exprt> equalities;
631 for(const auto &eq : equations)
632 {
633 if(auto equal_expr = expr_try_dynamic_cast<equal_exprt>(eq))
634 equalities.push_back(*equal_expr);
635 }
636 return equalities;
637 }(),
638 ns,
639 log.debug());
640#ifdef DEBUG
641 log.debug() << "symbol resolve string:" << messaget::eom;
642 for(const auto &pair : string_id_symbol_resolve.to_vector())
643 {
644 log.debug() << format(pair.first) << " --> " << format(pair.second)
645 << messaget::eom;
646 }
647#endif
648
649 log.debug() << "dec_solve: Replacing string ids and simplifying arguments"
650 " in function applications"
651 << messaget::eom;
652 for(exprt &expr : equations)
653 {
654 auto it = expr.depth_begin();
655 while(it != expr.depth_end())
656 {
658 {
659 // Simplification is required because the array pool may not realize
660 // that an expression like
661 // `(unsignedbv[16]*)((signedbv[8]*)&constarray[0] + 0)` is the
662 // same pointer as `&constarray[0]
663 simplify(it.mutate(), ns);
664 string_id_symbol_resolve.replace_expr(it.mutate());
665 it.next_sibling_or_parent();
666 }
667 else
668 ++it;
669 }
670 }
671
672 // Constraints start clear at each `dec_solve` call.
673 string_constraintst constraints;
674 for(auto &expr : equations)
676
677#ifdef DEBUG
678 output_equations(log.debug(), equations);
679#endif
680
681 log.debug() << "dec_solve: compute dependency graph and remove function "
682 << "applications captured by the dependencies:" << messaget::eom;
683 std::vector<exprt> local_equations;
684 for(const exprt &eq : equations)
685 {
686 // Ensures that arrays that are equal, are associated to the same nodes
687 // in the graph.
688 const exprt eq_with_char_array_replaced_with_representative_elements =
690 const optionalt<exprt> new_equation = add_node(
692 eq_with_char_array_replaced_with_representative_elements,
695 if(new_equation)
696 local_equations.push_back(*new_equation);
697 else
698 local_equations.push_back(eq);
699 }
700 equations.clear();
701
702#ifdef DEBUG
704#endif
705
706 log.debug() << "dec_solve: add constraints" << messaget::eom;
708
709#ifdef DEBUG
710 output_equations(log.debug(), equations);
711#endif
712
713#ifdef DEBUG
714 log.debug() << "dec_solve: arrays_of_pointers:" << messaget::eom;
716 {
717 log.debug() << " * " << format(pair.first) << "\t--> "
718 << format(pair.second) << " : " << format(pair.second.type())
719 << messaget::eom;
720 }
721#endif
722
723 for(const auto &eq : local_equations)
724 {
725#ifdef DEBUG
726 log.debug() << "dec_solve: set_to " << format(eq) << messaget::eom;
727#endif
728 supert::set_to(eq, true);
729 }
730
732 constraints.universal.begin(),
733 constraints.universal.end(),
734 std::back_inserter(axioms.universal),
735 [&](string_constraintt constraint) {
736 constraint.replace_expr(symbol_resolve);
737 DATA_INVARIANT(
738 is_valid_string_constraint(log.error(), ns, constraint),
739 string_refinement_invariantt(
740 "string constraints satisfy their invariant"));
741 return constraint;
742 });
743
745 constraints.not_contains.begin(),
746 constraints.not_contains.end(),
747 std::back_inserter(axioms.not_contains),
749 replace(symbol_resolve, axiom);
750 return axiom;
751 });
752
753 // Used to store information about witnesses for not_contains constraints
754 std::unordered_map<string_not_contains_constraintt, symbol_exprt>
755 not_contain_witnesses;
756 for(const auto &nc_axiom : axioms.not_contains)
757 {
758 const auto &witness_type = [&] {
759 const auto &rtype = to_array_type(nc_axiom.s0.type());
760 const typet &index_type = rtype.size().type();
762 }();
763 not_contain_witnesses.emplace(
764 nc_axiom, generator.fresh_symbol("not_contains_witness", witness_type));
765 }
766
767 for(const exprt &lemma : constraints.existential)
768 {
770 }
771
772 // All generated strings should have non-negative length
773 for(const auto &pair : generator.array_pool.created_strings())
774 {
775 exprt length = generator.array_pool.get_or_create_length(pair.first);
776 add_lemma(
777 binary_relation_exprt{length, ID_ge, from_integer(0, length.type())});
778 }
779
780 // Initial try without index set
781 const auto get = [this](const exprt &expr) { return this->get(expr); };
783 const decision_proceduret::resultt initial_result = supert::dec_solve();
784 if(initial_result == resultt::D_SATISFIABLE)
785 {
786 bool satisfied;
787 std::vector<exprt> counter_examples;
788 std::tie(satisfied, counter_examples) = check_axioms(
789 axioms,
790 generator,
791 get,
792 log.debug(),
793 ns,
796 not_contain_witnesses);
797 if(satisfied)
798 {
799 log.debug() << "check_SAT: the model is correct" << messaget::eom;
801 }
802 log.debug() << "check_SAT: got SAT but the model is not correct"
803 << messaget::eom;
804 }
805 else
806 {
807 log.debug() << "check_SAT: got UNSAT or ERROR" << messaget::eom;
808 return initial_result;
809 }
810
813 current_constraints.clear();
814 const auto initial_instances =
815 generate_instantiations(index_sets, axioms, not_contain_witnesses);
816 for(const auto &instance : initial_instances)
817 {
819 }
820
821 while((loop_bound_--) > 0)
822 {
824 const decision_proceduret::resultt refined_result = supert::dec_solve();
825
826 if(refined_result == resultt::D_SATISFIABLE)
827 {
828 bool satisfied;
829 std::vector<exprt> counter_examples;
830 std::tie(satisfied, counter_examples) = check_axioms(
831 axioms,
832 generator,
833 get,
834 log.debug(),
835 ns,
838 not_contain_witnesses);
839 if(satisfied)
840 {
841 log.debug() << "check_SAT: the model is correct" << messaget::eom;
843 }
844
845 log.debug()
846 << "check_SAT: got SAT but the model is not correct, refining..."
847 << messaget::eom;
848
849 // Since the model is not correct although we got SAT, we need to refine
850 // the property we are checking by adding more indices to the index set,
851 // and instantiating universal formulas with this indices.
852 // We will then relaunch the solver with these added lemmas.
853 index_sets.current.clear();
855
857
858 if(index_sets.current.empty())
859 {
860 if(axioms.not_contains.empty())
861 {
862 log.error() << "dec_solve: current index set is empty, "
863 << "this should not happen" << messaget::eom;
864 return resultt::D_ERROR;
865 }
866 else
867 {
868 log.debug() << "dec_solve: current index set is empty, "
869 << "adding counter examples" << messaget::eom;
870 for(const auto &counter : counter_examples)
871 add_lemma(counter);
872 }
873 }
874 current_constraints.clear();
875 const auto instances =
876 generate_instantiations(index_sets, axioms, not_contain_witnesses);
877 for(const auto &instance : instances)
878 add_lemma(
880 }
881 else
882 {
883 log.debug() << "check_SAT: default return "
884 << static_cast<int>(refined_result) << messaget::eom;
885 return refined_result;
886 }
887 }
888 log.debug() << "string_refinementt::dec_solve reached the maximum number"
889 << "of steps allowed" << messaget::eom;
890 return resultt::D_ERROR;
891}
897 const exprt &lemma,
898 const bool simplify_lemma)
899{
900 if(!seen_instances.insert(lemma).second)
901 return;
902
903 current_constraints.push_back(lemma);
904
905 exprt simple_lemma = lemma;
906 if(simplify_lemma)
907 {
908 simplify(simple_lemma, ns);
909 }
910
911 if(simple_lemma.is_true())
912 {
913#if 0
914 log.debug() << "string_refinementt::add_lemma : tautology" << messaget::eom;
915#endif
916 return;
917 }
918
919 symbol_resolve.replace_expr(simple_lemma);
920
921 // Replace empty arrays with array_of expression because the solver cannot
922 // handle empty arrays.
923 for(auto it = simple_lemma.depth_begin(); it != simple_lemma.depth_end();)
924 {
925 if(it->id() == ID_array && it->operands().empty())
926 {
927 it.mutate() = array_of_exprt(
930 to_array_type(it->type()));
931 it.next_sibling_or_parent();
932 }
933 else
934 ++it;
935 }
936
937 log.debug() << "adding lemma " << format(simple_lemma) << messaget::eom;
938
939 prop.l_set_to_true(convert(simple_lemma));
940}
941
957 const std::function<exprt(const exprt &)> &super_get,
958 const namespacet &ns,
959 messaget::mstreamt &stream,
960 const array_string_exprt &arr,
961 const array_poolt &array_pool)
962{
963 const auto &size_from_pool = array_pool.get_length_if_exists(arr);
964 exprt size_val;
965 if(size_from_pool.has_value())
966 {
967 const exprt size = size_from_pool.value();
968 size_val = simplify_expr(super_get(size), ns);
969 if(size_val.id() != ID_constant)
970 {
971 stream << "(sr::get_valid_array_size) string of unknown size: "
972 << format(size_val) << messaget::eom;
973 return {};
974 }
975 }
976 else if(to_array_type(arr.type()).size().id() == ID_constant)
977 size_val = simplify_expr(to_array_type(arr.type()).size(), ns);
978 else
979 return {};
980
981 auto n_opt = numeric_cast<std::size_t>(size_val);
982 if(!n_opt)
983 {
984 stream << "(sr::get_valid_array_size) size is not valid" << messaget::eom;
985 return {};
986 }
987
988 return size_val;
989}
990
1001 const std::function<exprt(const exprt &)> &super_get,
1002 const namespacet &ns,
1003 messaget::mstreamt &stream,
1004 const array_string_exprt &arr,
1005 const array_poolt &array_pool)
1006{
1007 const auto size =
1008 get_valid_array_size(super_get, ns, stream, arr, array_pool);
1009 if(!size.has_value())
1010 {
1011 return {};
1012 }
1013
1014 const size_t n = numeric_cast<std::size_t>(size.value()).value();
1015
1017 {
1018 stream << "(sr::get_valid_array_size) long string (size "
1019 << " = " << n << ") " << format(arr) << messaget::eom;
1020 stream << "(sr::get_valid_array_size) consider reducing "
1021 "max-nondet-string-length so "
1022 "that no string exceeds "
1024 << " in length and "
1025 "make sure all functions returning strings are loaded"
1026 << messaget::eom;
1027 stream << "(sr::get_valid_array_size) this can also happen on invalid "
1028 "object access"
1029 << messaget::eom;
1030 return nil_exprt();
1031 }
1032
1033 const exprt arr_val = simplify_expr(super_get(arr), ns);
1034 const typet char_type = to_array_type(arr.type()).subtype();
1035 const typet &index_type = size.value().type();
1036
1037 if(
1038 const auto &array = interval_sparse_arrayt::of_expr(
1040 return array->concretize(n, index_type);
1041 return {};
1042}
1043
1048static std::string string_of_array(const array_exprt &arr)
1049{
1050 if(arr.type().id() != ID_array)
1051 return std::string("");
1052
1053 exprt size_expr = to_array_type(arr.type()).size();
1054 auto n = numeric_cast_v<std::size_t>(to_constant_expr(size_expr));
1055 return utf16_constant_array_to_java(arr, n);
1056}
1057
1067 const std::function<exprt(const exprt &)> &super_get,
1068 const namespacet &ns,
1069 messaget::mstreamt &stream,
1070 const array_string_exprt &arr,
1071 array_poolt &array_pool)
1072{
1073 stream << "- " << format(arr) << ":\n";
1074 stream << std::string(4, ' ') << "- type: " << format(arr.type())
1075 << messaget::eom;
1076 const auto arr_model_opt = get_array(super_get, ns, stream, arr, array_pool);
1077 if(arr_model_opt)
1078 {
1079 stream << std::string(4, ' ') << "- char_array: " << format(*arr_model_opt)
1080 << '\n';
1081 stream << std::string(4, ' ')
1082 << "- type : " << format(arr_model_opt->type()) << messaget::eom;
1083 const exprt simple = simplify_expr(*arr_model_opt, ns);
1084 stream << std::string(4, ' ')
1085 << "- simplified_char_array: " << format(simple) << messaget::eom;
1086 if(
1087 const auto concretized_array = get_array(
1088 super_get, ns, stream, to_array_string_expr(simple), array_pool))
1089 {
1090 stream << std::string(4, ' ')
1091 << "- concretized_char_array: " << format(*concretized_array)
1092 << messaget::eom;
1093
1094 if(
1095 const auto array_expr =
1096 expr_try_dynamic_cast<array_exprt>(*concretized_array))
1097 {
1098 stream << std::string(4, ' ') << "- as_string: \""
1099 << string_of_array(*array_expr) << "\"\n";
1100 }
1101 else
1102 stream << std::string(2, ' ') << "- warning: not an array"
1103 << messaget::eom;
1104 return *concretized_array;
1105 }
1106 return simple;
1107 }
1108 stream << std::string(4, ' ') << "- incomplete model" << messaget::eom;
1109 return arr;
1110}
1111
1115 const string_constraint_generatort &generator,
1116 messaget::mstreamt &stream,
1117 const namespacet &ns,
1118 const std::function<exprt(const exprt &)> &super_get,
1119 const std::vector<symbol_exprt> &symbols,
1120 array_poolt &array_pool)
1121{
1122 stream << "debug_model:" << '\n';
1123 for(const auto &pointer_array : generator.array_pool.get_arrays_of_pointers())
1124 {
1125 const auto arr = pointer_array.second;
1126 const exprt model =
1127 get_char_array_and_concretize(super_get, ns, stream, arr, array_pool);
1128
1129 stream << "- " << format(arr) << ":\n"
1130 << " - pointer: " << format(pointer_array.first) << "\n"
1131 << " - model: " << format(model) << messaget::eom;
1132 }
1133
1134 for(const auto &symbol : symbols)
1135 {
1136 stream << " - " << symbol.get_identifier() << ": "
1137 << format(super_get(symbol)) << '\n';
1138 }
1139 stream << messaget::eom;
1140}
1141
1155 const with_exprt &expr,
1156 const exprt &index,
1157 const bool left_propagate)
1158{
1159 return left_propagate ? interval_sparse_arrayt(expr).to_if_expression(index)
1160 : sparse_arrayt::to_if_expression(expr, index);
1161}
1162
1170 const array_exprt &array_expr,
1171 const exprt &index,
1172 symbol_generatort &symbol_generator)
1173{
1174 const typet &char_type = array_expr.type().element_type();
1175 const exprt default_val = symbol_generator("out_of_bound_access", char_type);
1176 const interval_sparse_arrayt sparse_array(array_expr, default_val);
1177 return sparse_array.to_if_expression(index);
1178}
1179
1181 const if_exprt &if_expr,
1182 const exprt &index,
1183 symbol_generatort &symbol_generator,
1184 const bool left_propagate)
1185{
1186 exprt true_index = index_exprt(if_expr.true_case(), index);
1187 exprt false_index = index_exprt(if_expr.false_case(), index);
1188
1189 // Substitute recursively in branches of conditional expressions
1190 optionalt<exprt> substituted_true_case =
1191 substitute_array_access(true_index, symbol_generator, left_propagate);
1192 optionalt<exprt> substituted_false_case =
1193 substitute_array_access(false_index, symbol_generator, left_propagate);
1194
1195 return if_exprt(
1196 if_expr.cond(),
1197 substituted_true_case ? *substituted_true_case : true_index,
1198 substituted_false_case ? *substituted_false_case : false_index);
1199}
1200
1202 const index_exprt &index_expr,
1203 symbol_generatort &symbol_generator,
1204 const bool left_propagate)
1205{
1206 const exprt &array = index_expr.array();
1207 if(auto array_of = expr_try_dynamic_cast<array_of_exprt>(array))
1208 return array_of->op();
1209 if(auto array_with = expr_try_dynamic_cast<with_exprt>(array))
1211 *array_with, index_expr.index(), left_propagate);
1212 if(auto array_expr = expr_try_dynamic_cast<array_exprt>(array))
1214 *array_expr, index_expr.index(), symbol_generator);
1215 if(auto if_expr = expr_try_dynamic_cast<if_exprt>(array))
1217 *if_expr, index_expr.index(), symbol_generator, left_propagate);
1218
1219 INVARIANT(
1220 array.is_nil() || array.id() == ID_symbol || array.id() == ID_nondet_symbol,
1221 std::string(
1222 "in case the array is unknown, it should be a symbol or nil, id: ") +
1223 id2string(array.id()));
1224 return {};
1225}
1226
1231 exprt &expr,
1232 symbol_generatort &symbol_generator,
1233 const bool left_propagate)
1234{
1235 // Recurse down structure and modify on the way.
1236 for(auto it = expr.depth_begin(), itend = expr.depth_end(); it != itend; ++it)
1237 {
1238 if(const auto index_expr = expr_try_dynamic_cast<index_exprt>(*it))
1239 {
1240 optionalt<exprt> result =
1241 substitute_array_access(*index_expr, symbol_generator, left_propagate);
1242
1243 // Only perform a write when we have something changed.
1244 if(result)
1245 it.mutate() = *result;
1246 }
1247 }
1248}
1249
1270 exprt expr,
1271 symbol_generatort &symbol_generator,
1272 const bool left_propagate)
1273{
1274 substitute_array_access_in_place(expr, symbol_generator, left_propagate);
1275 return expr;
1276}
1277
1289 const string_not_contains_constraintt &constraint,
1290 const symbol_exprt &univ_var,
1291 const std::function<exprt(const exprt &)> &get)
1292{
1293 // If the for all is vacuously true, the negation is false.
1294 const auto lbe = numeric_cast_v<mp_integer>(
1295 to_constant_expr(get(constraint.exists_lower_bound)));
1296 const auto ube = numeric_cast_v<mp_integer>(
1297 to_constant_expr(get(constraint.exists_upper_bound)));
1298 const auto univ_bounds = and_exprt(
1299 binary_relation_exprt(get(constraint.univ_lower_bound), ID_le, univ_var),
1300 binary_relation_exprt(get(constraint.univ_upper_bound), ID_gt, univ_var));
1301
1302 // The negated existential becomes an universal, and this is the unrolling of
1303 // that universal quantifier.
1304 // Ff the upper bound is smaller than the lower bound (specifically, it might
1305 // actually be negative as it is initially unconstrained) then there is
1306 // nothing to do (and the reserve call would fail).
1307 if(ube < lbe)
1308 return and_exprt(univ_bounds, get(constraint.premise));
1309
1310 std::vector<exprt> conjuncts;
1311 conjuncts.reserve(numeric_cast_v<std::size_t>(ube - lbe));
1312 for(mp_integer i = lbe; i < ube; ++i)
1313 {
1314 const constant_exprt i_expr = from_integer(i, univ_var.type());
1315 const exprt s0_char =
1316 get(index_exprt(constraint.s0, plus_exprt(univ_var, i_expr)));
1317 const exprt s1_char = get(index_exprt(constraint.s1, i_expr));
1318 conjuncts.push_back(equal_exprt(s0_char, s1_char));
1319 }
1320 const exprt equal_strings = conjunction(conjuncts);
1321 return and_exprt(univ_bounds, get(constraint.premise), equal_strings);
1322}
1323
1328template <typename T>
1330 messaget::mstreamt &stream,
1331 const T &axiom,
1332 const T &axiom_in_model,
1333 const exprt &negaxiom,
1334 const exprt &with_concretized_arrays)
1335{
1336 stream << std::string(4, ' ') << "- axiom:\n" << std::string(6, ' ');
1337 stream << to_string(axiom);
1338 stream << '\n'
1339 << std::string(4, ' ') << "- axiom_in_model:\n"
1340 << std::string(6, ' ');
1341 stream << to_string(axiom_in_model) << '\n'
1342 << std::string(4, ' ') << "- negated_axiom:\n"
1343 << std::string(6, ' ') << format(negaxiom) << '\n';
1344 stream << std::string(4, ' ') << "- negated_axiom_with_concretized_arrays:\n"
1345 << std::string(6, ' ') << format(with_concretized_arrays) << '\n';
1346}
1347
1349static std::pair<bool, std::vector<exprt>> check_axioms(
1350 const string_axiomst &axioms,
1352 const std::function<exprt(const exprt &)> &get,
1353 messaget::mstreamt &stream,
1354 const namespacet &ns,
1355 bool use_counter_example,
1356 const union_find_replacet &symbol_resolve,
1357 const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
1358 &not_contain_witnesses)
1359{
1360 stream << "string_refinementt::check_axioms:" << messaget::eom;
1361
1362 stream << "symbol_resolve:" << messaget::eom;
1363 auto pairs = symbol_resolve.to_vector();
1364 for(const auto &pair : pairs)
1365 stream << " - " << format(pair.first) << " --> " << format(pair.second)
1366 << messaget::eom;
1367
1368#ifdef DEBUG
1370 generator,
1371 stream,
1372 ns,
1373 get,
1374 generator.fresh_symbol.created_symbols,
1375 generator.array_pool);
1376#endif
1377
1378 // Maps from indexes of violated universal axiom to a witness of violation
1379 std::map<size_t, exprt> violated;
1380
1381 stream << "string_refinement::check_axioms: " << axioms.universal.size()
1382 << " universal axioms:" << messaget::eom;
1383 for(size_t i = 0; i < axioms.universal.size(); i++)
1384 {
1385 const string_constraintt &axiom = axioms.universal[i];
1386 const string_constraintt axiom_in_model(
1387 axiom.univ_var,
1388 get(axiom.lower_bound),
1389 get(axiom.upper_bound),
1390 get(axiom.body));
1391
1392 exprt negaxiom = axiom_in_model.negation();
1393 negaxiom = simplify_expr(negaxiom, ns);
1394
1395 stream << std::string(2, ' ') << i << ".\n";
1396 const exprt with_concretized_arrays =
1397 substitute_array_access(negaxiom, generator.fresh_symbol, true);
1399 stream, axiom, axiom_in_model, negaxiom, with_concretized_arrays);
1400
1401 if(
1402 const auto &witness = find_counter_example(
1403 ns,
1404 with_concretized_arrays,
1405 axiom.univ_var,
1406 stream.message.get_message_handler()))
1407 {
1408 stream << std::string(4, ' ')
1409 << "- violated_for: " << format(axiom.univ_var) << "="
1410 << format(*witness) << messaget::eom;
1411 violated[i] = *witness;
1412 }
1413 else
1414 stream << std::string(4, ' ') << "- correct" << messaget::eom;
1415 }
1416
1417 // Maps from indexes of violated not_contains axiom to a witness of violation
1418 std::map<std::size_t, exprt> violated_not_contains;
1419
1420 stream << "there are " << axioms.not_contains.size() << " not_contains axioms"
1421 << messaget::eom;
1422 for(std::size_t i = 0; i < axioms.not_contains.size(); i++)
1423 {
1424 const string_not_contains_constraintt &nc_axiom = axioms.not_contains[i];
1425 const symbol_exprt univ_var = generator.fresh_symbol(
1426 "not_contains_univ_var", nc_axiom.s0.length_type());
1427 const exprt negated_axiom = negation_of_not_contains_constraint(
1428 nc_axiom, univ_var, [&](const exprt &expr) {
1429 return simplify_expr(get(expr), ns);
1430 });
1431
1432 stream << std::string(2, ' ') << i << ".\n";
1434 stream, nc_axiom, nc_axiom, negated_axiom, negated_axiom);
1435
1436 if(
1437 const auto witness = find_counter_example(
1438 ns, negated_axiom, univ_var, stream.message.get_message_handler()))
1439 {
1440 stream << std::string(4, ' ')
1441 << "- violated_for: " << univ_var.get_identifier() << "="
1442 << format(*witness) << messaget::eom;
1443 violated_not_contains[i] = *witness;
1444 }
1445 }
1446
1447 if(violated.empty() && violated_not_contains.empty())
1448 {
1449 stream << "no violated property" << messaget::eom;
1450 return {true, std::vector<exprt>()};
1451 }
1452 else
1453 {
1454 stream << violated.size() << " universal string axioms can be violated"
1455 << messaget::eom;
1456 stream << violated_not_contains.size()
1457 << " not_contains string axioms can be violated" << messaget::eom;
1458
1459 if(use_counter_example)
1460 {
1461 std::vector<exprt> lemmas;
1462
1463 for(const auto &v : violated)
1464 {
1465 const exprt &val = v.second;
1466 const string_constraintt &axiom = axioms.universal[v.first];
1467
1468 exprt instance(axiom.body);
1469 replace_expr(axiom.univ_var, val, instance);
1470 // We are not sure the index set contains only positive numbers
1471 and_exprt bounds(
1472 axiom.univ_within_bounds(),
1473 binary_relation_exprt(from_integer(0, val.type()), ID_le, val));
1474 replace_expr(axiom.univ_var, val, bounds);
1475 const implies_exprt counter(bounds, instance);
1476 lemmas.push_back(counter);
1477 }
1478
1479 for(const auto &v : violated_not_contains)
1480 {
1481 const exprt &val = v.second;
1482 const string_not_contains_constraintt &axiom =
1483 axioms.not_contains[v.first];
1484
1485 const exprt func_val =
1486 index_exprt(not_contain_witnesses.at(axiom), val);
1487 const exprt comp_val = simplify_sum(plus_exprt(val, func_val));
1488
1489 std::set<std::pair<exprt, exprt>> indices;
1490 indices.insert(std::pair<exprt, exprt>(comp_val, func_val));
1491 const exprt counter =
1492 ::instantiate_not_contains(axiom, indices, not_contain_witnesses)[0];
1493 lemmas.push_back(counter);
1494 }
1495 return {false, lemmas};
1496 }
1497 }
1498 return {false, std::vector<exprt>()};
1499}
1500
1504{
1505 return linear_functiont{f}.to_expr();
1506}
1507
1514 index_set_pairt &index_set,
1515 const namespacet &ns,
1516 const string_axiomst &axioms)
1517{
1518 for(const auto &axiom : axioms.universal)
1519 initial_index_set(index_set, ns, axiom);
1520 for(const auto &axiom : axioms.not_contains)
1521 initial_index_set(index_set, ns, axiom);
1522}
1523
1529 index_set_pairt &index_set,
1530 const namespacet &ns,
1531 const std::vector<exprt> &current_constraints)
1532{
1533 for(const auto &axiom : current_constraints)
1534 update_index_set(index_set, ns, axiom);
1535}
1536
1543static void get_sub_arrays(const exprt &array_expr, std::vector<exprt> &accu)
1544{
1545 if(array_expr.id() == ID_if)
1546 {
1547 get_sub_arrays(to_if_expr(array_expr).true_case(), accu);
1548 get_sub_arrays(to_if_expr(array_expr).false_case(), accu);
1549 }
1550 else
1551 {
1552 if(array_expr.type().id() == ID_array)
1553 {
1554 // TODO: check_that it does not contain any sub_array
1555 accu.push_back(array_expr);
1556 }
1557 else
1558 {
1559 for(const auto &operand : array_expr.operands())
1560 get_sub_arrays(operand, accu);
1561 }
1562 }
1563}
1564
1571 index_set_pairt &index_set,
1572 const namespacet &ns,
1573 const exprt &s,
1574 exprt i)
1575{
1576 simplify(i, ns);
1577 const bool is_size_t = numeric_cast<std::size_t>(i).has_value();
1578 if(i.id() != ID_constant || is_size_t)
1579 {
1580 std::vector<exprt> sub_arrays;
1581 get_sub_arrays(s, sub_arrays);
1582 for(const auto &sub : sub_arrays)
1583 if(index_set.cumulative[sub].insert(i).second)
1584 index_set.current[sub].insert(i);
1585 }
1586}
1587
1604 index_set_pairt &index_set,
1605 const namespacet &ns,
1606 const exprt &qvar,
1607 const exprt &upper_bound,
1608 const exprt &s,
1609 const exprt &i)
1610{
1612 s.id() == ID_symbol || s.id() == ID_nondet_symbol || s.id() == ID_array ||
1613 s.id() == ID_if);
1614 if(s.id() == ID_array)
1615 {
1616 for(std::size_t j = 0; j < s.operands().size(); ++j)
1617 add_to_index_set(index_set, ns, s, from_integer(j, i.type()));
1618 return;
1619 }
1620 if(auto ite = expr_try_dynamic_cast<if_exprt>(s))
1621 {
1622 initial_index_set(index_set, ns, qvar, upper_bound, ite->true_case(), i);
1623 initial_index_set(index_set, ns, qvar, upper_bound, ite->false_case(), i);
1624 return;
1625 }
1626 const minus_exprt u_minus_1(upper_bound, from_integer(1, upper_bound.type()));
1627 exprt i_copy = i;
1628 replace_expr(qvar, u_minus_1, i_copy);
1629 add_to_index_set(index_set, ns, s, i_copy);
1630}
1631
1633 index_set_pairt &index_set,
1634 const namespacet &ns,
1635 const string_constraintt &axiom)
1636{
1637 const symbol_exprt &qvar = axiom.univ_var;
1638 const auto &bound = axiom.upper_bound;
1639 auto it = axiom.body.depth_begin();
1640 const auto end = axiom.body.depth_end();
1641 while(it != end)
1642 {
1643 if(it->id() == ID_index && is_char_type(it->type()))
1644 {
1645 const auto &index_expr = to_index_expr(*it);
1646 const auto &s = index_expr.array();
1647 initial_index_set(index_set, ns, qvar, bound, s, index_expr.index());
1648 it.next_sibling_or_parent();
1649 }
1650 else
1651 ++it;
1652 }
1653}
1654
1656 index_set_pairt &index_set,
1657 const namespacet &ns,
1659{
1660 auto it = axiom.premise.depth_begin();
1661 const auto end = axiom.premise.depth_end();
1662 while(it != end)
1663 {
1664 if(it->id() == ID_index && is_char_type(it->type()))
1665 {
1666 const exprt &s = to_index_expr(*it).array();
1667 const exprt &i = to_index_expr(*it).index();
1668
1669 // cur is of the form s[i] and no quantified variable appears in i
1670 add_to_index_set(index_set, ns, s, i);
1671
1672 it.next_sibling_or_parent();
1673 }
1674 else
1675 ++it;
1676 }
1677
1678 const minus_exprt kminus1(
1680 add_to_index_set(index_set, ns, axiom.s1.content(), kminus1);
1681}
1682
1688 index_set_pairt &index_set,
1689 const namespacet &ns,
1690 const exprt &formula)
1691{
1692 std::list<exprt> to_process;
1693 to_process.push_back(formula);
1694
1695 while(!to_process.empty())
1696 {
1697 exprt cur = to_process.back();
1698 to_process.pop_back();
1699 if(cur.id() == ID_index && is_char_type(cur.type()))
1700 {
1701 const exprt &s = to_index_expr(cur).array();
1702 const exprt &i = to_index_expr(cur).index();
1704 s.type().id() == ID_array,
1705 string_refinement_invariantt("index expressions must index on arrays"));
1706 exprt simplified = simplify_sum(i);
1707 if(s.id() != ID_array) // do not update index set of constant arrays
1708 add_to_index_set(index_set, ns, s, simplified);
1709 }
1710 else
1711 {
1712 forall_operands(it, cur)
1713 to_process.push_back(*it);
1714 }
1715 }
1716}
1717
1736static std::vector<exprt> instantiate(
1738 const index_set_pairt &index_set,
1739 const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
1740 &witnesses)
1741{
1742 const array_string_exprt &s0 = axiom.s0;
1743 const array_string_exprt &s1 = axiom.s1;
1744
1745 const auto &index_set0 = index_set.cumulative.find(s0.content());
1746 const auto &index_set1 = index_set.cumulative.find(s1.content());
1747 const auto &current_index_set0 = index_set.current.find(s0.content());
1748 const auto &current_index_set1 = index_set.current.find(s1.content());
1749
1750 if(
1751 index_set0 != index_set.cumulative.end() &&
1752 index_set1 != index_set.cumulative.end() &&
1753 current_index_set0 != index_set.current.end() &&
1754 current_index_set1 != index_set.current.end())
1755 {
1756 typedef std::pair<exprt, exprt> expr_pairt;
1757 std::set<expr_pairt> index_pairs;
1758
1759 for(const auto &ic0 : current_index_set0->second)
1760 for(const auto &i1 : index_set1->second)
1761 index_pairs.insert(expr_pairt(ic0, i1));
1762 for(const auto &ic1 : current_index_set1->second)
1763 for(const auto &i0 : index_set0->second)
1764 index_pairs.insert(expr_pairt(i0, ic1));
1765
1766 return ::instantiate_not_contains(axiom, index_pairs, witnesses);
1767 }
1768 return {};
1769}
1770
1778exprt substitute_array_lists(exprt expr, size_t string_max_length)
1779{
1780 for(auto &operand : expr.operands())
1781 operand = substitute_array_lists(operand, string_max_length);
1782
1783 if(expr.id() == ID_array_list)
1784 {
1786 expr.operands().size() >= 2,
1787 string_refinement_invariantt("array-lists must have at least two "
1788 "operands"));
1789 const typet &char_type = expr.operands()[1].type();
1791 exprt ret_expr = array_of_exprt(from_integer(0, char_type), arr_type);
1792
1793 for(size_t i = 0; i < expr.operands().size(); i += 2)
1794 {
1795 const exprt &index = expr.operands()[i];
1796 const exprt &value = expr.operands()[i + 1];
1797 const auto index_value = numeric_cast<std::size_t>(index);
1798 if(
1799 !index.is_constant() ||
1800 (index_value && *index_value < string_max_length))
1801 ret_expr = with_exprt(ret_expr, index, value);
1802 }
1803 return ret_expr;
1804 }
1805
1806 return expr;
1807}
1808
1817{
1818 const auto super_get = [this](const exprt &expr) {
1819 return supert::get(expr);
1820 };
1821 exprt ecopy(expr);
1822 (void)symbol_resolve.replace_expr(ecopy);
1823
1824 // Special treatment for index expressions
1825 const auto &index_expr = expr_try_dynamic_cast<index_exprt>(ecopy);
1826 if(index_expr && is_char_type(index_expr->type()))
1827 {
1828 std::reference_wrapper<const exprt> current(index_expr->array());
1829 while(current.get().id() == ID_if)
1830 {
1831 const auto &if_expr = expr_dynamic_cast<if_exprt>(current.get());
1832 const exprt cond = get(if_expr.cond());
1833 if(cond.is_true())
1834 current = std::cref(if_expr.true_case());
1835 else if(cond.is_false())
1836 current = std::cref(if_expr.false_case());
1837 else
1839 }
1840 const auto array = supert::get(current.get());
1841 const auto index = get(index_expr->index());
1842
1843 // If the underlying solver does not know about the existence of an array,
1844 // it can return nil, which cannot be used in the expression returned here.
1845 if(array.is_nil())
1846 return index_exprt(current, index);
1847
1848 const exprt unknown =
1849 from_integer(CHARACTER_FOR_UNKNOWN, index_expr->type());
1850 if(
1851 const auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown))
1852 {
1853 if(const auto index_value = numeric_cast<std::size_t>(index))
1854 return sparse_array->at(*index_value);
1855 return sparse_array->to_if_expression(index);
1856 }
1857
1858 INVARIANT(array.id() == ID_symbol || array.id() == ID_nondet_symbol,
1859 "Apart from symbols, array valuations can be interpreted as "
1860 "sparse arrays. Array model : " + array.pretty());
1861 return index_exprt(array, index);
1862 }
1863
1864 if(is_char_array_type(ecopy.type(), ns))
1865 {
1867
1868 if(
1869 const auto from_dependencies =
1870 dependencies.eval(arr, [&](const exprt &expr) { return get(expr); }))
1871 return *from_dependencies;
1872
1873 if(
1874 const auto arr_model_opt =
1875 get_array(super_get, ns, log.debug(), arr, generator.array_pool))
1876 return *arr_model_opt;
1877
1878 if(
1879 const auto &length_from_pool =
1881 {
1882 const exprt length = super_get(length_from_pool.value());
1883
1884 if(const auto n = numeric_cast<std::size_t>(length))
1885 {
1886 const interval_sparse_arrayt sparse_array(
1888 return sparse_array.concretize(*n, length.type());
1889 }
1890 }
1891 return arr;
1892 }
1893 return supert::get(ecopy);
1894}
1895
1906 const namespacet &ns,
1907 const exprt &axiom,
1908 const symbol_exprt &var,
1909 message_handlert &message_handler)
1910{
1911 satcheck_no_simplifiert sat_check(message_handler);
1912 boolbvt solver(ns, sat_check, message_handler);
1913 solver << axiom;
1914
1916 return solver.get(var);
1917 else
1918 return {};
1919}
1920
1922typedef std::map<exprt, std::vector<exprt>> array_index_mapt;
1923
1926{
1927 array_index_mapt indices;
1928 // clang-format off
1929 std::for_each(
1930 expr.depth_begin(),
1931 expr.depth_end(),
1932 [&](const exprt &expr)
1933 {
1934 const auto index_expr = expr_try_dynamic_cast<const index_exprt>(expr);
1935 if(index_expr)
1936 indices[index_expr->array()].push_back(index_expr->index());
1937 });
1938 // clang-format on
1939 return indices;
1940}
1941
1947static bool
1949{
1950 for(auto it = expr.depth_begin(); it != expr.depth_end();)
1951 {
1952 if(
1953 it->id() != ID_plus && it->id() != ID_minus &&
1954 it->id() != ID_unary_minus && *it != var)
1955 {
1956 if(std::find(it->depth_begin(), it->depth_end(), var) != it->depth_end())
1957 return false;
1958 else
1959 it.next_sibling_or_parent();
1960 }
1961 else
1962 ++it;
1963 }
1964 return true;
1965}
1966
1975{
1976 for(auto it = constr.body.depth_begin(); it != constr.body.depth_end();)
1977 {
1978 if(*it == constr.univ_var)
1979 return false;
1980 if(it->id() == ID_index)
1981 it.next_sibling_or_parent();
1982 else
1983 ++it;
1984 }
1985 return true;
1986}
1987
1995 messaget::mstreamt &stream,
1996 const namespacet &ns,
1997 const string_constraintt &constraint)
1998{
1999 const array_index_mapt body_indices = gather_indices(constraint.body);
2000 // Must validate for each string. Note that we have an invariant that the
2001 // second value in the pair is non-empty.
2002 for(const auto &pair : body_indices)
2003 {
2004 // Condition 1: All indices of the same string must be the of the same form
2005 const exprt rep = pair.second.back();
2006 for(size_t j = 0; j < pair.second.size() - 1; j++)
2007 {
2008 const exprt i = pair.second[j];
2009 const equal_exprt equals(rep, i);
2010 const exprt result = simplify_expr(equals, ns);
2011 if(result.is_false())
2012 {
2013 stream << "Indices not equal: " << to_string(constraint)
2014 << ", str: " << format(pair.first) << messaget::eom;
2015 return false;
2016 }
2017 }
2018
2019 // Condition 2: f must be linear in the quantified variable
2020 if(!is_linear_arithmetic_expr(rep, constraint.univ_var))
2021 {
2022 stream << "f is not linear: " << to_string(constraint)
2023 << ", str: " << format(pair.first) << messaget::eom;
2024 return false;
2025 }
2026
2027 // Condition 3: the quantified variable can only occur in indices in the
2028 // body
2029 if(!universal_only_in_index(constraint))
2030 {
2031 stream << "Universal variable outside of index:" << to_string(constraint)
2032 << messaget::eom;
2033 return false;
2034 }
2035 }
2036
2037 return true;
2038}
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
int8_t s1
Definition: bytecode_info.h:59
bitvector_typet index_type()
Definition: c_types.cpp:22
bitvector_typet char_type()
Definition: c_types.cpp:124
Boolean AND.
Definition: std_expr.h:1974
Array constructor from list of elements.
Definition: std_expr.h:1476
const array_typet & type() const
Definition: std_expr.h:1483
Array constructor from single element.
Definition: std_expr.h:1411
Correspondance between arrays and pointers string representations.
Definition: array_pool.h:42
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
const std::unordered_map< exprt, array_string_exprt, irep_hash > & get_arrays_of_pointers() const
Definition: array_pool.h:50
optionalt< exprt > get_length_if_exists(const array_string_exprt &s) const
As opposed to get_length(), do not create a new symbol if the length of the array_string_exprt does n...
Definition: array_pool.cpp:48
const std::unordered_map< array_string_exprt, exprt, irep_hash > & created_strings() const
Return a map mapping all array_string_exprt of the array_pool to their length.
Definition: array_pool.cpp:179
const typet & length_type() const
Definition: string_expr.h:69
exprt & content()
Definition: string_expr.h:74
Arrays with given size.
Definition: std_types.h:763
const exprt & size() const
Definition: std_types.h:790
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:777
const namespacet & ns
Definition: arrays.h:56
messaget log
Definition: arrays.h:57
exprt & lhs()
Definition: std_expr.h:580
exprt & rhs()
Definition: std_expr.h:590
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
Definition: boolbv.h:44
void set_to(const exprt &expr, bool value) override
For a Boolean expression expr, add the constraint 'expr' if value is true, otherwise add 'not expr'.
Definition: boolbv.cpp:518
exprt get(const exprt &expr) const override
Return expr with variables replaced by values from satisfying assignment if available.
Definition: boolbv_get.cpp:20
decision_proceduret::resultt dec_solve() override
Run the decision procedure to solve the problem.
A constant literal expression.
Definition: std_expr.h:2807
resultt
Result of running the decision procedure.
Equality.
Definition: std_expr.h:1225
Maps equation to expressions contained in them and conversely expressions to equations that contain t...
void add(const std::size_t i, const exprt &expr)
Record the fact that equation i contains expression expr
std::vector< std::size_t > find_equations(const exprt &expr)
std::vector< exprt > find_expressions(const std::size_t i)
Base class for all expressions.
Definition: expr.h:54
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:33
depth_iteratort depth_end()
Definition: expr.cpp:267
depth_iteratort depth_begin()
Definition: expr.cpp:265
bool is_false() const
Return whether the expression is a constant representing false.
Definition: expr.cpp:42
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.cpp:26
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
The trinary if-then-else operator.
Definition: std_expr.h:2226
exprt & cond()
Definition: std_expr.h:2243
exprt & false_case()
Definition: std_expr.h:2263
exprt & true_case()
Definition: std_expr.h:2253
Boolean implication.
Definition: std_expr.h:2037
Array index operator.
Definition: std_expr.h:1328
exprt & index()
Definition: std_expr.h:1363
exprt & array()
Definition: std_expr.h:1353
An expression denoting infinity.
Definition: std_expr.h:2890
Represents arrays by the indexes up to which the value remains the same.
exprt to_if_expression(const exprt &index) const
static optionalt< interval_sparse_arrayt > of_expr(const exprt &expr, const exprt &extra_value)
If the expression is an array_exprt or a with_exprt uses the appropriate constructor,...
array_exprt concretize(std::size_t size, const typet &index_type) const
Convert to an array representation, ignores elements at index >= size.
const irep_idt & id() const
Definition: irep.h:396
bool is_nil() const
Definition: irep.h:376
Canonical representation of linear function, for instance, expression $x + x - y + 5 - 3$ would given...
Extract member of struct or union.
Definition: std_expr.h:2667
messaget & message
Definition: message.h:246
mstreamt & error() const
Definition: message.h:399
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
static eomt eom
Definition: message.h:297
Binary minus.
Definition: std_expr.h:973
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
The NIL expression.
Definition: std_expr.h:2874
Boolean negation.
Definition: std_expr.h:2181
The plus expression Associativity is not specified.
Definition: std_expr.h:914
literalt convert(const exprt &expr) override
Convert a Boolean expression and return the corresponding literal.
void l_set_to_true(literalt a)
Definition: prop.h:50
static exprt to_if_expression(const with_exprt &expr, const exprt &index)
Creates an if_expr corresponding to the result of accessing the array at the given index.
optionalt< exprt > make_array_pointer_association(const exprt &return_code, const function_application_exprt &expr)
Associate array to pointer, and array to length.
static bool is_valid_string_constraint(messaget::mstreamt &stream, const namespacet &ns, const string_constraintt &constraint)
Checks the data invariant for string_constraintt.
exprt univ_within_bounds() const
static array_index_mapt gather_indices(const exprt &expr)
exprt negation() const
static bool universal_only_in_index(const string_constraintt &constr)
The universally quantified variable is only allowed to occur in index expressions in the body of a st...
static bool is_linear_arithmetic_expr(const exprt &expr, const symbol_exprt &var)
std::map< exprt, std::vector< exprt > > array_index_mapt
void output_dot(std::ostream &stream) const
void clean_cache()
Clean the cache used by eval
NODISCARD string_constraintst add_constraints(string_constraint_generatort &generatort)
For all builtin call on which a test (or an unsupported buitin) result depends, add the corresponding...
optionalt< exprt > eval(const array_string_exprt &s, const std::function< exprt(const exprt &)> &get_value) const
Attempt to evaluate the given string from the dependencies and valuation of strings on which it depen...
string_constraint_generatort generator
union_find_replacet symbol_resolve
std::vector< exprt > equations
string_axiomst axioms
string_refinementt(const infot &)
const configt config_
decision_proceduret::resultt dec_solve() override
Main decision procedure of the solver.
std::set< exprt > seen_instances
void set_to(const exprt &expr, bool value) override
Record the constraints to ensure that the expression is true when the boolean is true and false other...
string_dependenciest dependencies
index_set_pairt index_sets
exprt get(const exprt &expr) const override
Evaluates the given expression in the valuation found by string_refinementt::dec_solve.
std::vector< exprt > current_constraints
std::vector< exprt > instantiate_not_contains(const string_not_contains_constraintt &axiom, const std::set< std::pair< exprt, exprt > > &index_pairs, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
Instantiates a quantified formula representing not_contains by substituting the quantifiers and gener...
void add_lemma(const exprt &lemma, bool simplify_lemma=true)
Add the given lemma to the solver.
String type.
Definition: std_types.h:901
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Expression to hold a symbol (variable)
Definition: std_expr.h:80
const irep_idt & get_identifier() const
Definition: std_expr.h:109
Generation of fresh symbols of a given type.
Definition: array_pool.h:22
const typet & subtype() const
Definition: type.h:156
The type of an expression, extends irept.
Definition: type.h:29
const typet & subtype() const
Definition: type.h:48
Similar interface to union-find for expressions, with a function for replacing sub-expressions by the...
std::vector< std::pair< exprt, exprt > > to_vector() const
exprt make_union(const exprt &a, const exprt &b)
Merge the set containing a and the set containing b.
bool replace_expr(exprt &expr) const
Replace subexpressions of expr by the representative element of the set they belong to.
Operator to update elements in structs and arrays.
Definition: std_expr.h:2312
#define forall_operands(it, expr)
Definition: expr.h:18
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subtype(const typet &type, const std::function< bool(const typet &)> &pred, const namespacet &ns)
returns true if any of the contained types satisfies pred
Definition: expr_util.cpp:152
Deprecated expression utility functions.
static format_containert< T > format(const T &o)
Definition: format.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
Magic numbers used throughout the codebase.
const std::size_t MAX_CONCRETE_STRING_SIZE
Definition: magic.h:14
bool can_cast_expr< function_application_exprt >(const exprt &base)
nonstd::optional< T > optionalt
Definition: optional.h:35
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< iteratort > make_range(iteratort begin, iteratort end)
Definition: range.h:524
bool replace_expr(const exprt &what, const exprt &by, exprt &dest)
bool simplify(exprt &expr, const namespacet &ns)
exprt simplify_expr(exprt src, const namespacet &ns)
int solver(std::istream &in)
BigInt mp_integer
Definition: smt_terms.h:12
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:503
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:34
bool can_cast_expr< equal_exprt >(const exprt &base)
Definition: std_expr.h:1249
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1391
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2291
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:2840
const equal_exprt & to_equal_expr(const exprt &expr)
Cast an exprt to an equal_exprt.
Definition: std_expr.h:1265
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:832
#define CHARACTER_FOR_UNKNOWN
Module: String solver Author: Diffblue Ltd.
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
void merge(string_constraintst &result, string_constraintst other)
Merge two sets of constraints by appending to the first one.
Defines related function for string constraints.
std::vector< exprt > instantiate_not_contains(const string_not_contains_constraintt &axiom, const std::set< std::pair< exprt, exprt > > &index_pairs, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
optionalt< exprt > add_node(string_dependenciest &dependencies, const exprt &expr, array_poolt &array_pool, symbol_generatort &fresh_symbol)
When a sub-expression of expr is a builtin_function, add a "string_builtin_function" node to the grap...
Keeps track of dependencies between strings.
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:95
static void initial_index_set(index_set_pairt &index_set, const namespacet &ns, const string_constraintt &axiom)
static std::string string_of_array(const array_exprt &arr)
convert the content of a string to a more readable representation.
static void update_index_set(index_set_pairt &index_set, const namespacet &ns, const std::vector< exprt > &current_constraints)
Add to the index set all the indices that appear in the formulas.
static optionalt< exprt > get_array(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, const array_poolt &array_pool)
Get a model of an array and put it in a certain form.
static std::vector< exprt > extract_strings_from_lhs(const exprt &lhs, const namespacet &ns)
This is meant to be used on the lhs of an equation with string subtype.
exprt substitute_array_lists(exprt expr, size_t string_max_length)
Replace array-lists by 'with' expressions.
static std::vector< exprt > extract_strings(const exprt &expr, const namespacet &ns)
static std::pair< bool, std::vector< exprt > > check_axioms(const string_axiomst &axioms, string_constraint_generatort &generator, const std::function< exprt(const exprt &)> &get, messaget::mstreamt &stream, const namespacet &ns, bool use_counter_example, const union_find_replacet &symbol_resolve, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &not_contain_witnesses)
Check axioms takes the model given by the underlying solver and answers whether it satisfies the stri...
static void add_equations_for_symbol_resolution(union_find_replacet &symbol_solver, const std::vector< exprt > &equations, const namespacet &ns, messaget::mstreamt &stream)
Add association for each char pointer in the equation.
static void make_char_array_pointer_associations(string_constraint_generatort &generator, exprt &expr)
If expr is an equation whose right-hand-side is a associate_array_to_pointer call,...
static void add_string_equation_to_symbol_resolution(const equal_exprt &eq, union_find_replacet &symbol_resolve, const namespacet &ns)
Given an equation on strings, mark these strings as belonging to the same set in the symbol_resolve s...
static bool validate(const string_refinementt::infot &info)
static void add_to_index_set(index_set_pairt &index_set, const namespacet &ns, const exprt &s, exprt i)
Add i to the index set all the indices that appear in the formula.
exprt simplify_sum(const exprt &f)
static optionalt< exprt > get_valid_array_size(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, const array_poolt &array_pool)
Get a model of the size of the input string.
union_find_replacet string_identifiers_resolution_from_equations(const std::vector< equal_exprt > &equations, const namespacet &ns, messaget::mstreamt &stream)
Symbol resolution for expressions of type string typet.
static void display_index_set(messaget::mstreamt &stream, const index_set_pairt &index_set)
Write index set to the given stream, use for debugging.
static std::vector< T > fill_in_map_as_vector(const std::map< std::size_t, T > &index_value)
Convert index-value map to a vector of values.
static exprt negation_of_not_contains_constraint(const string_not_contains_constraintt &constraint, const symbol_exprt &univ_var, const std::function< exprt(const exprt &)> &get)
Negates the constraint to be fed to a solver.
static optionalt< exprt > find_counter_example(const namespacet &ns, const exprt &axiom, const symbol_exprt &var, message_handlert &message_handler)
Creates a solver with axiom as the only formula added and runs it.
static exprt replace_expr_copy(const union_find_replacet &symbol_resolve, exprt expr)
Substitute sub-expressions in equation by representative elements of symbol_resolve whenever possible...
static void substitute_array_access_in_place(exprt &expr, symbol_generatort &symbol_generator, const bool left_propagate)
Auxiliary function for substitute_array_access Performs the same operation but modifies the argument ...
static void get_sub_arrays(const exprt &array_expr, std::vector< exprt > &accu)
An expression representing an array of characters can be in the form of an if expression for instance...
void debug_model(const string_constraint_generatort &generator, messaget::mstreamt &stream, const namespacet &ns, const std::function< exprt(const exprt &)> &super_get, const std::vector< symbol_exprt > &symbols, array_poolt &array_pool)
Display part of the current model by mapping the variables created by the solver to constant expressi...
static std::vector< exprt > instantiate(const string_not_contains_constraintt &axiom, const index_set_pairt &index_set, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
Instantiates a quantified formula representing not_contains by substituting the quantifiers and gener...
static optionalt< exprt > substitute_array_access(const index_exprt &index_expr, symbol_generatort &symbol_generator, const bool left_propagate)
static void debug_check_axioms_step(messaget::mstreamt &stream, const T &axiom, const T &axiom_in_model, const exprt &negaxiom, const exprt &with_concretized_arrays)
Debugging function which outputs the different steps an axiom goes through to be checked in check axi...
static exprt get_char_array_and_concretize(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, array_poolt &array_pool)
Debugging function which finds the valuation of the given array in super_get and concretize unknown c...
static std::vector< exprt > generate_instantiations(const index_set_pairt &index_set, const string_axiomst &axioms, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &not_contain_witnesses)
Instantiation of all constraints.
String support via creating string constraints and progressively instantiating the universal constrai...
#define string_refinement_invariantt(reason)
std::string utf16_constant_array_to_java(const array_exprt &arr, std::size_t length)
Construct a string from a constant array.
bool is_char_type(const typet &type)
For now, any unsigned bitvector type of width smaller or equal to 16 is considered a character.
bool has_char_pointer_subtype(const typet &type, const namespacet &ns)
bool is_char_array_type(const typet &type, const namespacet &ns)
Distinguish char array from other types.
bool is_char_pointer_type(const typet &type)
For now, any unsigned bitvector type is considered a character.
const namespacet * ns
Definition: bv_refinement.h:35
std::map< exprt, std::set< exprt > > current
std::map< exprt, std::set< exprt > > cumulative
std::vector< string_constraintt > universal
std::vector< string_not_contains_constraintt > not_contains
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< string_not_contains_constraintt > not_contains
std::vector< exprt > existential
std::vector< string_constraintt > universal
Constraints to encode non containement of strings.
string_refinementt constructor arguments