cprover
string_refinement.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: String support via creating string constraints and progressively
4  instantiating the universal constraints as needed.
5  The procedure is described in the PASS paper at HVC'13:
6  "PASS: String Solving with Parameterized Array and Interval Automaton"
7  by Guodong Li and Indradeep Ghosh.
8 
9 Author: Alberto Griggio, alberto.griggio@gmail.com
10 
11 \*******************************************************************/
12 
19 
20 #include "string_refinement.h"
21 
22 #include <solvers/sat/satcheck.h>
23 #include <stack>
24 #include <unordered_set>
25 
26 #include <util/expr_iterator.h>
27 #include <util/expr_util.h>
28 #include <util/format_type.h>
29 #include <util/magic.h>
30 #include <util/range.h>
31 #include <util/simplify_expr.h>
32 
35 #include "string_dependencies.h"
37 
39  messaget::mstreamt &stream,
40  const namespacet &ns,
41  const string_constraintt &constraint);
42 
44  const namespacet &ns,
45  const exprt &axiom,
46  const symbol_exprt &var,
47  message_handlert &message_handler);
48 
65 static std::pair<bool, std::vector<exprt>> check_axioms(
66  const string_axiomst &axioms,
68  const std::function<exprt(const exprt &)> &get,
69  messaget::mstreamt &stream,
70  const namespacet &ns,
71  bool use_counter_example,
72  const union_find_replacet &symbol_resolve,
73  const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
74  &not_contain_witnesses);
75 
76 static void initial_index_set(
77  index_set_pairt &index_set,
78  const namespacet &ns,
79  const string_constraintt &axiom);
80 
81 static void initial_index_set(
82  index_set_pairt &index_set,
83  const namespacet &ns,
84  const string_not_contains_constraintt &axiom);
85 
86 static void initial_index_set(
87  index_set_pairt &index_set,
88  const namespacet &ns,
89  const string_axiomst &axioms);
90 
91 exprt simplify_sum(const exprt &f);
92 
93 static void update_index_set(
94  index_set_pairt &index_set,
95  const namespacet &ns,
96  const std::vector<exprt> &current_constraints);
97 
98 static void update_index_set(
99  index_set_pairt &index_set,
100  const namespacet &ns,
101  const exprt &formula);
102 
103 static std::vector<exprt> instantiate(
104  const string_not_contains_constraintt &axiom,
105  const index_set_pairt &index_set,
106  const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
107  &witnesses);
108 
110  const std::function<exprt(const exprt &)> &super_get,
111  const namespacet &ns,
112  messaget::mstreamt &stream,
113  const array_string_exprt &arr,
114  const array_poolt &array_pool);
115 
117  const index_exprt &index_expr,
118  symbol_generatort &symbol_generator,
119  const bool left_propagate);
120 
128 template <typename T>
129 static std::vector<T>
130 fill_in_map_as_vector(const std::map<std::size_t, T> &index_value)
131 {
132  std::vector<T> result;
133  if(!index_value.empty())
134  {
135  result.resize(index_value.rbegin()->first + 1);
136  for(auto it = index_value.rbegin(); it != index_value.rend(); ++it)
137  {
138  const std::size_t index = it->first;
139  const T &value = it->second;
140  const auto next = std::next(it);
141  const std::size_t leftmost_index_to_pad =
142  next != index_value.rend() ? next->first + 1 : 0;
143  for(std::size_t j = leftmost_index_to_pad; j <= index; j++)
144  result[j] = value;
145  }
146  }
147  return result;
148 }
149 
150 static bool validate(const string_refinementt::infot &info)
151 {
152  PRECONDITION(info.ns);
153  PRECONDITION(info.prop);
154  return true;
155 }
156 
158  : supert(info),
159  config_(info),
160  loop_bound_(info.refinement_bound),
161  generator(*info.ns)
162 {
163 }
164 
166  : string_refinementt(info, validate(info))
167 {
168 }
169 
171 static void
173 {
174  std::size_t count = 0;
175  std::size_t count_current = 0;
176  for(const auto &i : index_set.cumulative)
177  {
178  const exprt &s = i.first;
179  stream << "IS(" << format(s) << ")=={" << messaget::eom;
180 
181  for(const auto &j : i.second)
182  {
183  const auto it = index_set.current.find(i.first);
184  if(
185  it != index_set.current.end() && it->second.find(j) != it->second.end())
186  {
187  count_current++;
188  stream << "**";
189  }
190  stream << " " << format(j) << ";" << messaget::eom;
191  count++;
192  }
193  stream << "}" << messaget::eom;
194  }
195  stream << count << " elements in index set (" << count_current
196  << " newly added)" << messaget::eom;
197 }
198 
220 static std::vector<exprt> generate_instantiations(
221  const index_set_pairt &index_set,
222  const string_axiomst &axioms,
223  const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
224  &not_contain_witnesses)
225 {
226  std::vector<exprt> lemmas;
227  for(const auto &i : index_set.current)
228  {
229  for(const auto &univ_axiom : axioms.universal)
230  {
231  for(const auto &j : i.second)
232  lemmas.push_back(instantiate(univ_axiom, i.first, j));
233  }
234  }
235  for(const auto &nc_axiom : axioms.not_contains)
236  {
237  for(const auto &instance :
238  instantiate(nc_axiom, index_set, not_contain_witnesses))
239  lemmas.push_back(instance);
240  }
241  return lemmas;
242 }
243 
248  string_constraint_generatort &generator,
249  exprt &expr)
250 {
251  if(const auto equal_expr = expr_try_dynamic_cast<equal_exprt>(expr))
252  {
253  if(
254  const auto fun_app = expr_try_dynamic_cast<function_application_exprt>(
255  as_const(equal_expr->rhs())))
256  {
257  const auto new_equation =
258  generator.make_array_pointer_association(equal_expr->lhs(), *fun_app);
259  if(new_equation)
260  {
261  expr =
262  equal_exprt{from_integer(true, new_equation->type()), *new_equation};
263  }
264  }
265  }
266 }
267 
272 static exprt
273 replace_expr_copy(const union_find_replacet &symbol_resolve, exprt expr)
274 {
275  symbol_resolve.replace_expr(expr);
276  return expr;
277 }
278 
283 void string_refinementt::set_to(const exprt &expr, bool value)
284 {
285  PRECONDITION(expr.type().id() == ID_bool);
287  if(!value)
288  equations.push_back(not_exprt{expr});
289  else
290  equations.push_back(expr);
291 }
292 
302  union_find_replacet &symbol_solver,
303  const std::vector<exprt> &equations,
304  const namespacet &ns,
305  messaget::mstreamt &stream)
306 {
307  const std::string log_message =
308  "WARNING string_refinement.cpp generate_symbol_resolution_from_equations:";
309  auto equalities = make_range(equations).filter(
310  [&](const exprt &e) { return can_cast_expr<equal_exprt>(e); });
311  for(const exprt &e : equalities)
312  {
313  const equal_exprt &eq = to_equal_expr(e);
314  const exprt &lhs = to_equal_expr(eq).lhs();
315  const exprt &rhs = to_equal_expr(eq).rhs();
316  if(lhs.id() != ID_symbol)
317  {
318  stream << log_message << "non symbol lhs: " << format(lhs)
319  << " with rhs: " << format(rhs) << messaget::eom;
320  continue;
321  }
322 
323  if(lhs.type() != rhs.type())
324  {
325  stream << log_message << "non equal types lhs: " << format(lhs)
326  << "\n####################### rhs: " << format(rhs)
327  << messaget::eom;
328  continue;
329  }
330 
331  if(is_char_pointer_type(rhs.type()))
332  {
333  symbol_solver.make_union(lhs, simplify_expr(rhs, ns));
334  }
335  else if(rhs.id() == ID_function_application)
336  {
337  // function applications can be ignored because they will be replaced
338  // in the convert_function_application step of dec_solve
339  }
340  else if(
341  lhs.type().id() != ID_pointer && has_char_pointer_subtype(lhs.type(), ns))
342  {
343  if(rhs.type().id() == ID_struct || rhs.type().id() == ID_struct_tag)
344  {
345  const struct_typet &struct_type = to_struct_type(ns.follow(rhs.type()));
346  for(const auto &comp : struct_type.components())
347  {
348  if(is_char_pointer_type(comp.type()))
349  {
350  const member_exprt lhs_data(lhs, comp.get_name(), comp.type());
351  const exprt rhs_data = simplify_expr(
352  member_exprt(rhs, comp.get_name(), comp.type()), ns);
353  symbol_solver.make_union(lhs_data, rhs_data);
354  }
355  }
356  }
357  else
358  {
359  stream << log_message << "non struct with char pointer subexpr "
360  << format(rhs) << "\n * of type " << format(rhs.type())
361  << messaget::eom;
362  }
363  }
364  }
365 }
366 
373 static std::vector<exprt>
375 {
376  std::vector<exprt> result;
377  if(lhs.type() == string_typet())
378  result.push_back(lhs);
379  else if(lhs.type().id() == ID_struct || lhs.type().id() == ID_struct_tag)
380  {
381  const struct_typet &struct_type = to_struct_type(ns.follow(lhs.type()));
382  for(const auto &comp : struct_type.components())
383  {
384  const std::vector<exprt> strings_in_comp = extract_strings_from_lhs(
385  member_exprt(lhs, comp.get_name(), comp.type()), ns);
386  result.insert(
387  result.end(), strings_in_comp.begin(), strings_in_comp.end());
388  }
389  }
390  return result;
391 }
392 
398 static std::vector<exprt>
399 extract_strings(const exprt &expr, const namespacet &ns)
400 {
401  std::vector<exprt> result;
402  for(auto it = expr.depth_begin(); it != expr.depth_end();)
403  {
404  if(it->type() == string_typet() && it->id() != ID_if)
405  {
406  result.push_back(*it);
407  it.next_sibling_or_parent();
408  }
409  else if(it->id() == ID_symbol)
410  {
411  for(const exprt &e : extract_strings_from_lhs(*it, ns))
412  result.push_back(e);
413  it.next_sibling_or_parent();
414  }
415  else
416  ++it;
417  }
418  return result;
419 }
420 
428  const equal_exprt &eq,
429  union_find_replacet &symbol_resolve,
430  const namespacet &ns)
431 {
432  if(eq.rhs().type() == string_typet())
433  {
434  symbol_resolve.make_union(eq.lhs(), simplify_expr(eq.rhs(), ns));
435  }
436  else if(has_subtype(eq.lhs().type(), ID_string, ns))
437  {
438  if(
439  eq.rhs().type().id() == ID_struct ||
440  eq.rhs().type().id() == ID_struct_tag)
441  {
442  const struct_typet &struct_type =
443  to_struct_type(ns.follow(eq.rhs().type()));
444  for(const auto &comp : struct_type.components())
445  {
446  const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type());
447  const exprt rhs_data = simplify_expr(
448  member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns);
450  equal_exprt(lhs_data, rhs_data), symbol_resolve, ns);
451  }
452  }
453  }
454 }
455 
464  const std::vector<equal_exprt> &equations,
465  const namespacet &ns,
466  messaget::mstreamt &stream)
467 {
468  const std::string log_message =
469  "WARNING string_refinement.cpp "
470  "string_identifiers_resolution_from_equations:";
471 
472  equation_symbol_mappingt equation_map;
473 
474  // Indexes of equations that need to be added to the result
475  std::unordered_set<size_t> required_equations;
476  std::stack<size_t> equations_to_treat;
477 
478  for(std::size_t i = 0; i < equations.size(); ++i)
479  {
480  const equal_exprt &eq = equations[i];
481  if(eq.rhs().id() == ID_function_application)
482  {
483  if(required_equations.insert(i).second)
484  equations_to_treat.push(i);
485 
486  std::vector<exprt> rhs_strings = extract_strings(eq.rhs(), ns);
487  for(const auto &expr : rhs_strings)
488  equation_map.add(i, expr);
489  }
490  else if(
491  eq.lhs().type().id() != ID_pointer &&
492  has_subtype(eq.lhs().type(), ID_string, ns))
493  {
494  std::vector<exprt> lhs_strings = extract_strings_from_lhs(eq.lhs(), ns);
495 
496  for(const auto &expr : lhs_strings)
497  equation_map.add(i, expr);
498 
499  if(lhs_strings.empty())
500  {
501  stream << log_message << "non struct with string subtype "
502  << format(eq.lhs()) << "\n * of type "
503  << format(eq.lhs().type()) << messaget::eom;
504  }
505 
506  for(const exprt &expr : extract_strings(eq.rhs(), ns))
507  equation_map.add(i, expr);
508  }
509  }
510 
511  // transitively add all equations which depend on the equations to treat
512  while(!equations_to_treat.empty())
513  {
514  const std::size_t i = equations_to_treat.top();
515  equations_to_treat.pop();
516  for(const exprt &string : equation_map.find_expressions(i))
517  {
518  for(const std::size_t j : equation_map.find_equations(string))
519  {
520  if(required_equations.insert(j).second)
521  equations_to_treat.push(j);
522  }
523  }
524  }
525 
526  union_find_replacet result;
527  for(const std::size_t i : required_equations)
528  add_string_equation_to_symbol_resolution(equations[i], result, ns);
529 
530  return result;
531 }
532 
533 #ifdef DEBUG
535 static void
536 output_equations(std::ostream &output, const std::vector<exprt> &equations)
537 {
538  for(std::size_t i = 0; i < equations.size(); ++i)
539  output << " [" << i << "] " << format(equations[i]) << std::endl;
540 }
541 #endif
542 
553 // NOLINTNEXTLINE
556 // NOLINTNEXTLINE
561 // NOLINTNEXTLINE
608 {
609 #ifdef DEBUG
610  log.debug() << "dec_solve: Initial set of equations" << messaget::eom;
611  output_equations(log.debug(), equations);
612 #endif
613 
614  log.debug() << "dec_solve: Build symbol solver from equations"
615  << messaget::eom;
616  // symbol_resolve is used by get and is kept between calls to dec_solve,
617  // that's why we use a class member here
620 #ifdef DEBUG
621  log.debug() << "symbol resolve:" << messaget::eom;
622  for(const auto &pair : symbol_resolve.to_vector())
623  log.debug() << format(pair.first) << " --> " << format(pair.second)
624  << messaget::eom;
625 #endif
626 
627  const union_find_replacet string_id_symbol_resolve =
629  [&] {
630  std::vector<equal_exprt> equalities;
631  for(const auto &eq : equations)
632  {
633  if(auto equal_expr = expr_try_dynamic_cast<equal_exprt>(eq))
634  equalities.push_back(*equal_expr);
635  }
636  return equalities;
637  }(),
638  ns,
639  log.debug());
640 #ifdef DEBUG
641  log.debug() << "symbol resolve string:" << messaget::eom;
642  for(const auto &pair : string_id_symbol_resolve.to_vector())
643  {
644  log.debug() << format(pair.first) << " --> " << format(pair.second)
645  << messaget::eom;
646  }
647 #endif
648 
649  log.debug() << "dec_solve: Replacing string ids and simplifying arguments"
650  " in function applications"
651  << messaget::eom;
652  for(exprt &expr : equations)
653  {
654  auto it = expr.depth_begin();
655  while(it != expr.depth_end())
656  {
658  {
659  // Simplification is required because the array pool may not realize
660  // that an expression like
661  // `(unsignedbv[16]*)((signedbv[8]*)&constarray[0] + 0)` is the
662  // same pointer as `&constarray[0]
663  simplify(it.mutate(), ns);
664  string_id_symbol_resolve.replace_expr(it.mutate());
665  it.next_sibling_or_parent();
666  }
667  else
668  ++it;
669  }
670  }
671 
672  // Constraints start clear at each `dec_solve` call.
673  string_constraintst constraints;
674  for(auto &expr : equations)
676 
677 #ifdef DEBUG
678  output_equations(log.debug(), equations);
679 #endif
680 
681  log.debug() << "dec_solve: compute dependency graph and remove function "
682  << "applications captured by the dependencies:" << messaget::eom;
683  std::vector<exprt> local_equations;
684  for(const exprt &eq : equations)
685  {
686  // Ensures that arrays that are equal, are associated to the same nodes
687  // in the graph.
688  const exprt eq_with_char_array_replaced_with_representative_elements =
690  const optionalt<exprt> new_equation = add_node(
691  dependencies,
692  eq_with_char_array_replaced_with_representative_elements,
695  if(new_equation)
696  local_equations.push_back(*new_equation);
697  else
698  local_equations.push_back(eq);
699  }
700  equations.clear();
701 
702 #ifdef DEBUG
704 #endif
705 
706  log.debug() << "dec_solve: add constraints" << messaget::eom;
708 
709 #ifdef DEBUG
710  output_equations(log.debug(), equations);
711 #endif
712 
713 #ifdef DEBUG
714  log.debug() << "dec_solve: arrays_of_pointers:" << messaget::eom;
715  for(auto pair : generator.array_pool.get_arrays_of_pointers())
716  {
717  log.debug() << " * " << format(pair.first) << "\t--> "
718  << format(pair.second) << " : " << format(pair.second.type())
719  << messaget::eom;
720  }
721 #endif
722 
723  for(const auto &eq : local_equations)
724  {
725 #ifdef DEBUG
726  log.debug() << "dec_solve: set_to " << format(eq) << messaget::eom;
727 #endif
728  supert::set_to(eq, true);
729  }
730 
732  constraints.universal.begin(),
733  constraints.universal.end(),
734  std::back_inserter(axioms.universal),
735  [&](string_constraintt constraint) {
736  constraint.replace_expr(symbol_resolve);
737  DATA_INVARIANT(
738  is_valid_string_constraint(log.error(), ns, constraint),
739  string_refinement_invariantt(
740  "string constraints satisfy their invariant"));
741  return constraint;
742  });
743 
745  constraints.not_contains.begin(),
746  constraints.not_contains.end(),
747  std::back_inserter(axioms.not_contains),
749  replace(symbol_resolve, axiom);
750  return axiom;
751  });
752 
753  // Used to store information about witnesses for not_contains constraints
754  std::unordered_map<string_not_contains_constraintt, symbol_exprt>
755  not_contain_witnesses;
756  for(const auto &nc_axiom : axioms.not_contains)
757  {
758  const auto &witness_type = [&] {
759  const auto &rtype = to_array_type(nc_axiom.s0.type());
760  const typet &index_type = rtype.size().type();
762  }();
763  not_contain_witnesses.emplace(
764  nc_axiom, generator.fresh_symbol("not_contains_witness", witness_type));
765  }
766 
767  for(const exprt &lemma : constraints.existential)
768  {
770  }
771 
772  // All generated strings should have non-negative length
773  for(const auto &pair : generator.array_pool.created_strings())
774  {
775  exprt length = generator.array_pool.get_or_create_length(pair.first);
776  add_lemma(
777  binary_relation_exprt{length, ID_ge, from_integer(0, length.type())});
778  }
779 
780  // Initial try without index set
781  const auto get = [this](const exprt &expr) { return this->get(expr); };
783  const decision_proceduret::resultt initial_result = supert::dec_solve();
784  if(initial_result == resultt::D_SATISFIABLE)
785  {
786  bool satisfied;
787  std::vector<exprt> counter_examples;
788  std::tie(satisfied, counter_examples) = check_axioms(
789  axioms,
790  generator,
791  get,
792  log.debug(),
793  ns,
796  not_contain_witnesses);
797  if(satisfied)
798  {
799  log.debug() << "check_SAT: the model is correct" << messaget::eom;
800  return resultt::D_SATISFIABLE;
801  }
802  log.debug() << "check_SAT: got SAT but the model is not correct"
803  << messaget::eom;
804  }
805  else
806  {
807  log.debug() << "check_SAT: got UNSAT or ERROR" << messaget::eom;
808  return initial_result;
809  }
810 
813  current_constraints.clear();
814  const auto initial_instances =
815  generate_instantiations(index_sets, axioms, not_contain_witnesses);
816  for(const auto &instance : initial_instances)
817  {
819  }
820 
821  while((loop_bound_--) > 0)
822  {
824  const decision_proceduret::resultt refined_result = supert::dec_solve();
825 
826  if(refined_result == resultt::D_SATISFIABLE)
827  {
828  bool satisfied;
829  std::vector<exprt> counter_examples;
830  std::tie(satisfied, counter_examples) = check_axioms(
831  axioms,
832  generator,
833  get,
834  log.debug(),
835  ns,
838  not_contain_witnesses);
839  if(satisfied)
840  {
841  log.debug() << "check_SAT: the model is correct" << messaget::eom;
842  return resultt::D_SATISFIABLE;
843  }
844 
845  log.debug()
846  << "check_SAT: got SAT but the model is not correct, refining..."
847  << messaget::eom;
848 
849  // Since the model is not correct although we got SAT, we need to refine
850  // the property we are checking by adding more indices to the index set,
851  // and instantiating universal formulas with this indices.
852  // We will then relaunch the solver with these added lemmas.
853  index_sets.current.clear();
855 
857 
858  if(index_sets.current.empty())
859  {
860  if(axioms.not_contains.empty())
861  {
862  log.error() << "dec_solve: current index set is empty, "
863  << "this should not happen" << messaget::eom;
864  return resultt::D_ERROR;
865  }
866  else
867  {
868  log.debug() << "dec_solve: current index set is empty, "
869  << "adding counter examples" << messaget::eom;
870  for(const auto &counter : counter_examples)
871  add_lemma(counter);
872  }
873  }
874  current_constraints.clear();
875  const auto instances =
876  generate_instantiations(index_sets, axioms, not_contain_witnesses);
877  for(const auto &instance : instances)
878  add_lemma(
880  }
881  else
882  {
883  log.debug() << "check_SAT: default return "
884  << static_cast<int>(refined_result) << messaget::eom;
885  return refined_result;
886  }
887  }
888  log.debug() << "string_refinementt::dec_solve reached the maximum number"
889  << "of steps allowed" << messaget::eom;
890  return resultt::D_ERROR;
891 }
897  const exprt &lemma,
898  const bool simplify_lemma)
899 {
900  if(!seen_instances.insert(lemma).second)
901  return;
902 
903  current_constraints.push_back(lemma);
904 
905  exprt simple_lemma = lemma;
906  if(simplify_lemma)
907  {
908  simplify(simple_lemma, ns);
909  }
910 
911  if(simple_lemma.is_true())
912  {
913 #if 0
914  log.debug() << "string_refinementt::add_lemma : tautology" << messaget::eom;
915 #endif
916  return;
917  }
918 
919  symbol_resolve.replace_expr(simple_lemma);
920 
921  // Replace empty arrays with array_of expression because the solver cannot
922  // handle empty arrays.
923  for(auto it = simple_lemma.depth_begin(); it != simple_lemma.depth_end();)
924  {
925  if(it->id() == ID_array && it->operands().empty())
926  {
927  it.mutate() = array_of_exprt(
928  from_integer(
930  to_array_type(it->type()));
931  it.next_sibling_or_parent();
932  }
933  else
934  ++it;
935  }
936 
937  log.debug() << "adding lemma " << format(simple_lemma) << messaget::eom;
938 
939  prop.l_set_to_true(convert(simple_lemma));
940 }
941 
957  const std::function<exprt(const exprt &)> &super_get,
958  const namespacet &ns,
959  messaget::mstreamt &stream,
960  const array_string_exprt &arr,
961  const array_poolt &array_pool)
962 {
963  const auto &size_from_pool = array_pool.get_length_if_exists(arr);
964  exprt size_val;
965  if(size_from_pool.has_value())
966  {
967  const exprt size = size_from_pool.value();
968  size_val = simplify_expr(super_get(size), ns);
969  if(size_val.id() != ID_constant)
970  {
971  stream << "(sr::get_valid_array_size) string of unknown size: "
972  << format(size_val) << messaget::eom;
973  return {};
974  }
975  }
976  else if(to_array_type(arr.type()).size().id() == ID_constant)
977  size_val = simplify_expr(to_array_type(arr.type()).size(), ns);
978  else
979  return {};
980 
981  auto n_opt = numeric_cast<std::size_t>(size_val);
982  if(!n_opt)
983  {
984  stream << "(sr::get_valid_array_size) size is not valid" << messaget::eom;
985  return {};
986  }
987 
988  return size_val;
989 }
990 
1001  const std::function<exprt(const exprt &)> &super_get,
1002  const namespacet &ns,
1003  messaget::mstreamt &stream,
1004  const array_string_exprt &arr,
1005  const array_poolt &array_pool)
1006 {
1007  const auto size =
1008  get_valid_array_size(super_get, ns, stream, arr, array_pool);
1009  if(!size.has_value())
1010  {
1011  return {};
1012  }
1013 
1014  const size_t n = numeric_cast<std::size_t>(size.value()).value();
1015 
1016  if(n > MAX_CONCRETE_STRING_SIZE)
1017  {
1018  stream << "(sr::get_valid_array_size) long string (size "
1019  << " = " << n << ") " << format(arr) << messaget::eom;
1020  stream << "(sr::get_valid_array_size) consider reducing "
1021  "max-nondet-string-length so "
1022  "that no string exceeds "
1024  << " in length and "
1025  "make sure all functions returning strings are loaded"
1026  << messaget::eom;
1027  stream << "(sr::get_valid_array_size) this can also happen on invalid "
1028  "object access"
1029  << messaget::eom;
1030  return nil_exprt();
1031  }
1032 
1033  const exprt arr_val = simplify_expr(super_get(arr), ns);
1034  const typet char_type = to_array_type(arr.type()).subtype();
1035  const typet &index_type = size.value().type();
1036 
1037  if(
1038  const auto &array = interval_sparse_arrayt::of_expr(
1040  return array->concretize(n, index_type);
1041  return {};
1042 }
1043 
1048 static std::string string_of_array(const array_exprt &arr)
1049 {
1050  if(arr.type().id() != ID_array)
1051  return std::string("");
1052 
1053  exprt size_expr = to_array_type(arr.type()).size();
1054  auto n = numeric_cast_v<std::size_t>(to_constant_expr(size_expr));
1055  return utf16_constant_array_to_java(arr, n);
1056 }
1057 
1067  const std::function<exprt(const exprt &)> &super_get,
1068  const namespacet &ns,
1069  messaget::mstreamt &stream,
1070  const array_string_exprt &arr,
1071  array_poolt &array_pool)
1072 {
1073  stream << "- " << format(arr) << ":\n";
1074  stream << std::string(4, ' ') << "- type: " << format(arr.type())
1075  << messaget::eom;
1076  const auto arr_model_opt = get_array(super_get, ns, stream, arr, array_pool);
1077  if(arr_model_opt)
1078  {
1079  stream << std::string(4, ' ') << "- char_array: " << format(*arr_model_opt)
1080  << '\n';
1081  stream << std::string(4, ' ')
1082  << "- type : " << format(arr_model_opt->type()) << messaget::eom;
1083  const exprt simple = simplify_expr(*arr_model_opt, ns);
1084  stream << std::string(4, ' ')
1085  << "- simplified_char_array: " << format(simple) << messaget::eom;
1086  if(
1087  const auto concretized_array = get_array(
1088  super_get, ns, stream, to_array_string_expr(simple), array_pool))
1089  {
1090  stream << std::string(4, ' ')
1091  << "- concretized_char_array: " << format(*concretized_array)
1092  << messaget::eom;
1093 
1094  if(
1095  const auto array_expr =
1096  expr_try_dynamic_cast<array_exprt>(*concretized_array))
1097  {
1098  stream << std::string(4, ' ') << "- as_string: \""
1099  << string_of_array(*array_expr) << "\"\n";
1100  }
1101  else
1102  stream << std::string(2, ' ') << "- warning: not an array"
1103  << messaget::eom;
1104  return *concretized_array;
1105  }
1106  return simple;
1107  }
1108  stream << std::string(4, ' ') << "- incomplete model" << messaget::eom;
1109  return arr;
1110 }
1111 
1115  const string_constraint_generatort &generator,
1116  messaget::mstreamt &stream,
1117  const namespacet &ns,
1118  const std::function<exprt(const exprt &)> &super_get,
1119  const std::vector<symbol_exprt> &symbols,
1120  array_poolt &array_pool)
1121 {
1122  stream << "debug_model:" << '\n';
1123  for(const auto &pointer_array : generator.array_pool.get_arrays_of_pointers())
1124  {
1125  const auto arr = pointer_array.second;
1126  const exprt model =
1127  get_char_array_and_concretize(super_get, ns, stream, arr, array_pool);
1128 
1129  stream << "- " << format(arr) << ":\n"
1130  << " - pointer: " << format(pointer_array.first) << "\n"
1131  << " - model: " << format(model) << messaget::eom;
1132  }
1133 
1134  for(const auto &symbol : symbols)
1135  {
1136  stream << " - " << symbol.get_identifier() << ": "
1137  << format(super_get(symbol)) << '\n';
1138  }
1139  stream << messaget::eom;
1140 }
1141 
1155  const with_exprt &expr,
1156  const exprt &index,
1157  const bool left_propagate)
1158 {
1159  return left_propagate ? interval_sparse_arrayt(expr).to_if_expression(index)
1160  : sparse_arrayt::to_if_expression(expr, index);
1161 }
1162 
1170  const array_exprt &array_expr,
1171  const exprt &index,
1172  symbol_generatort &symbol_generator)
1173 {
1174  const typet &char_type = array_expr.type().element_type();
1175  const exprt default_val = symbol_generator("out_of_bound_access", char_type);
1176  const interval_sparse_arrayt sparse_array(array_expr, default_val);
1177  return sparse_array.to_if_expression(index);
1178 }
1179 
1181  const if_exprt &if_expr,
1182  const exprt &index,
1183  symbol_generatort &symbol_generator,
1184  const bool left_propagate)
1185 {
1186  exprt true_index = index_exprt(if_expr.true_case(), index);
1187  exprt false_index = index_exprt(if_expr.false_case(), index);
1188 
1189  // Substitute recursively in branches of conditional expressions
1190  optionalt<exprt> substituted_true_case =
1191  substitute_array_access(true_index, symbol_generator, left_propagate);
1192  optionalt<exprt> substituted_false_case =
1193  substitute_array_access(false_index, symbol_generator, left_propagate);
1194 
1195  return if_exprt(
1196  if_expr.cond(),
1197  substituted_true_case ? *substituted_true_case : true_index,
1198  substituted_false_case ? *substituted_false_case : false_index);
1199 }
1200 
1202  const index_exprt &index_expr,
1203  symbol_generatort &symbol_generator,
1204  const bool left_propagate)
1205 {
1206  const exprt &array = index_expr.array();
1207  if(auto array_of = expr_try_dynamic_cast<array_of_exprt>(array))
1208  return array_of->op();
1209  if(auto array_with = expr_try_dynamic_cast<with_exprt>(array))
1210  return substitute_array_access(
1211  *array_with, index_expr.index(), left_propagate);
1212  if(auto array_expr = expr_try_dynamic_cast<array_exprt>(array))
1213  return substitute_array_access(
1214  *array_expr, index_expr.index(), symbol_generator);
1215  if(auto if_expr = expr_try_dynamic_cast<if_exprt>(array))
1216  return substitute_array_access(
1217  *if_expr, index_expr.index(), symbol_generator, left_propagate);
1218 
1219  INVARIANT(
1220  array.is_nil() || array.id() == ID_symbol || array.id() == ID_nondet_symbol,
1221  std::string(
1222  "in case the array is unknown, it should be a symbol or nil, id: ") +
1223  id2string(array.id()));
1224  return {};
1225 }
1226 
1231  exprt &expr,
1232  symbol_generatort &symbol_generator,
1233  const bool left_propagate)
1234 {
1235  // Recurse down structure and modify on the way.
1236  for(auto it = expr.depth_begin(), itend = expr.depth_end(); it != itend; ++it)
1237  {
1238  if(const auto index_expr = expr_try_dynamic_cast<index_exprt>(*it))
1239  {
1240  optionalt<exprt> result =
1241  substitute_array_access(*index_expr, symbol_generator, left_propagate);
1242 
1243  // Only perform a write when we have something changed.
1244  if(result)
1245  it.mutate() = *result;
1246  }
1247  }
1248 }
1249 
1270  exprt expr,
1271  symbol_generatort &symbol_generator,
1272  const bool left_propagate)
1273 {
1274  substitute_array_access_in_place(expr, symbol_generator, left_propagate);
1275  return expr;
1276 }
1277 
1289  const string_not_contains_constraintt &constraint,
1290  const symbol_exprt &univ_var,
1291  const std::function<exprt(const exprt &)> &get)
1292 {
1293  // If the for all is vacuously true, the negation is false.
1294  const auto lbe = numeric_cast_v<mp_integer>(
1295  to_constant_expr(get(constraint.exists_lower_bound)));
1296  const auto ube = numeric_cast_v<mp_integer>(
1297  to_constant_expr(get(constraint.exists_upper_bound)));
1298  const auto univ_bounds = and_exprt(
1299  binary_relation_exprt(get(constraint.univ_lower_bound), ID_le, univ_var),
1300  binary_relation_exprt(get(constraint.univ_upper_bound), ID_gt, univ_var));
1301 
1302  // The negated existential becomes an universal, and this is the unrolling of
1303  // that universal quantifier.
1304  // Ff the upper bound is smaller than the lower bound (specifically, it might
1305  // actually be negative as it is initially unconstrained) then there is
1306  // nothing to do (and the reserve call would fail).
1307  if(ube < lbe)
1308  return and_exprt(univ_bounds, get(constraint.premise));
1309 
1310  std::vector<exprt> conjuncts;
1311  conjuncts.reserve(numeric_cast_v<std::size_t>(ube - lbe));
1312  for(mp_integer i = lbe; i < ube; ++i)
1313  {
1314  const constant_exprt i_expr = from_integer(i, univ_var.type());
1315  const exprt s0_char =
1316  get(index_exprt(constraint.s0, plus_exprt(univ_var, i_expr)));
1317  const exprt s1_char = get(index_exprt(constraint.s1, i_expr));
1318  conjuncts.push_back(equal_exprt(s0_char, s1_char));
1319  }
1320  const exprt equal_strings = conjunction(conjuncts);
1321  return and_exprt(univ_bounds, get(constraint.premise), equal_strings);
1322 }
1323 
1328 template <typename T>
1330  messaget::mstreamt &stream,
1331  const T &axiom,
1332  const T &axiom_in_model,
1333  const exprt &negaxiom,
1334  const exprt &with_concretized_arrays)
1335 {
1336  stream << std::string(4, ' ') << "- axiom:\n" << std::string(6, ' ');
1337  stream << to_string(axiom);
1338  stream << '\n'
1339  << std::string(4, ' ') << "- axiom_in_model:\n"
1340  << std::string(6, ' ');
1341  stream << to_string(axiom_in_model) << '\n'
1342  << std::string(4, ' ') << "- negated_axiom:\n"
1343  << std::string(6, ' ') << format(negaxiom) << '\n';
1344  stream << std::string(4, ' ') << "- negated_axiom_with_concretized_arrays:\n"
1345  << std::string(6, ' ') << format(with_concretized_arrays) << '\n';
1346 }
1347 
1349 static std::pair<bool, std::vector<exprt>> check_axioms(
1350  const string_axiomst &axioms,
1351  string_constraint_generatort &generator,
1352  const std::function<exprt(const exprt &)> &get,
1353  messaget::mstreamt &stream,
1354  const namespacet &ns,
1355  bool use_counter_example,
1356  const union_find_replacet &symbol_resolve,
1357  const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
1358  &not_contain_witnesses)
1359 {
1360  stream << "string_refinementt::check_axioms:" << messaget::eom;
1361 
1362  stream << "symbol_resolve:" << messaget::eom;
1363  auto pairs = symbol_resolve.to_vector();
1364  for(const auto &pair : pairs)
1365  stream << " - " << format(pair.first) << " --> " << format(pair.second)
1366  << messaget::eom;
1367 
1368 #ifdef DEBUG
1369  debug_model(
1370  generator,
1371  stream,
1372  ns,
1373  get,
1374  generator.fresh_symbol.created_symbols,
1375  generator.array_pool);
1376 #endif
1377 
1378  // Maps from indexes of violated universal axiom to a witness of violation
1379  std::map<size_t, exprt> violated;
1380 
1381  stream << "string_refinement::check_axioms: " << axioms.universal.size()
1382  << " universal axioms:" << messaget::eom;
1383  for(size_t i = 0; i < axioms.universal.size(); i++)
1384  {
1385  const string_constraintt &axiom = axioms.universal[i];
1386  const string_constraintt axiom_in_model(
1387  axiom.univ_var,
1388  get(axiom.lower_bound),
1389  get(axiom.upper_bound),
1390  get(axiom.body));
1391 
1392  exprt negaxiom = axiom_in_model.negation();
1393  negaxiom = simplify_expr(negaxiom, ns);
1394 
1395  stream << std::string(2, ' ') << i << ".\n";
1396  const exprt with_concretized_arrays =
1397  substitute_array_access(negaxiom, generator.fresh_symbol, true);
1399  stream, axiom, axiom_in_model, negaxiom, with_concretized_arrays);
1400 
1401  if(
1402  const auto &witness = find_counter_example(
1403  ns,
1404  with_concretized_arrays,
1405  axiom.univ_var,
1406  stream.message.get_message_handler()))
1407  {
1408  stream << std::string(4, ' ')
1409  << "- violated_for: " << format(axiom.univ_var) << "="
1410  << format(*witness) << messaget::eom;
1411  violated[i] = *witness;
1412  }
1413  else
1414  stream << std::string(4, ' ') << "- correct" << messaget::eom;
1415  }
1416 
1417  // Maps from indexes of violated not_contains axiom to a witness of violation
1418  std::map<std::size_t, exprt> violated_not_contains;
1419 
1420  stream << "there are " << axioms.not_contains.size() << " not_contains axioms"
1421  << messaget::eom;
1422  for(std::size_t i = 0; i < axioms.not_contains.size(); i++)
1423  {
1424  const string_not_contains_constraintt &nc_axiom = axioms.not_contains[i];
1425  const symbol_exprt univ_var = generator.fresh_symbol(
1426  "not_contains_univ_var", nc_axiom.s0.length_type());
1427  const exprt negated_axiom = negation_of_not_contains_constraint(
1428  nc_axiom, univ_var, [&](const exprt &expr) {
1429  return simplify_expr(get(expr), ns);
1430  });
1431 
1432  stream << std::string(2, ' ') << i << ".\n";
1434  stream, nc_axiom, nc_axiom, negated_axiom, negated_axiom);
1435 
1436  if(
1437  const auto witness = find_counter_example(
1438  ns, negated_axiom, univ_var, stream.message.get_message_handler()))
1439  {
1440  stream << std::string(4, ' ')
1441  << "- violated_for: " << univ_var.get_identifier() << "="
1442  << format(*witness) << messaget::eom;
1443  violated_not_contains[i] = *witness;
1444  }
1445  }
1446 
1447  if(violated.empty() && violated_not_contains.empty())
1448  {
1449  stream << "no violated property" << messaget::eom;
1450  return {true, std::vector<exprt>()};
1451  }
1452  else
1453  {
1454  stream << violated.size() << " universal string axioms can be violated"
1455  << messaget::eom;
1456  stream << violated_not_contains.size()
1457  << " not_contains string axioms can be violated" << messaget::eom;
1458 
1459  if(use_counter_example)
1460  {
1461  std::vector<exprt> lemmas;
1462 
1463  for(const auto &v : violated)
1464  {
1465  const exprt &val = v.second;
1466  const string_constraintt &axiom = axioms.universal[v.first];
1467 
1468  exprt instance(axiom.body);
1469  replace_expr(axiom.univ_var, val, instance);
1470  // We are not sure the index set contains only positive numbers
1471  and_exprt bounds(
1472  axiom.univ_within_bounds(),
1473  binary_relation_exprt(from_integer(0, val.type()), ID_le, val));
1474  replace_expr(axiom.univ_var, val, bounds);
1475  const implies_exprt counter(bounds, instance);
1476  lemmas.push_back(counter);
1477  }
1478 
1479  for(const auto &v : violated_not_contains)
1480  {
1481  const exprt &val = v.second;
1482  const string_not_contains_constraintt &axiom =
1483  axioms.not_contains[v.first];
1484 
1485  const exprt func_val =
1486  index_exprt(not_contain_witnesses.at(axiom), val);
1487  const exprt comp_val = simplify_sum(plus_exprt(val, func_val));
1488 
1489  std::set<std::pair<exprt, exprt>> indices;
1490  indices.insert(std::pair<exprt, exprt>(comp_val, func_val));
1491  const exprt counter =
1492  ::instantiate_not_contains(axiom, indices, not_contain_witnesses)[0];
1493  lemmas.push_back(counter);
1494  }
1495  return {false, lemmas};
1496  }
1497  }
1498  return {false, std::vector<exprt>()};
1499 }
1500 
1504 {
1505  return linear_functiont{f}.to_expr();
1506 }
1507 
1513 static void initial_index_set(
1514  index_set_pairt &index_set,
1515  const namespacet &ns,
1516  const string_axiomst &axioms)
1517 {
1518  for(const auto &axiom : axioms.universal)
1519  initial_index_set(index_set, ns, axiom);
1520  for(const auto &axiom : axioms.not_contains)
1521  initial_index_set(index_set, ns, axiom);
1522 }
1523 
1528 static void update_index_set(
1529  index_set_pairt &index_set,
1530  const namespacet &ns,
1531  const std::vector<exprt> &current_constraints)
1532 {
1533  for(const auto &axiom : current_constraints)
1534  update_index_set(index_set, ns, axiom);
1535 }
1536 
1543 static void get_sub_arrays(const exprt &array_expr, std::vector<exprt> &accu)
1544 {
1545  if(array_expr.id() == ID_if)
1546  {
1547  get_sub_arrays(to_if_expr(array_expr).true_case(), accu);
1548  get_sub_arrays(to_if_expr(array_expr).false_case(), accu);
1549  }
1550  else
1551  {
1552  if(array_expr.type().id() == ID_array)
1553  {
1554  // TODO: check_that it does not contain any sub_array
1555  accu.push_back(array_expr);
1556  }
1557  else
1558  {
1559  for(const auto &operand : array_expr.operands())
1560  get_sub_arrays(operand, accu);
1561  }
1562  }
1563 }
1564 
1570 static void add_to_index_set(
1571  index_set_pairt &index_set,
1572  const namespacet &ns,
1573  const exprt &s,
1574  exprt i)
1575 {
1576  simplify(i, ns);
1577  const bool is_size_t = numeric_cast<std::size_t>(i).has_value();
1578  if(i.id() != ID_constant || is_size_t)
1579  {
1580  std::vector<exprt> sub_arrays;
1581  get_sub_arrays(s, sub_arrays);
1582  for(const auto &sub : sub_arrays)
1583  if(index_set.cumulative[sub].insert(i).second)
1584  index_set.current[sub].insert(i);
1585  }
1586 }
1587 
1603 static void initial_index_set(
1604  index_set_pairt &index_set,
1605  const namespacet &ns,
1606  const exprt &qvar,
1607  const exprt &upper_bound,
1608  const exprt &s,
1609  const exprt &i)
1610 {
1611  PRECONDITION(
1612  s.id() == ID_symbol || s.id() == ID_nondet_symbol || s.id() == ID_array ||
1613  s.id() == ID_if);
1614  if(s.id() == ID_array)
1615  {
1616  for(std::size_t j = 0; j < s.operands().size(); ++j)
1617  add_to_index_set(index_set, ns, s, from_integer(j, i.type()));
1618  return;
1619  }
1620  if(auto ite = expr_try_dynamic_cast<if_exprt>(s))
1621  {
1622  initial_index_set(index_set, ns, qvar, upper_bound, ite->true_case(), i);
1623  initial_index_set(index_set, ns, qvar, upper_bound, ite->false_case(), i);
1624  return;
1625  }
1626  const minus_exprt u_minus_1(upper_bound, from_integer(1, upper_bound.type()));
1627  exprt i_copy = i;
1628  replace_expr(qvar, u_minus_1, i_copy);
1629  add_to_index_set(index_set, ns, s, i_copy);
1630 }
1631 
1632 static void initial_index_set(
1633  index_set_pairt &index_set,
1634  const namespacet &ns,
1635  const string_constraintt &axiom)
1636 {
1637  const symbol_exprt &qvar = axiom.univ_var;
1638  const auto &bound = axiom.upper_bound;
1639  auto it = axiom.body.depth_begin();
1640  const auto end = axiom.body.depth_end();
1641  while(it != end)
1642  {
1643  if(it->id() == ID_index && is_char_type(it->type()))
1644  {
1645  const auto &index_expr = to_index_expr(*it);
1646  const auto &s = index_expr.array();
1647  initial_index_set(index_set, ns, qvar, bound, s, index_expr.index());
1648  it.next_sibling_or_parent();
1649  }
1650  else
1651  ++it;
1652  }
1653 }
1654 
1655 static void initial_index_set(
1656  index_set_pairt &index_set,
1657  const namespacet &ns,
1658  const string_not_contains_constraintt &axiom)
1659 {
1660  auto it = axiom.premise.depth_begin();
1661  const auto end = axiom.premise.depth_end();
1662  while(it != end)
1663  {
1664  if(it->id() == ID_index && is_char_type(it->type()))
1665  {
1666  const exprt &s = to_index_expr(*it).array();
1667  const exprt &i = to_index_expr(*it).index();
1668 
1669  // cur is of the form s[i] and no quantified variable appears in i
1670  add_to_index_set(index_set, ns, s, i);
1671 
1672  it.next_sibling_or_parent();
1673  }
1674  else
1675  ++it;
1676  }
1677 
1678  const minus_exprt kminus1(
1680  add_to_index_set(index_set, ns, axiom.s1.content(), kminus1);
1681 }
1682 
1687 static void update_index_set(
1688  index_set_pairt &index_set,
1689  const namespacet &ns,
1690  const exprt &formula)
1691 {
1692  std::list<exprt> to_process;
1693  to_process.push_back(formula);
1694 
1695  while(!to_process.empty())
1696  {
1697  exprt cur = to_process.back();
1698  to_process.pop_back();
1699  if(cur.id() == ID_index && is_char_type(cur.type()))
1700  {
1701  const exprt &s = to_index_expr(cur).array();
1702  const exprt &i = to_index_expr(cur).index();
1704  s.type().id() == ID_array,
1705  string_refinement_invariantt("index expressions must index on arrays"));
1706  exprt simplified = simplify_sum(i);
1707  if(s.id() != ID_array) // do not update index set of constant arrays
1708  add_to_index_set(index_set, ns, s, simplified);
1709  }
1710  else
1711  {
1712  forall_operands(it, cur)
1713  to_process.push_back(*it);
1714  }
1715  }
1716 }
1717 
1736 static std::vector<exprt> instantiate(
1737  const string_not_contains_constraintt &axiom,
1738  const index_set_pairt &index_set,
1739  const std::unordered_map<string_not_contains_constraintt, symbol_exprt>
1740  &witnesses)
1741 {
1742  const array_string_exprt &s0 = axiom.s0;
1743  const array_string_exprt &s1 = axiom.s1;
1744 
1745  const auto &index_set0 = index_set.cumulative.find(s0.content());
1746  const auto &index_set1 = index_set.cumulative.find(s1.content());
1747  const auto &current_index_set0 = index_set.current.find(s0.content());
1748  const auto &current_index_set1 = index_set.current.find(s1.content());
1749 
1750  if(
1751  index_set0 != index_set.cumulative.end() &&
1752  index_set1 != index_set.cumulative.end() &&
1753  current_index_set0 != index_set.current.end() &&
1754  current_index_set1 != index_set.current.end())
1755  {
1756  typedef std::pair<exprt, exprt> expr_pairt;
1757  std::set<expr_pairt> index_pairs;
1758 
1759  for(const auto &ic0 : current_index_set0->second)
1760  for(const auto &i1 : index_set1->second)
1761  index_pairs.insert(expr_pairt(ic0, i1));
1762  for(const auto &ic1 : current_index_set1->second)
1763  for(const auto &i0 : index_set0->second)
1764  index_pairs.insert(expr_pairt(i0, ic1));
1765 
1766  return ::instantiate_not_contains(axiom, index_pairs, witnesses);
1767  }
1768  return {};
1769 }
1770 
1778 exprt substitute_array_lists(exprt expr, size_t string_max_length)
1779 {
1780  for(auto &operand : expr.operands())
1781  operand = substitute_array_lists(operand, string_max_length);
1782 
1783  if(expr.id() == ID_array_list)
1784  {
1786  expr.operands().size() >= 2,
1787  string_refinement_invariantt("array-lists must have at least two "
1788  "operands"));
1789  const typet &char_type = expr.operands()[1].type();
1791  exprt ret_expr = array_of_exprt(from_integer(0, char_type), arr_type);
1792 
1793  for(size_t i = 0; i < expr.operands().size(); i += 2)
1794  {
1795  const exprt &index = expr.operands()[i];
1796  const exprt &value = expr.operands()[i + 1];
1797  const auto index_value = numeric_cast<std::size_t>(index);
1798  if(
1799  !index.is_constant() ||
1800  (index_value && *index_value < string_max_length))
1801  ret_expr = with_exprt(ret_expr, index, value);
1802  }
1803  return ret_expr;
1804  }
1805 
1806  return expr;
1807 }
1808 
1817 {
1818  const auto super_get = [this](const exprt &expr) {
1819  return supert::get(expr);
1820  };
1821  exprt ecopy(expr);
1822  (void)symbol_resolve.replace_expr(ecopy);
1823 
1824  // Special treatment for index expressions
1825  const auto &index_expr = expr_try_dynamic_cast<index_exprt>(ecopy);
1826  if(index_expr && is_char_type(index_expr->type()))
1827  {
1828  std::reference_wrapper<const exprt> current(index_expr->array());
1829  while(current.get().id() == ID_if)
1830  {
1831  const auto &if_expr = expr_dynamic_cast<if_exprt>(current.get());
1832  const exprt cond = get(if_expr.cond());
1833  if(cond.is_true())
1834  current = std::cref(if_expr.true_case());
1835  else if(cond.is_false())
1836  current = std::cref(if_expr.false_case());
1837  else
1838  UNREACHABLE;
1839  }
1840  const auto array = supert::get(current.get());
1841  const auto index = get(index_expr->index());
1842 
1843  // If the underlying solver does not know about the existence of an array,
1844  // it can return nil, which cannot be used in the expression returned here.
1845  if(array.is_nil())
1846  return index_exprt(current, index);
1847 
1848  const exprt unknown =
1849  from_integer(CHARACTER_FOR_UNKNOWN, index_expr->type());
1850  if(
1851  const auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown))
1852  {
1853  if(const auto index_value = numeric_cast<std::size_t>(index))
1854  return sparse_array->at(*index_value);
1855  return sparse_array->to_if_expression(index);
1856  }
1857 
1858  INVARIANT(array.id() == ID_symbol || array.id() == ID_nondet_symbol,
1859  "Apart from symbols, array valuations can be interpreted as "
1860  "sparse arrays. Array model : " + array.pretty());
1861  return index_exprt(array, index);
1862  }
1863 
1864  if(is_char_array_type(ecopy.type(), ns))
1865  {
1867 
1868  if(
1869  const auto from_dependencies =
1870  dependencies.eval(arr, [&](const exprt &expr) { return get(expr); }))
1871  return *from_dependencies;
1872 
1873  if(
1874  const auto arr_model_opt =
1875  get_array(super_get, ns, log.debug(), arr, generator.array_pool))
1876  return *arr_model_opt;
1877 
1878  if(
1879  const auto &length_from_pool =
1881  {
1882  const exprt length = super_get(length_from_pool.value());
1883 
1884  if(const auto n = numeric_cast<std::size_t>(length))
1885  {
1886  const interval_sparse_arrayt sparse_array(
1888  return sparse_array.concretize(*n, length.type());
1889  }
1890  }
1891  return arr;
1892  }
1893  return supert::get(ecopy);
1894 }
1895 
1906  const namespacet &ns,
1907  const exprt &axiom,
1908  const symbol_exprt &var,
1909  message_handlert &message_handler)
1910 {
1911  satcheck_no_simplifiert sat_check(message_handler);
1912  boolbvt solver(ns, sat_check, message_handler);
1913  solver << axiom;
1914 
1916  return solver.get(var);
1917  else
1918  return {};
1919 }
1920 
1922 typedef std::map<exprt, std::vector<exprt>> array_index_mapt;
1923 
1926 {
1927  array_index_mapt indices;
1928  // clang-format off
1929  std::for_each(
1930  expr.depth_begin(),
1931  expr.depth_end(),
1932  [&](const exprt &expr)
1933  {
1934  const auto index_expr = expr_try_dynamic_cast<const index_exprt>(expr);
1935  if(index_expr)
1936  indices[index_expr->array()].push_back(index_expr->index());
1937  });
1938  // clang-format on
1939  return indices;
1940 }
1941 
1947 static bool
1949 {
1950  for(auto it = expr.depth_begin(); it != expr.depth_end();)
1951  {
1952  if(
1953  it->id() != ID_plus && it->id() != ID_minus &&
1954  it->id() != ID_unary_minus && *it != var)
1955  {
1956  if(std::find(it->depth_begin(), it->depth_end(), var) != it->depth_end())
1957  return false;
1958  else
1959  it.next_sibling_or_parent();
1960  }
1961  else
1962  ++it;
1963  }
1964  return true;
1965 }
1966 
1975 {
1976  for(auto it = constr.body.depth_begin(); it != constr.body.depth_end();)
1977  {
1978  if(*it == constr.univ_var)
1979  return false;
1980  if(it->id() == ID_index)
1981  it.next_sibling_or_parent();
1982  else
1983  ++it;
1984  }
1985  return true;
1986 }
1987 
1995  messaget::mstreamt &stream,
1996  const namespacet &ns,
1997  const string_constraintt &constraint)
1998 {
1999  const array_index_mapt body_indices = gather_indices(constraint.body);
2000  // Must validate for each string. Note that we have an invariant that the
2001  // second value in the pair is non-empty.
2002  for(const auto &pair : body_indices)
2003  {
2004  // Condition 1: All indices of the same string must be the of the same form
2005  const exprt rep = pair.second.back();
2006  for(size_t j = 0; j < pair.second.size() - 1; j++)
2007  {
2008  const exprt i = pair.second[j];
2009  const equal_exprt equals(rep, i);
2010  const exprt result = simplify_expr(equals, ns);
2011  if(result.is_false())
2012  {
2013  stream << "Indices not equal: " << to_string(constraint)
2014  << ", str: " << format(pair.first) << messaget::eom;
2015  return false;
2016  }
2017  }
2018 
2019  // Condition 2: f must be linear in the quantified variable
2020  if(!is_linear_arithmetic_expr(rep, constraint.univ_var))
2021  {
2022  stream << "f is not linear: " << to_string(constraint)
2023  << ", str: " << format(pair.first) << messaget::eom;
2024  return false;
2025  }
2026 
2027  // Condition 3: the quantified variable can only occur in indices in the
2028  // body
2029  if(!universal_only_in_index(constraint))
2030  {
2031  stream << "Universal variable outside of index:" << to_string(constraint)
2032  << messaget::eom;
2033  return false;
2034  }
2035  }
2036 
2037  return true;
2038 }
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
int8_t s1
Definition: bytecode_info.h:59
bitvector_typet index_type()
Definition: c_types.cpp:22
bitvector_typet char_type()
Definition: c_types.cpp:124
Boolean AND.
Definition: std_expr.h:1974
Array constructor from list of elements.
Definition: std_expr.h:1476
const array_typet & type() const
Definition: std_expr.h:1483
Array constructor from single element.
Definition: std_expr.h:1411
Correspondance between arrays and pointers string representations.
Definition: array_pool.h:42
const std::unordered_map< exprt, array_string_exprt, irep_hash > & get_arrays_of_pointers() const
Definition: array_pool.h:50
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
optionalt< exprt > get_length_if_exists(const array_string_exprt &s) const
As opposed to get_length(), do not create a new symbol if the length of the array_string_exprt does n...
Definition: array_pool.cpp:48
const std::unordered_map< array_string_exprt, exprt, irep_hash > & created_strings() const
Return a map mapping all array_string_exprt of the array_pool to their length.
Definition: array_pool.cpp:179
const typet & length_type() const
Definition: string_expr.h:69
exprt & content()
Definition: string_expr.h:74
Arrays with given size.
Definition: std_types.h:763
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:777
const exprt & size() const
Definition: std_types.h:790
const namespacet & ns
Definition: arrays.h:56
messaget log
Definition: arrays.h:57
exprt & lhs()
Definition: std_expr.h:580
exprt & rhs()
Definition: std_expr.h:590
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
Definition: boolbv.h:44
void set_to(const exprt &expr, bool value) override
For a Boolean expression expr, add the constraint 'expr' if value is true, otherwise add 'not expr'.
Definition: boolbv.cpp:518
exprt get(const exprt &expr) const override
Return expr with variables replaced by values from satisfying assignment if available.
Definition: boolbv_get.cpp:20
decision_proceduret::resultt dec_solve() override
Run the decision procedure to solve the problem.
A constant literal expression.
Definition: std_expr.h:2807
resultt
Result of running the decision procedure.
Equality.
Definition: std_expr.h:1225
Maps equation to expressions contained in them and conversely expressions to equations that contain t...
void add(const std::size_t i, const exprt &expr)
Record the fact that equation i contains expression expr
std::vector< std::size_t > find_equations(const exprt &expr)
std::vector< exprt > find_expressions(const std::size_t i)
Base class for all expressions.
Definition: expr.h:54
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:33
depth_iteratort depth_end()
Definition: expr.cpp:267
depth_iteratort depth_begin()
Definition: expr.cpp:265
bool is_false() const
Return whether the expression is a constant representing false.
Definition: expr.cpp:42
typet & type()
Return the type of the expression.
Definition: expr.h:82
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.cpp:26
operandst & operands()
Definition: expr.h:92
The trinary if-then-else operator.
Definition: std_expr.h:2226
exprt & true_case()
Definition: std_expr.h:2253
exprt & false_case()
Definition: std_expr.h:2263
exprt & cond()
Definition: std_expr.h:2243
Boolean implication.
Definition: std_expr.h:2037
Array index operator.
Definition: std_expr.h:1328
exprt & array()
Definition: std_expr.h:1353
exprt & index()
Definition: std_expr.h:1363
An expression denoting infinity.
Definition: std_expr.h:2890
Represents arrays by the indexes up to which the value remains the same.
exprt to_if_expression(const exprt &index) const
static optionalt< interval_sparse_arrayt > of_expr(const exprt &expr, const exprt &extra_value)
If the expression is an array_exprt or a with_exprt uses the appropriate constructor,...
array_exprt concretize(std::size_t size, const typet &index_type) const
Convert to an array representation, ignores elements at index >= size.
const irep_idt & id() const
Definition: irep.h:396
bool is_nil() const
Definition: irep.h:376
Canonical representation of linear function, for instance, expression $x + x - y + 5 - 3$ would given...
Extract member of struct or union.
Definition: std_expr.h:2667
messaget & message
Definition: message.h:246
mstreamt & error() const
Definition: message.h:399
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
static eomt eom
Definition: message.h:297
Binary minus.
Definition: std_expr.h:973
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
The NIL expression.
Definition: std_expr.h:2874
Boolean negation.
Definition: std_expr.h:2181
The plus expression Associativity is not specified.
Definition: std_expr.h:914
literalt convert(const exprt &expr) override
Convert a Boolean expression and return the corresponding literal.
void l_set_to_true(literalt a)
Definition: prop.h:50
static exprt to_if_expression(const with_exprt &expr, const exprt &index)
Creates an if_expr corresponding to the result of accessing the array at the given index.
optionalt< exprt > make_array_pointer_association(const exprt &return_code, const function_application_exprt &expr)
Associate array to pointer, and array to length.
static bool is_valid_string_constraint(messaget::mstreamt &stream, const namespacet &ns, const string_constraintt &constraint)
Checks the data invariant for string_constraintt.
exprt univ_within_bounds() const
static array_index_mapt gather_indices(const exprt &expr)
exprt negation() const
static bool universal_only_in_index(const string_constraintt &constr)
The universally quantified variable is only allowed to occur in index expressions in the body of a st...
static bool is_linear_arithmetic_expr(const exprt &expr, const symbol_exprt &var)
std::map< exprt, std::vector< exprt > > array_index_mapt
void output_dot(std::ostream &stream) const
void clean_cache()
Clean the cache used by eval
NODISCARD string_constraintst add_constraints(string_constraint_generatort &generatort)
For all builtin call on which a test (or an unsupported buitin) result depends, add the corresponding...
optionalt< exprt > eval(const array_string_exprt &s, const std::function< exprt(const exprt &)> &get_value) const
Attempt to evaluate the given string from the dependencies and valuation of strings on which it depen...
string_constraint_generatort generator
union_find_replacet symbol_resolve
std::vector< exprt > equations
string_axiomst axioms
string_refinementt(const infot &)
const configt config_
decision_proceduret::resultt dec_solve() override
Main decision procedure of the solver.
std::set< exprt > seen_instances
void set_to(const exprt &expr, bool value) override
Record the constraints to ensure that the expression is true when the boolean is true and false other...
string_dependenciest dependencies
index_set_pairt index_sets
exprt get(const exprt &expr) const override
Evaluates the given expression in the valuation found by string_refinementt::dec_solve.
std::vector< exprt > instantiate_not_contains(const string_not_contains_constraintt &axiom, const std::set< std::pair< exprt, exprt >> &index_pairs, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
Instantiates a quantified formula representing not_contains by substituting the quantifiers and gener...
std::vector< exprt > current_constraints
void add_lemma(const exprt &lemma, bool simplify_lemma=true)
Add the given lemma to the solver.
String type.
Definition: std_types.h:901
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Expression to hold a symbol (variable)
Definition: std_expr.h:80
const irep_idt & get_identifier() const
Definition: std_expr.h:109
Generation of fresh symbols of a given type.
Definition: array_pool.h:22
const typet & subtype() const
Definition: type.h:156
The type of an expression, extends irept.
Definition: type.h:29
const typet & subtype() const
Definition: type.h:48
Similar interface to union-find for expressions, with a function for replacing sub-expressions by the...
std::vector< std::pair< exprt, exprt > > to_vector() const
exprt make_union(const exprt &a, const exprt &b)
Merge the set containing a and the set containing b.
bool replace_expr(exprt &expr) const
Replace subexpressions of expr by the representative element of the set they belong to.
Operator to update elements in structs and arrays.
Definition: std_expr.h:2312
#define forall_operands(it, expr)
Definition: expr.h:18
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subtype(const typet &type, const std::function< bool(const typet &)> &pred, const namespacet &ns)
returns true if any of the contained types satisfies pred
Definition: expr_util.cpp:152
Deprecated expression utility functions.
static format_containert< T > format(const T &o)
Definition: format.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
Magic numbers used throughout the codebase.
const std::size_t MAX_CONCRETE_STRING_SIZE
Definition: magic.h:14
bool can_cast_expr< function_application_exprt >(const exprt &base)
nonstd::optional< T > optionalt
Definition: optional.h:35
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< iteratort > make_range(iteratort begin, iteratort end)
Definition: range.h:524
bool replace_expr(const exprt &what, const exprt &by, exprt &dest)
bool simplify(exprt &expr, const namespacet &ns)
exprt simplify_expr(exprt src, const namespacet &ns)
int solver(std::istream &in)
BigInt mp_integer
Definition: smt_terms.h:12
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:503
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:34
bool can_cast_expr< equal_exprt >(const exprt &base)
Definition: std_expr.h:1249
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2291
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:2840
const equal_exprt & to_equal_expr(const exprt &expr)
Cast an exprt to an equal_exprt.
Definition: std_expr.h:1265
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1391
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:832
#define CHARACTER_FOR_UNKNOWN
Module: String solver Author: Diffblue Ltd.
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
void merge(string_constraintst &result, string_constraintst other)
Merge two sets of constraints by appending to the first one.
Defines related function for string constraints.
std::vector< exprt > instantiate_not_contains(const string_not_contains_constraintt &axiom, const std::set< std::pair< exprt, exprt >> &index_pairs, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
optionalt< exprt > add_node(string_dependenciest &dependencies, const exprt &expr, array_poolt &array_pool, symbol_generatort &fresh_symbol)
When a sub-expression of expr is a builtin_function, add a "string_builtin_function" node to the grap...
Keeps track of dependencies between strings.
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:95
static void initial_index_set(index_set_pairt &index_set, const namespacet &ns, const string_constraintt &axiom)
static std::vector< exprt > extract_strings_from_lhs(const exprt &lhs, const namespacet &ns)
This is meant to be used on the lhs of an equation with string subtype.
static std::string string_of_array(const array_exprt &arr)
convert the content of a string to a more readable representation.
static void update_index_set(index_set_pairt &index_set, const namespacet &ns, const std::vector< exprt > &current_constraints)
Add to the index set all the indices that appear in the formulas.
static optionalt< exprt > get_array(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, const array_poolt &array_pool)
Get a model of an array and put it in a certain form.
static std::vector< exprt > extract_strings(const exprt &expr, const namespacet &ns)
exprt substitute_array_lists(exprt expr, size_t string_max_length)
Replace array-lists by 'with' expressions.
static std::vector< T > fill_in_map_as_vector(const std::map< std::size_t, T > &index_value)
Convert index-value map to a vector of values.
static std::vector< exprt > generate_instantiations(const index_set_pairt &index_set, const string_axiomst &axioms, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &not_contain_witnesses)
Instantiation of all constraints.
static std::pair< bool, std::vector< exprt > > check_axioms(const string_axiomst &axioms, string_constraint_generatort &generator, const std::function< exprt(const exprt &)> &get, messaget::mstreamt &stream, const namespacet &ns, bool use_counter_example, const union_find_replacet &symbol_resolve, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &not_contain_witnesses)
Check axioms takes the model given by the underlying solver and answers whether it satisfies the stri...
static void add_equations_for_symbol_resolution(union_find_replacet &symbol_solver, const std::vector< exprt > &equations, const namespacet &ns, messaget::mstreamt &stream)
Add association for each char pointer in the equation.
static void make_char_array_pointer_associations(string_constraint_generatort &generator, exprt &expr)
If expr is an equation whose right-hand-side is a associate_array_to_pointer call,...
static void add_string_equation_to_symbol_resolution(const equal_exprt &eq, union_find_replacet &symbol_resolve, const namespacet &ns)
Given an equation on strings, mark these strings as belonging to the same set in the symbol_resolve s...
static bool validate(const string_refinementt::infot &info)
static void add_to_index_set(index_set_pairt &index_set, const namespacet &ns, const exprt &s, exprt i)
Add i to the index set all the indices that appear in the formula.
exprt simplify_sum(const exprt &f)
union_find_replacet string_identifiers_resolution_from_equations(const std::vector< equal_exprt > &equations, const namespacet &ns, messaget::mstreamt &stream)
Symbol resolution for expressions of type string typet.
static void display_index_set(messaget::mstreamt &stream, const index_set_pairt &index_set)
Write index set to the given stream, use for debugging.
static exprt negation_of_not_contains_constraint(const string_not_contains_constraintt &constraint, const symbol_exprt &univ_var, const std::function< exprt(const exprt &)> &get)
Negates the constraint to be fed to a solver.
static optionalt< exprt > find_counter_example(const namespacet &ns, const exprt &axiom, const symbol_exprt &var, message_handlert &message_handler)
Creates a solver with axiom as the only formula added and runs it.
static exprt replace_expr_copy(const union_find_replacet &symbol_resolve, exprt expr)
Substitute sub-expressions in equation by representative elements of symbol_resolve whenever possible...
static void substitute_array_access_in_place(exprt &expr, symbol_generatort &symbol_generator, const bool left_propagate)
Auxiliary function for substitute_array_access Performs the same operation but modifies the argument ...
static void get_sub_arrays(const exprt &array_expr, std::vector< exprt > &accu)
An expression representing an array of characters can be in the form of an if expression for instance...
void debug_model(const string_constraint_generatort &generator, messaget::mstreamt &stream, const namespacet &ns, const std::function< exprt(const exprt &)> &super_get, const std::vector< symbol_exprt > &symbols, array_poolt &array_pool)
Display part of the current model by mapping the variables created by the solver to constant expressi...
static optionalt< exprt > get_valid_array_size(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, const array_poolt &array_pool)
Get a model of the size of the input string.
static std::vector< exprt > instantiate(const string_not_contains_constraintt &axiom, const index_set_pairt &index_set, const std::unordered_map< string_not_contains_constraintt, symbol_exprt > &witnesses)
Instantiates a quantified formula representing not_contains by substituting the quantifiers and gener...
static optionalt< exprt > substitute_array_access(const index_exprt &index_expr, symbol_generatort &symbol_generator, const bool left_propagate)
static void debug_check_axioms_step(messaget::mstreamt &stream, const T &axiom, const T &axiom_in_model, const exprt &negaxiom, const exprt &with_concretized_arrays)
Debugging function which outputs the different steps an axiom goes through to be checked in check axi...
static exprt get_char_array_and_concretize(const std::function< exprt(const exprt &)> &super_get, const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, array_poolt &array_pool)
Debugging function which finds the valuation of the given array in super_get and concretize unknown c...
String support via creating string constraints and progressively instantiating the universal constrai...
#define string_refinement_invariantt(reason)
std::string utf16_constant_array_to_java(const array_exprt &arr, std::size_t length)
Construct a string from a constant array.
bool is_char_type(const typet &type)
For now, any unsigned bitvector type of width smaller or equal to 16 is considered a character.
bool has_char_pointer_subtype(const typet &type, const namespacet &ns)
bool is_char_array_type(const typet &type, const namespacet &ns)
Distinguish char array from other types.
bool is_char_pointer_type(const typet &type)
For now, any unsigned bitvector type is considered a character.
const namespacet * ns
Definition: bv_refinement.h:35
std::map< exprt, std::set< exprt > > current
std::map< exprt, std::set< exprt > > cumulative
std::vector< string_constraintt > universal
std::vector< string_not_contains_constraintt > not_contains
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< string_not_contains_constraintt > not_contains
std::vector< exprt > existential
std::vector< string_constraintt > universal
Constraints to encode non containement of strings.
string_refinementt constructor arguments