From 3e5c8dd80de98ce4b6e5bdb65cd0c4af8a9eceb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Mon, 24 Sep 2018 17:57:55 +0200
Subject: [PATCH] Macro-processor: implement comprehensions

Due to a limitation of the current implementation, this breaks syntax such as
[ (i,j) ] (but not [ (2,j) ]): the problem only occurs when an array is
constructed with, as its first element, a tuple whose first element is a
variable name. Solving this problem requires an overhaul of the
macro-processor, with construction of ASTs at parsing time and evaluation
later on (instead of on-the-fly evaluation).

Ref #5
---
 src/macro/MacroBison.yy  | 38 +++++++++++++++++-
 src/macro/MacroDriver.cc | 73 ++++++++++++++++++++++++++++++++++
 src/macro/MacroDriver.hh | 36 ++++++++++++++++-
 src/macro/MacroFlex.ll   | 86 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 226 insertions(+), 7 deletions(-)

diff --git a/src/macro/MacroBison.yy b/src/macro/MacroBison.yy
index 9d48a907..fbf415b3 100644
--- a/src/macro/MacroBison.yy
+++ b/src/macro/MacroBison.yy
@@ -68,6 +68,7 @@ class MacroDriver;
 
 %token COMMA DEFINE LINE FOR IN IF ECHO_DIR ERROR IFDEF IFNDEF POWER
 %token LPAREN RPAREN LBRACKET RBRACKET EQUAL EOL LENGTH ECHOMACROVARS SAVE
+%token SEMICOLON ATSIGN
 
 %token <int> INTEGER
 %token <string> NAME STRING
@@ -88,7 +89,7 @@ class MacroDriver;
 
 %type <vector<string>> comma_name
 %type <MacroValuePtr> expr
-%type <vector<MacroValuePtr>> comma_expr tuple_comma_expr
+%type <vector<MacroValuePtr>> comma_expr tuple_comma_expr comprehension_clause
 %%
 
 %start statement_list_or_nothing;
@@ -217,6 +218,24 @@ expr : INTEGER
        { TYPERR_CATCH($$ = $1->set_intersection($3), @$); }
      | expr POWER expr
        { TYPERR_CATCH($$ = $1->power($3), @$); }
+     | LBRACKET NAME IN expr SEMICOLON
+       { // Single loop variable. Type errors are reported with a location, as in the other rules
+         TYPERR_CATCH(driver.init_comprehension(vector<string>{$2}, $4), @4);
+         TYPERR_CATCH(driver.iter_comprehension(), @4);
+       }
+       comprehension_clause RBRACKET
+       {
+         $$ = make_shared<ArrayMV>($7); // Array made of the elements kept by the clause
+       }
+     | LBRACKET LPAREN comma_name RPAREN IN expr SEMICOLON
+       { // Several loop variables, bound to the components of each tuple of the array
+         TYPERR_CATCH(driver.init_comprehension($3, $6), @6);
+         TYPERR_CATCH(driver.iter_comprehension(), @6);
+       }
+       comprehension_clause RBRACKET
+       {
+         $$ = make_shared<ArrayMV>($9); // Array made of the elements kept by the clause
+       }
      ;
 
 comma_expr : %empty
@@ -237,6 +256,23 @@ tuple_comma_expr : %empty
                    { $1.push_back($3); $$ = $1; }
                  ;
 
+/* The lexer will repeat the comprehension clause as many times as there are
+   elements in the set to be filtered. It also adds a dummy at-sign (@) at the
+   end of every repetition (for making parsing of repetitions unambiguous). */
+comprehension_clause : expr ATSIGN
+                       { // First repetition of the clause: start from an empty result
+                         $$ = vector<MacroValuePtr>{};
+                         TYPERR_CATCH(driver.possibly_add_comprehension_element($$, $1), @1); // Non-integer clause => located error
+                         TYPERR_CATCH(driver.iter_comprehension(), @$); // Bind the next element, or end the comprehension
+                       }
+                     | comprehension_clause expr ATSIGN
+                       { // Later repetitions: extend the result built so far
+                         $$ = $1;
+                         TYPERR_CATCH(driver.possibly_add_comprehension_element($$, $2), @2); // Non-integer clause => located error
+                         TYPERR_CATCH(driver.iter_comprehension(), @$); // Bind the next element, or end the comprehension
+                       }
+                     ;
+
 %%
 
 void
diff --git a/src/macro/MacroDriver.cc b/src/macro/MacroDriver.cc
index 0798ba5d..017cd95b 100644
--- a/src/macro/MacroDriver.cc
+++ b/src/macro/MacroDriver.cc
@@ -260,6 +260,79 @@ MacroDriver::iter_loop()  noexcept(false)
     }
 }
 
+void
+MacroDriver::init_comprehension(const vector<string> &names, MacroValuePtr value)
+{
+  auto mv = dynamic_pointer_cast<ArrayMV>(value); // Null if value is not an array
+  if (!mv)
+    throw MacroValue::TypeError("In a comprehension, the expression after the 'in' keyword must be an array");
+  comprehension_stack.emplace(names, move(mv), 0); // Subscript 0: iteration starts at the first element
+}
+
+int
+MacroDriver::get_comprehension_iter_nb() const
+{
+  assert(!comprehension_stack.empty());
+  // Size of the array iterated over by the innermost (top-of-stack) comprehension
+  const auto &array = get<1>(comprehension_stack.top());
+  return static_cast<int>(array->values.size()); // Explicit narrowing, as done in iter_comprehension()
+}
+
+
+void
+MacroDriver::iter_comprehension()
+{
+  assert(!comprehension_stack.empty());
+  // Innermost comprehension: loop variable names, array iterated over, next subscript
+  auto &top = comprehension_stack.top();
+  const vector<string> &names = get<0>(top);
+  const auto &mv = get<1>(top);
+  int &i = get<2>(top);
+  assert(i <= static_cast<int>(mv->values.size()));
+  if (i == static_cast<int>(mv->values.size()))
+    {
+      comprehension_stack.pop(); // Array exhausted: the comprehension is over
+      return;
+    }
+
+  // Fetch the current element and advance the subscript for the next call
+  const auto &elem = mv->values[i++];
+  if (names.size() == 1)
+    {
+      env[names.at(0)] = elem;
+      return;
+    }
+
+  // Several loop variables: the element must be a tuple with as many components
+  auto tmv = dynamic_pointer_cast<TupleMV>(elem);
+  if (!tmv)
+    throw MacroValue::TypeError("The expression after the 'in' keyword must be an array expression of tuples");
+  if (tmv->values.size() != names.size())
+    {
+      cerr << "Error in comprehension loop: tuple in array contains " << tmv->length()
+           << " elements while you are assigning to " << names.size() << " variables."
+           << endl;
+      exit(EXIT_FAILURE);
+    }
+  for (size_t idx = 0; idx < names.size(); idx++)
+    env[names[idx]] = tmv->values.at(idx);
+}
+
+void
+MacroDriver::possibly_add_comprehension_element(vector<MacroValuePtr> &v, MacroValuePtr test_expr) const
+{
+  auto ival = dynamic_pointer_cast<IntMV>(test_expr);
+  if (!ival)
+    throw MacroValue::TypeError("In a comprehension, the expression after the ';' must evaluate to an integer");
+  if (!ival->value)
+    return; // Clause evaluated to zero: the current element is filtered out
+
+  assert(!comprehension_stack.empty());
+  // iter_comprehension() has already advanced the subscript, hence the current element is at i-1
+  const int i = get<2>(comprehension_stack.top());
+  v.push_back(get<1>(comprehension_stack.top())->values.at(i-1));
+}
+
 void
 MacroDriver::begin_if(const MacroValuePtr &value) noexcept(false)
 {
diff --git a/src/macro/MacroDriver.hh b/src/macro/MacroDriver.hh
index cc6dc909..db6f42a4 100644
--- a/src/macro/MacroDriver.hh
+++ b/src/macro/MacroDriver.hh
@@ -60,12 +60,27 @@ private:
     const bool is_for_context;
     const string for_body;
     const Macro::parser::location_type for_body_loc;
+    const bool is_comprehension_context;
+    const string comprehension_clause;
+    const Macro::parser::location_type comprehension_clause_loc;
+    const int comprehension_start_condition;
+    const int comprehension_iter_nb;
     ScanContext(istream *input_arg, struct yy_buffer_state *buffer_arg,
                 Macro::parser::location_type &yylloc_arg, bool is_for_context_arg,
                 string for_body_arg,
-                Macro::parser::location_type &for_body_loc_arg) :
+                Macro::parser::location_type &for_body_loc_arg,
+                bool is_comprehension_context_arg,
+                string comprehension_clause_arg,
+                Macro::parser::location_type &comprehension_clause_loc_arg,
+                int comprehension_start_condition_arg,
+                int comprehension_iter_nb_arg) :
       input(input_arg), buffer(buffer_arg), yylloc(yylloc_arg), is_for_context(is_for_context_arg),
-      for_body(move(for_body_arg)), for_body_loc(for_body_loc_arg)
+      for_body(move(for_body_arg)), for_body_loc(for_body_loc_arg),
+      is_comprehension_context{is_comprehension_context_arg},
+      comprehension_clause{move(comprehension_clause_arg)}, // Parameter taken by value: move it, as done for for_body
+      comprehension_clause_loc{comprehension_clause_loc_arg},
+      comprehension_start_condition{comprehension_start_condition_arg},
+      comprehension_iter_nb{comprehension_iter_nb_arg}
     {
     }
   };
@@ -114,6 +129,13 @@ private:
   //! Set to true while parsing an IF statement (only the statement, not the body)
   bool reading_if_statement;
 
+  bool is_comprehension_context{false}; //!< True while scanning a buffer that replays a comprehension clause
+  int comprehension_iter_nb{0}; //!< Repetitions of the clause still to be scanned, including the current one
+  int comprehension_start_condition{0}; //!< Start condition active when ';' was lexed; initialised because save_context() copies it unconditionally
+  int nested_comprehension_nb{0}; //!< Number of currently unmatched '[' inside the clause being captured
+  string comprehension_clause, comprehension_clause_tmp; //!< Clause being replayed / clause being captured
+  Macro::parser::location_type comprehension_clause_loc, comprehension_clause_loc_tmp; //!< Location saved for each of the two clauses above
+
   //! Output the @#line declaration
   void output_line(Macro::parser::location_type *yylloc) const;
 
@@ -141,6 +163,9 @@ private:
   //! Initialise a new flex buffer with the loop body
   void new_loop_body_buffer(Macro::parser::location_type *yylloc);
 
+  //! Initialize a new flex buffer with the comprehension conditional clause
+  void new_comprehension_clause_buffer(Macro::parser::location_type *yylloc);
+
 public:
   MacroFlex(istream *in, ostream *out, bool no_line_macro_arg, vector<string> path_arg);
 
@@ -169,6 +194,8 @@ private:
   //! Second is the array over which iteration is done
   //! Third is subscript to be used by next call of iter_loop() (beginning with 0) */
   stack<tuple<vector<string>, shared_ptr<ArrayMV>, int>> loop_stack;
+
+  stack<tuple<vector<string>, shared_ptr<ArrayMV>, int>> comprehension_stack; //!< Comprehensions in progress: variable names, array iterated over, next subscript
 public:
   //! Exception thrown when value of an unknown variable is requested
   class UnknownVariable
@@ -239,6 +266,11 @@ public:
       in that case it destroys the pointer given to init_loop() */
   bool iter_loop() noexcept(false);
 
+  void init_comprehension(const vector<string> &names, MacroValuePtr value); //!< Begins a comprehension over array value with loop variables names; throws TypeError if value is not an array
+  void iter_comprehension(); //!< Binds the loop variable(s) to the next array element, or ends the comprehension once the array is exhausted
+  void possibly_add_comprehension_element(vector<MacroValuePtr> &v, MacroValuePtr test_expr) const; //!< Appends the current element to v if test_expr is a nonzero integer; throws TypeError if it is not an integer
+  int get_comprehension_iter_nb() const; //!< Number of elements of the array iterated over by the innermost comprehension
+
   //! Begins an @#if statement
   void begin_if(const MacroValuePtr &value) noexcept(false);
 
diff --git a/src/macro/MacroFlex.ll b/src/macro/MacroFlex.ll
index 5dee6b33..0de931cd 100644
--- a/src/macro/MacroFlex.ll
+++ b/src/macro/MacroFlex.ll
@@ -55,6 +55,7 @@ using token = Macro::parser::token;
 %x FOR_BODY
 %x THEN_BODY
 %x ELSE_BODY
+%x COMPREHENSION_CLAUSE
 
 %{
 // Increments location counter for every token read
@@ -238,6 +239,17 @@ CONT \\\\
                               yylval->build<string>(yytext + 1).pop_back();
                               return token::STRING;
                             }
+<STMT,EXPR>;                {
+                              comprehension_clause_tmp.erase(); // Start capturing a fresh clause
+                              nested_comprehension_nb = 0; // No unmatched '[' seen yet inside the clause
+                              // Save start condition (either STMT or EXPR)
+                              comprehension_start_condition = YY_START;
+                              // Save location
+                              comprehension_clause_loc_tmp = *yylloc;
+                              BEGIN(COMPREHENSION_CLAUSE); // What follows is captured as raw text up to the closing ']'
+                              return token::SEMICOLON;
+                            }
+<EXPR>@                     { return token::ATSIGN; } // Used for separation of repeated comprehension clauses
 
 <STMT>line                  { return token::LINE; }
 <STMT>define                { return token::DEFINE; }
@@ -263,7 +275,22 @@ CONT \\\\
                               return token::NAME;
                             }
 
-<EXPR><<EOF>>               { driver.error(*yylloc, "Unexpected end of file while parsing a macro expression"); }
+<EXPR><<EOF>>               {
+                              if (!is_comprehension_context)
+                                driver.error(*yylloc, "Unexpected end of file while parsing a macro expression");
+                              else
+                                { // End of one repetition of the comprehension clause
+                                  if (--comprehension_iter_nb > 0)
+                                    new_comprehension_clause_buffer(yylloc); // Replay the clause once more
+                                  else
+                                    { // All repetitions scanned: go back to the enclosing input
+                                      restore_context(yylloc);
+
+                                      BEGIN(comprehension_start_condition);
+                                      return token::RBRACKET; // Stands for the ']' that was consumed while capturing the clause
+                                    }
+                                }
+                            }
 <STMT><<EOF>>               { driver.error(*yylloc, "Unexpected end of file while parsing a macro statement"); }
 
 <FOR_BODY>{EOL}             { yylloc->lines(1); yylloc->step(); for_body_tmp.append(yytext); }
@@ -385,6 +412,39 @@ CONT \\\\
                                 }
                             }
 
+<COMPREHENSION_CLAUSE>{EOL} { driver.error(*yylloc, "Unexpected line break in comprehension"); }
+<COMPREHENSION_CLAUSE><<EOF>>   { driver.error(*yylloc, "Unexpected end of file in comprehension"); }
+<COMPREHENSION_CLAUSE>[^\[\]]   { comprehension_clause_tmp.append(yytext); yylloc->step(); }
+<COMPREHENSION_CLAUSE>\[    { nested_comprehension_nb++; comprehension_clause_tmp.append(yytext); yylloc->step(); }
+<COMPREHENSION_CLAUSE>\]    {
+                              yylloc->step();
+                              if (nested_comprehension_nb)
+                                { // This ']' matches a '[' opened inside the clause: keep it as clause text
+                                  nested_comprehension_nb--;
+                                  comprehension_clause_tmp.append(yytext);
+                                }
+                              else
+                                { // This ']' ends the comprehension: the clause is complete
+                                  int comprehension_iter_nb_tmp = driver.get_comprehension_iter_nb();
+                                  comprehension_clause_tmp.append(" @ "); // Terminator of each repetition, lexed as ATSIGN
+
+                                  if (comprehension_iter_nb_tmp > 0) // NOTE(review): if 0, nothing is replayed and no RBRACKET is returned; confirm behaviour for an empty array
+                                    {
+                                      // Save old buffer state and location
+                                      save_context(yylloc);
+
+                                      is_comprehension_context = true;
+                                      comprehension_iter_nb = comprehension_iter_nb_tmp;
+                                      comprehension_clause = comprehension_clause_tmp;
+                                      comprehension_clause_loc = comprehension_clause_loc_tmp;
+
+                                      new_comprehension_clause_buffer(yylloc); // Scan the first repetition of the clause
+                                    }
+
+                                  BEGIN(EXPR); // The replayed clause is lexed as an ordinary expression
+                                }
+                            }
+
 <INITIAL><<EOF>>            {
                               // Quit lexer if end of main file
                               if (context_stack.empty())
@@ -444,7 +504,9 @@ void
 MacroFlex::save_context(Macro::parser::location_type *yylloc)
 {
   context_stack.push(ScanContext(input, YY_CURRENT_BUFFER, *yylloc, is_for_context,
-                                 for_body, for_body_loc));
+                                 for_body, for_body_loc, is_comprehension_context,
+                                 comprehension_clause, comprehension_clause_loc,
+                                 comprehension_start_condition, comprehension_iter_nb));
 }
 
 void
@@ -456,10 +518,15 @@ MacroFlex::restore_context(Macro::parser::location_type *yylloc)
   is_for_context = context_stack.top().is_for_context;
   for_body = context_stack.top().for_body;
   for_body_loc = context_stack.top().for_body_loc;
+  if (!is_comprehension_context)
+    output_line(yylloc); // Dump @#line instruction
+  is_comprehension_context = context_stack.top().is_comprehension_context;
+  comprehension_clause = context_stack.top().comprehension_clause;
+  comprehension_clause_loc = context_stack.top().comprehension_clause_loc;
+  comprehension_start_condition = context_stack.top().comprehension_start_condition;
+  comprehension_iter_nb = context_stack.top().comprehension_iter_nb;
   // Remove top of stack
   context_stack.pop();
-  // Dump @#line instruction
-  output_line(yylloc);
 }
 
 void
@@ -556,6 +623,17 @@ MacroFlex::new_loop_body_buffer(Macro::parser::location_type *yylloc)
   yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
 }
 
+void
+MacroFlex::new_comprehension_clause_buffer(Macro::parser::location_type *yylloc)
+{
+  input = new stringstream(comprehension_clause); // Scan the saved clause text; NOTE(review): ownership of this stream is not visible here, confirm it is released
+  *yylloc = comprehension_clause_loc; // Locations restart from the one saved with the clause
+  yylloc->begin.filename = yylloc->end.filename = new string(*comprehension_clause_loc.begin.filename);
+  is_for_context = false; // The clause buffer is not a loop body
+  for_body.clear();
+  yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
+}
+
 /* This implementation of MacroFlexLexer::yylex() is required to fill the
  * vtable of the class MacroFlexLexer. We define the scanner's main yylex
  * function via YY_DECL to reside in the MacroFlex class instead. */
-- 
GitLab