diff --git a/doc/preprocessor/expr-sharing.dia b/doc/preprocessor/expr-sharing.dia new file mode 100644 index 0000000000000000000000000000000000000000..93da37ab9f9b2ffbde15c09502764159c2fb85eb Binary files /dev/null and b/doc/preprocessor/expr-sharing.dia differ diff --git a/doc/preprocessor/expr-sharing.png b/doc/preprocessor/expr-sharing.png new file mode 100644 index 0000000000000000000000000000000000000000..d2d2d9002f66bc6f7c7cccdf77788d493fa1fab8 Binary files /dev/null and b/doc/preprocessor/expr-sharing.png differ diff --git a/doc/preprocessor/expr.dia b/doc/preprocessor/expr.dia new file mode 100644 index 0000000000000000000000000000000000000000..f99a39825f2ad22c0f789696da718c0ad6a55d5c Binary files /dev/null and b/doc/preprocessor/expr.dia differ diff --git a/doc/preprocessor/expr.png b/doc/preprocessor/expr.png new file mode 100644 index 0000000000000000000000000000000000000000..5cab996b25e46e90a2edb6aaea66b0519da0b649 Binary files /dev/null and b/doc/preprocessor/expr.png differ diff --git a/doc/preprocessor/matrices.dia b/doc/preprocessor/matrices.dia new file mode 100644 index 0000000000000000000000000000000000000000..e4af95f8c369ca0b6274c8e03bd3be9f34e4f6b1 Binary files /dev/null and b/doc/preprocessor/matrices.dia differ diff --git a/doc/preprocessor/matrices.png b/doc/preprocessor/matrices.png new file mode 100644 index 0000000000000000000000000000000000000000..bff4e417300f7a33710df9b31b1c7fbbb6bf5092 Binary files /dev/null and b/doc/preprocessor/matrices.png differ diff --git a/doc/preprocessor/overview.dia b/doc/preprocessor/overview.dia new file mode 100644 index 0000000000000000000000000000000000000000..ea635011a0f588660e1e4ed2ccf1cf225b8941d2 Binary files /dev/null and b/doc/preprocessor/overview.dia differ diff --git a/doc/preprocessor/overview.png b/doc/preprocessor/overview.png new file mode 100644 index 0000000000000000000000000000000000000000..a49d4f77766f2b3b59f9e261d321885db715e871 Binary files /dev/null and 
b/doc/preprocessor/overview.png differ diff --git a/doc/preprocessor/preprocessor.pdf b/doc/preprocessor/preprocessor.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d4fd47ab09c6035a4bc5bbd8799a2ce5a3d933db Binary files /dev/null and b/doc/preprocessor/preprocessor.pdf differ diff --git a/doc/preprocessor/preprocessor.tex b/doc/preprocessor/preprocessor.tex new file mode 100644 index 0000000000000000000000000000000000000000..3eed856851425b4c5d5919ed63cd38be559805e5 --- /dev/null +++ b/doc/preprocessor/preprocessor.tex @@ -0,0 +1,1165 @@ +\documentclass{beamer} +%\documentclass[draft]{beamer} +%\documentclass[handout]{beamer} + + +\mode<handout> +{ + \usepackage{pgfpages} + \pgfpagesuselayout{4 on 1}[a4paper,border shrink=3mm,landscape] + \usetheme{Madrid} + \usecolortheme{seagull} +} + +\mode<beamer> +{ + \usetheme{Madrid} + \setbeamercovered{transparent} +} + + +\usepackage[english]{babel} +\usepackage[utf8]{inputenc} + +\usepackage{times} + + +\title{The Dynare Preprocessor} + +\author[S. 
Villemot]{Sébastien Villemot} + +\institute{CEPREMAP} + +\date{October 19, 2007} + +\AtBeginSection[] +{ + \begin{frame}{Outline} + \tableofcontents[currentsection] + \end{frame} +} + +\begin{document} + +\begin{frame} + \titlepage +\end{frame} + +\begin{frame} + \frametitle{General overview} + \begin{center} + \includegraphics[width=11cm]{overview.png} + \end{center} +\end{frame} + +\begin{frame}{Outline} + \tableofcontents +\end{frame} + +\section{Introduction to object-oriented programming in C++} + +\begin{frame} + \frametitle{Object-oriented programming (OOP)} + \begin{itemize} + \item Traditional way of programming: a program is a list of instructions (organized in functions) which manipulate data + \item OOP is an alternative programming paradigm that uses \alert{objects} and their interactions to design programs + \pause + \item With OOP, programming becomes a kind of modelization: each object of the program should modelize a real world object, or a mathematical object (\textit{e.g.} a matrix, an equation, a model...) 
+ \item Each object can be viewed as an independent little machine with a distinct role or responsibility + \item Each object is capable of receiving messages, processing data, and sending messages to other objects + \pause + \item Main advantage of OOP is \alert{modularity}, which leads to greater reusability, flexibility and maintainability + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Object} + \framesubtitle{Definition and example} + \begin{itemize} + \item An \alert{object} is the bundle of: + \begin{itemize} + \item several variables (called its \alert{attributes}), which modelize the characteristics (or the state) of the object + \item several functions (called its \alert{methods}) which operate on the attributes, and which modelize the behaviour of the object (the actions it can perform) + \end{itemize} + \pause + \item Example: suppose we want to modelize a coffee machine + \begin{itemize} + \item The coffee machine (in real life) is a box, with an internal counter for the credit balance, a slot to put coins in, and a button to get a coffee + \item The corresponding object will have one attribute (the current credit balance) and two methods (one which modelizes the introduction of money, and the other the making of a coffee) + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{A coffee machine} + \framesubtitle{Class definition} + \begin{block}{C++ header file (\texttt{CoffeeMachine.hh})} + \begin{scriptsize} +\begin{verbatim} +class CoffeeMachine { +public: + int credit; + CoffeeMachine(); + void put_coin(int coin_value); + void get_coffee(); +}; +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item A \alert{class} is a template (or a blueprint) of an object + \item Collectively, the attributes and methods defined by a class are called \alert{members} + \item A class definition creates a new \alert{type} (\texttt{CoffeeMachine}) that can be used like other C++ types (\textit{e.g.} 
\texttt{int}, \texttt{string}, ...) + \item In C++, class definitions are put in header files (\texttt{.hh} extension) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{A coffee machine} + \framesubtitle{Method bodies} + \begin{block}{C++ source file (\texttt{CoffeeMachine.cc})} + \begin{scriptsize} +\begin{verbatim} +void CoffeeMachine::put_coin(int coin_value) +{ + credit += coin_value; + cout << "Credit is now " << credit << endl; +} + +void CoffeeMachine::get_coffee() +{ + if (credit == 0) + cout << "No credit!" << endl; + else { + credit--; + cout << "Your coffee is ready, credit is now " << credit << endl; + } +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item Methods can refer to other members (here the two methods modify the \texttt{credit} attribute) + \item Method bodies are put in source files (\texttt{.cc} extension) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Constructors and destructors} + \begin{itemize} + \item In our class header, there is a special method called \texttt{CoffeeMachine()} (same name as the class) + \item It is a \alert{constructor}: called when the object is created, used to initialize the attributes of the class + \end{itemize} + \begin{block}{C++ source file (\texttt{CoffeeMachine.cc}, continued)} + \begin{scriptsize} +\begin{verbatim} +CoffeeMachine::CoffeeMachine() +{ + credit = 0; +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item It is possible to create constructors with arguments + \item It is also possible to define a \alert{destructor} (method name is the class name prepended by a tilde, like \texttt{$\sim$CoffeeMachine}): called when the object is destroyed, used to do cleaning tasks (\textit{e.g.} freeing memory) + \end{itemize} +\end{frame} + + +\begin{frame}[fragile] + \frametitle{Instantiation and method invocation} + \begin{block}{Program main function} + \begin{scriptsize} +\begin{verbatim} +#include "CoffeeMachine.hh" 
+ +int main() +{ + CoffeeMachine A, B; + + A.put_coin(2); + A.get_coffee(); + + B.put_coin(1); + B.get_coffee(); + B.get_coffee(); +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item Creates two machines: at the end, \texttt{A} has 1 credit, \texttt{B} has no credit and refused the last coffee + \item \texttt{A} and \texttt{B} are called \alert{instances} of class \texttt{CoffeeMachine} + \item Methods are invoked by appending a dot and the method name to the instance variable name + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dynamic instantiation with \texttt{new}} + \begin{block}{Program main function} + \begin{scriptsize} +\begin{verbatim} +#include "CoffeeMachine.hh" + +int main() +{ + CoffeeMachine *A; + + A = new CoffeeMachine(); + + A->put_coin(2); + A->get_coffee(); + + delete A; +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item Here \texttt{A} is a pointer to an instance of class \texttt{CoffeeMachine} + \item Dynamic creation of instances is done with \texttt{new}, dynamic deletion with \texttt{delete} (analogous to \texttt{malloc} and \texttt{free}) + \item Since \texttt{A} is a pointer, methods are called with \texttt{->} instead of a dot + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Access modifiers} + \begin{itemize} + \item In our coffee machine example, all attributes and methods were marked as \texttt{public} + \item Means that those attributes and methods can be accessed from anywhere in the program + \item Here, one can gain credit without putting money in the machine, with something like \texttt{A.credit = 1000;} + \item The solution is to declare the \texttt{credit} attribute \alert{private}: such members can only be accessed from methods within the class + \end{itemize} + \begin{block}{C++ header file (\texttt{CoffeeMachine.hh})} + \begin{scriptsize} +\begin{verbatim} +class CoffeeMachine { +private: + int credit; +public: + CoffeeMachine(); + void put_coin(int coin_value); 
+ void get_coffee(); +}; +\end{verbatim} + \end{scriptsize} + \end{block} +\end{frame} + +\begin{frame} + \frametitle{Interface} + \begin{itemize} + \item The public members of a class form its \alert{interface}: they describe how the class interacts with its environment + \item Seen from outside, an object is a ``black box'', receiving and sending messages through its interface + \item Particular attention should be given to the interface design: an external programmer should be able to work with a class by studying only its interface, not its internals + \item A good design practice is to limit the set of public members to the strict minimum: + \begin{itemize} + \item enhances code understandability by making the interface clear + \item limits the risk that an internal change in the object requires a change in the rest of the program: \alert{loose coupling} + \item prevents the disruption of the coherence of the object by an external action: principle of \alert{isolation} + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Why isolation is important} + \begin{itemize} + \item Consider a class \texttt{Circle} with the following attributes: + \begin{itemize} + \item coordinates of the center + \item radius + \item surface + \end{itemize} + \item If all members are public, it is possible to modify the radius but not the surface, therefore disrupting internal coherence + \item The solution is to make radius and surface private, and to create a public method \texttt{changeRadius} which modifies both simultaneously + \item \textit{Conclusion:} Creating a clear interface and isolating the rest diminishes the risk of introducing bugs + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Inheritance (1/2)} + + \begin{block}{Matrices and positive definite matrices} + \begin{scriptsize} + \begin{columns}[t] + \begin{column}{4.8cm} +\begin{verbatim} +class Matrix +{ +protected: + int height, width; + double[] elements; +public: + 
Matrix(int n, int p, + double[] e); + virtual ~Matrix(); + double det(); +}; +\end{verbatim} + \end{column} + \begin{column}{6cm} +\begin{verbatim} +class PositDefMatrix : public Matrix +{ +public: + PositDefMatrix(int n, int p, + double[] e); + Matrix cholesky(); +}; +\end{verbatim} + \end{column} + \end{columns} + \end{scriptsize} + + \end{block} + \begin{itemize} + \item \texttt{PositDefMatrix} is a \alert{subclass} (or \alert{derived class}) of \texttt{Matrix} + \item Conversely \texttt{Matrix} is the \alert{superclass} of \texttt{PositDefMatrix} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Inheritance (2/2)} + \begin{itemize} + \item \texttt{PositDefMatrix} inherits \texttt{width}, \texttt{height}, \texttt{elements} and \texttt{det} from \texttt{Matrix} + \item Method \texttt{cholesky} can be called on an instance of \texttt{PositDefMatrix}, but not of \texttt{Matrix} + \item The keyword \texttt{protected} means: public for subclasses, but private for other classes + \item \alert{Type casts} are legal when going upward in the derivation tree: + \begin{itemize} + \item a pointer to \texttt{PositDefMatrix} can be safely cast to a \texttt{Matrix*} + \item the converse is faulty and leads to unpredictable results + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Constructors and destructors (bis)} + \begin{block}{C++ code snippet} + \begin{scriptsize} +\begin{verbatim} +Matrix::Matrix(int n, int p, double[] e) : height(n), width(p) +{ + elements = new double[n*p]; + memcpy(elements, e, n*p*sizeof(double)); +} + +Matrix::~Matrix() +{ + delete[] elements; +} + +PositDefMatrix::PositDefMatrix(int n, int p, double[] e) : + Matrix(n, p, e) +{ + // Check that matrix is really positive definite +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item Constructor of \texttt{PositDefMatrix} calls constructor of \texttt{Matrix} + \item Note the abbreviated syntax with colon + \end{itemize} +\end{frame} + 
+\begin{frame} + \frametitle{Possible derivation tree for real matrices} + \framesubtitle{Arrow means \textit{...is a subclass of...}} + \begin{center} + \includegraphics[width=10cm]{matrices.png} + \end{center} +\end{frame} + +\begin{frame} + \frametitle{Polymorphism (1/3)} + \begin{itemize} + \item In previous example, determinant computation method uses the same algorithm for both classes + \item But for positive definite matrices, a faster algorithm exists (using the cholesky) + \item \alert{Polymorphism} offers an elegant solution: + \begin{itemize} + \item declare \texttt{det} as a \alert{virtual method} in class \texttt{Matrix} + \item \alert{override} it in \texttt{PositDefMatrix}, and provide the corresponding implementation + \end{itemize} + \item When method \texttt{det} will be invoked, the correct implementation will be selected, depending on the type of the instance (this is done through a runtime type test) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Polymorphism (2/3)} + + \begin{block}{Class headers} + \begin{scriptsize} + \begin{columns}[t] + \begin{column}{4.8cm} +\begin{verbatim} +class Matrix +{ +protected: + int height, width; + double[] elements; +public: + Matrix(int n, int p, + double[] e); + virtual ~Matrix(); + virtual double det(); + bool is_invertible(); +}; +\end{verbatim} + \end{column} + \begin{column}{6cm} +\begin{verbatim} +class PositDefMatrix : public Matrix +{ +public: + PositDefMatrix(int n, int p, + double[] e); + Matrix cholesky(); + virtual double det(); +}; +\end{verbatim} + \end{column} + \end{columns} + \end{scriptsize} + + \end{block} + \begin{itemize} + \item Note the \texttt{virtual} keyword + \item A method has been added to determine if matrix is invertible + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Polymorphism (3/3)} + \begin{block}{C++ code snippet} + \begin{scriptsize} +\begin{verbatim} +bool Matrix::is_invertible() +{ + return(det() != 0); +} + +double 
PositDefMatrix::det() +{ + // Square of the product of the diagonal terms of the Cholesky decomposition +} +\end{verbatim} + \end{scriptsize} + \end{block} + \begin{itemize} + \item A call to \texttt{is\_invertible} on an instance of \texttt{Matrix} will use the generic determinant computation + \item The same call on an instance of \texttt{PositDefMatrix} will call the specialized determinant computation + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Abstract classes} + \begin{itemize} + \item It is possible to create classes which don't provide an implementation for some virtual methods + \item Syntax in the header: \\ + \texttt{virtual int method\_name() = 0;} + \item As a consequence, such classes can never be instantiated + \item Generally used as the root of a derivation tree, when classes of the tree share behaviours but not implementations + \item Such classes are called \alert{abstract classes} + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{Some programming rules (1/2)} + \begin{itemize} + \item Don't repeat yourself (DRY): if several functions contain similar portions of code, \alert{factorize} that code into a new function + \begin{itemize} + \item makes code shorter + \item reduces the risk of introducing inconsistencies + \item makes the propagation of enhancements and bug corrections easier + \end{itemize} + \item Make short functions + \begin{itemize} + \item often difficult to grasp what a long function does + \item structuring the code by dividing it into short functions makes the logical structure more apparent + \item enhances code readability and maintainability + \end{itemize} + \item Use explicit variable names (except for loop indexes) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Some programming rules (2/2)} + \begin{itemize} + \item Global variables are evil + \begin{itemize} + \item a global variable can be modified from anywhere in the code (nonlocality problem) + \item creates a potentially unlimited number of 
dependencies between all portions of the code + \item makes bugs difficult to localize (any part of the code could have created the trouble) + \item to summarize, goes against the principle of modularity + \item in addition, global variables are not thread safe (unless used with locks/mutexes) + \end{itemize} + \item Document your code when it doesn't speak by itself + \begin{itemize} + \item Dynare preprocessor code is documented using Doxygen + \item done through special comments beginning with an exclamation mark + \item run \texttt{doxygen} from the source directory to create a bunch of HTML files documenting the code + \end{itemize} + \end{itemize} +\end{frame} + +\section{Parsing} + +\begin{frame} +\frametitle{Parsing overview} +\begin{itemize} +\item Parsing is the action of transforming an input text (a \texttt{mod} file in our case) into a data structure suitable for computation +\item The parser consists of three components: + \begin{itemize} + \item the \alert{lexical analyzer}, which recognizes the ``words'' of the \texttt{mod} file (analog to the \textit{vocabulary} of a language) + \item the \alert{syntax analyzer}, which recognizes the ``sentences'' of the \texttt{mod} file (analog to the \textit{grammar} of a language) + \item the \alert{parsing driver}, which coordinates the whole process and constructs the data structure using the results of the lexical and syntax analyses + \end{itemize} +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Lexical analysis} +\begin{itemize} +\item The lexical analyzer recognizes the ``words'' (or \alert{lexemes}) of the language +\item Lexical analyzer is described in \texttt{DynareFlex.ll}. This file is transformed into C++ source code by the program \texttt{flex} +\item This file gives the list of the known lexemes (described by regular expressions), and gives the associated \alert{token} for each of them +\item For punctuation (semicolon, parentheses, ...), operators (+, -, ...) 
or fixed keywords (\textit{e.g.} \texttt{model}, \texttt{varexo}, ...), the token is simply an integer uniquely identifying the lexeme +\item For variable names or numbers, the token also contains the associated string for further processing +%\item \textit{Note:} the list of tokens can be found at the beginning of \texttt{DynareBison.yy} +\item When invoked, the lexical analyzer reads the next characters of the input, tries to recognize a lexeme, and either produces an error or returns the associated token +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Lexical analysis} +\framesubtitle{An example} +\begin{itemize} +\item Suppose the \texttt{mod} file contains the following: +\begin{verbatim} +model; +x = log(3.5); +end; +\end{verbatim} +\item Before lexical analysis, it is only a sequence of characters +\item The lexical analysis produces the following stream of tokens: + +\begin{footnotesize} +\begin{verbatim} +MODEL +SEMICOLON +NAME "x" +EQUAL +LOG +LEFT_PARENTHESIS +FLOAT_NUMBER "3.5" +RIGHT_PARENTHESIS +SEMICOLON +END +SEMICOLON +\end{verbatim} +\end{footnotesize} +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Syntax analysis} +Using the list of tokens produced by lexical analysis, the syntax analyzer determines which ``sentences'' are valid in the language, according to a \alert{grammar} composed of \alert{rules}. 
+\begin{block}{A grammar for lists of additive and multiplicative expressions} +\begin{footnotesize} +\begin{verbatim} +%start expression_list; + +expression_list := expression SEMICOLON + | expression_list expression SEMICOLON; + +expression := expression PLUS expression + | expression TIMES expression + | LEFT_PAREN expression RIGHT_PAREN + | INT_NUMBER; +\end{verbatim} +\end{footnotesize} +\end{block} +\begin{itemize} +\item \texttt{(1+3)*2; 4+5;} will pass the syntax analysis without error +\item \texttt{1++2;} will fail the syntax analysis, even though it has passed the lexical analysis +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Syntax analysis} +\framesubtitle{In Dynare} +\begin{itemize} +\item The \texttt{mod} file grammar is described in \texttt{DynareBison.yy} +\item The grammar is transformed into C++ source code by the program \texttt{bison} +\item The grammar tells a story which looks like: + \begin{itemize} + \item A \texttt{mod} file is a list of statements + \item A statement can be a \texttt{var} statement, a \texttt{varexo} statement, a \texttt{model} block, an \texttt{initval} block, ... + \item A \texttt{var} statement begins with the token \texttt{VAR}, then a list of \texttt{NAME}s, then a semicolon + \item A \texttt{model} block begins with the token \texttt{MODEL}, then a semicolon, then a list of equations separated by semicolons, then an \texttt{END} token + \item An equation can be either an expression, or an expression followed by an \texttt{EQUAL} token and another expression + \item An expression can be a \texttt{NAME}, or a \texttt{FLOAT\_NUMBER}, or an expression followed by a \texttt{PLUS} and another expression, ... 
+ \end{itemize} +\end{itemize} +\end{frame} + + +\begin{frame} +\frametitle{Semantic actions} +\begin{itemize} +\item So far we have only described how to accept valid \texttt{mod} files and to reject others +\item But validating is not enough: one needs to do something about what has been parsed +\item Each rule of the grammar can have a \alert{semantic action} associated to it: C/C++ code enclosed in curly braces +\item Each rule can return a semantic value (referred to by \texttt{\$\$} in the action) +\item In the action, it is possible to refer to semantic values returned by components of the rule (using \texttt{\$1}, \texttt{\$2}, ...) +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Semantic actions} +\framesubtitle{An example} +\begin{block}{A simple calculator which prints its results} +\begin{footnotesize} +\begin{verbatim} +%start expression_list +%type <int> expression + +expression_list := expression SEMICOLON + { cout << $1; } + | expression_list expression SEMICOLON + { cout << $2; }; + +expression := expression PLUS expression + { $$ = $1 + $3; } + | expression TIMES expression + { $$ = $1 * $3; } + | LEFT_PAREN expression RIGHT_PAREN + { $$ = $2; } + | INT_NUMBER + { $$ = $1; }; +\end{verbatim} +\end{footnotesize} +\end{block} +\end{frame} + +\begin{frame} +\frametitle{Parsing driver} + +The class \texttt{ParsingDriver} has the following roles: +\begin{itemize} +\item Given the \texttt{mod} filename, it opens the file and launches the lexical and syntax analyzers on it +\item It implements most of the semantic actions of the grammar +\item By doing so, it creates an object of type \texttt{ModFile}, which is the data structure representing the \texttt{mod} file +\item Or, if there is a parsing error (unknown keyword, undeclared symbol, syntax error), it displays the line and column numbers where the error occurred, and exits +\end{itemize} +\end{frame} + +\section{Data structure representing a \texttt{mod} file} + +\begin{frame} 
+ \frametitle{The \texttt{ModFile} class} + \begin{itemize} + \item This class is the internal data structure used to store all the information contained in a \texttt{mod} file + \item One instance of the class represents one \texttt{mod} file + \item The class contains the following elements (as class members): + \begin{itemize} + \item a symbol table + \item a numerical constants table + \item two trees of expressions: one for the model, and one for the expressions outside the model + \item the list of the statements (parameter initializations, shocks block, \texttt{check}, \texttt{steady}, \texttt{simul}, ...) + \item an evaluation context + \end{itemize} + \item An instance of \texttt{ModFile} is the output of the parsing process (return value of \texttt{ParsingDriver::parse()}) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The symbol table (1/3)} + \begin{itemize} + \item A \alert{symbol} is simply the name of a variable, of a parameter or of a function unknown to the preprocessor: actually everything that is not recognized as a Dynare keyword + \item The \alert{symbol table} is a simple structure used to maintain the list of the symbols used in the \texttt{mod} file + \item For each symbol, it stores: + \begin{itemize} + \item its name (a string) + \item its type (an integer) + \item a unique integer identifier (unique for a given type, but not across types) + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The symbol table (2/3)} + Existing types of symbols: + \begin{itemize} + \item Endogenous variables + \item Exogenous variables + \item Exogenous deterministic variables + \item Parameters + \item Local variables inside model: declared with a pound sign (\#) construction + \item Local variables outside model: no declaration needed, not interpreted by the preprocessor (\textit{e.g.} Matlab loop indexes) + \item Names of functions unknown to the preprocessor: no declaration needed, not interpreted by the preprocessor, 
only allowed outside model (until we create an interface for providing custom functions with their derivatives) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The symbol table (3/3)} + \begin{itemize} + \item Symbol table filled in: + \begin{itemize} + \item using the \texttt{var}, \texttt{varexo}, \texttt{varexo\_det}, \texttt{parameter} declarations + \item using pound sign (\#) constructions in the model block + \item on the fly during parsing: local variables outside models or unknown functions when an undeclared symbol is encountered + \end{itemize} + \item Roles of the symbol table: + \begin{itemize} + \item permits parsimonious and more efficient representation of expressions (no need to duplicate or compare strings, only handle a pair of integers) + \item ensures that a given symbol is used with only one type + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Expression trees (1/2)} + \begin{itemize} + \item The data structure used to store expressions is essentially a \alert{tree} + \item Graphically, the tree representation of $(1+z)*\log(y)$ is: + \begin{center} + \includegraphics[width=4cm]{expr.png} + \end{center} + \item No need to store parentheses + \item Each circle represents a \alert{node} + \item A node has at most one parent and at most two children + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Expression trees (2/2)} + \begin{itemize} + \item In Dynare preprocessor, a tree node is represented by an instance of the abstract class \texttt{ExprNode} + \item This class has 5 sub-classes, corresponding to the 5 types of nodes: + \begin{itemize} + \item \texttt{NumConstNode} for constant nodes: contains the identifier of the numerical constant it represents + \item \texttt{VariableNode} for variable/parameter nodes: contains the identifier of the variable or parameter it represents + \item \texttt{UnaryOpNode} for unary operators (\textit{e.g.} unary minus, $\log$, $\sin$): contains an integer 
representing the operator, and a pointer to its child + \item \texttt{BinaryOpNode} for binary operators (\textit{e.g.} $+$, $*$, pow): contains an integer representing the operator, and pointers to its two children + \item \texttt{UnknownFunctionNode} for functions unknown to the parser (\textit{e.g.} user defined functions): contains the identifier of the function name, and a vector containing an arbitrary number of children (the function arguments) + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Classes \texttt{DataTree} and \texttt{ModelTree}} + \begin{itemize} + \item Class \texttt{DataTree} is a container for storing a set of expression trees + \item Class \texttt{ModelTree} is a sub-class of \texttt{DataTree}, specialized for storing a set of model equations (among other things, contains symbolic derivation algorithm) + \item Class \texttt{ModFile} contains: + \begin{itemize} + \item one instance of \texttt{ModelTree} for storing the equations of model block + \item one instance of \texttt{DataTree} for storing all expressions outside model block + \end{itemize} + \item Expression storage is optimized through three mechanisms: + \begin{itemize} + \item pre-computing of numerical constants + \item symbolic simplification rules + \item sub-expression sharing + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Constructing expression trees} + \begin{itemize} + \item Class \texttt{DataTree} contains a set of methods for constructing expression trees + \item Construction is done bottom-up, node by node: + \begin{itemize} + \item one method for adding a constant node (\texttt{AddPossiblyNegativeConstant(double)}) + \item one method for a log node (\texttt{AddLog(arg)}) + \item one method for a plus node (\texttt{AddPlus(arg1, arg2)}) + \end{itemize} + \item These methods take pointers to \texttt{ExprNode}, allocate the memory for the node, construct it, and return its pointer + \item These methods are called: + 
\begin{itemize} + \item from \texttt{ParsingDriver} in the semantic actions associated to the parsing of expressions + \item during symbolic derivation, to create derivatives expressions + \end{itemize} + \item Note that \texttt{NodeID} is an alias (typedef) for \texttt{ExprNode*} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Reduction of constants and symbolic simplifications} + \begin{itemize} + \item The construction methods compute constants whenever it is possible + \begin{itemize} + \item Suppose you ask to construct the node $1+1$ + \item The \texttt{AddPlus()} method will return a pointer to a constant node containing 2 + \end{itemize} + \item The construction methods also apply a set of simplification rules, such as: + \begin{itemize} + \item $0+0=0$ + \item $x+0 = x$ + \item $0-x = -x$ + \item $-(-x) = x$ + \item $x*0 = 0$ + \item $x/1 = x$ + \item $x^0 = 1$ + \end{itemize} + \item When a simplification rule applies, no new node is created + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Sub-expression sharing (1/2)} + \begin{itemize} + \item Consider the two following expressions: $(1+z)*\log(y)$ and $2^{(1+z)}$ + \item Expressions share a common sub-expression: $1+z$ + \item The internal representation of these expressions is: + \begin{center} + \includegraphics[width=6cm]{expr-sharing.png} + \end{center} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Sub-expression sharing (2/2)} + \begin{itemize} + \item Construction methods implement a simple algorithm which achieves maximal expression sharing + \item Algorithm uses the fact that each node has a unique memory address (pointer to the corresponding instance of \texttt{ExprNode}) + \item It maintains 5 tables which keep track of the already constructed nodes: one table by type of node (constants, variables, unary ops, binary ops, unknown functions) + \item Suppose you want to create the node $e_1+e_2$ (where $e_1$ and $e_2$ are sub-expressions): + \begin{itemize} + 
\item the algorithm searches the binary ops table for the tuple equal to (address of $e_1$, address of $e_2$, op code of +) (it is the \alert{search key}) + \item if the tuple is found in the table, the node already exists, and its memory address is returned + \item otherwise, the node is created, and is added to the table with its search key + \end{itemize} + \item Maximum sharing is achieved, because expression trees are constructed bottom-up + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Final remarks about expressions} + \begin{itemize} + \item Storage of negative constants + \begin{itemize} + \item class \texttt{NumConstNode} only accepts positive constants + \item a negative constant is stored as a unary minus applied to a positive constant + \item this is a kind of identification constraint to avoid having two ways of representing negative constants: $(-2)$ and $-(2)$ + \end{itemize} + \item Widely used constants + \begin{itemize} + \item class \texttt{DataTree} has attributes containing pointers to one, zero, and minus one constants + \item these constants are used in many places (in simplification rules, in derivation algorithm...) + \item sub-expression sharing algorithm ensures that those constants will never be duplicated + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{List of statements} + \begin{itemize} + \item A statement is represented by an instance of a subclass of the abstract class \texttt{Statement} + \item Three groups of statements: + \begin{itemize} + \item initialization statements (parameter initialization with $p = \ldots$, \texttt{initval}, \texttt{histval} or \texttt{endval} block) + \item shocks blocks + \item computing tasks (\texttt{check}, \texttt{simul}, ...) + \end{itemize} + \item Each type of statement has its own class (\textit{e.g.} \texttt{InitValStatement}, \texttt{SimulStatement}, ...) 
+ \item The class \texttt{ModFile} stores a list of pointers of type \texttt{Statement*}, corresponding to the statements of the \texttt{mod} file, in their order of declaration + \item Heavy use of polymorphism in the check pass, computing pass, and when writing outputs: abstract class \texttt{Statement} provides a virtual method for each of these 3 actions + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Evaluation context} + \begin{itemize} + \item The \texttt{ModFile} class contains an \alert{evaluation context} + \item It is a map associating a numerical value to some symbols + \item Filled in with \texttt{initval} block, and with parameter initializations + \item Used during equation normalization (in the block decomposition), for finding non-zero entries in the jacobian + \end{itemize} +\end{frame} + +\section{Check pass} + +\begin{frame} + \frametitle{Error checking during parsing} + \begin{itemize} + \item Some errors in the \texttt{mod} file can be detected during the parsing: + \begin{itemize} + \item syntax errors + \item use of undeclared symbol in model block, initval block... 
+ \item use of a symbol incompatible with its type (\textit{e.g.} parameter in initval, local variable used both in model and outside model) + \item multiple shocks declaration for the same variable + \end{itemize} + \item But some other checks can only be done when parsing is completed + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Check pass} + \begin{itemize} + \item The check pass is implemented through method \texttt{ModFile::checkPass()} + \item Does the following checks: + \begin{itemize} + \item check there is at least one equation in the model (except if doing a standalone BVAR estimation) + \item check there is not both a \texttt{simul} and a \texttt{stoch\_simul} (or another command triggering local approximation) + \end{itemize} + \item Other checks could be added in the future, for example: + \begin{itemize} + \item check that every endogenous variable is used at least once in current period + \item check there is a single \texttt{initval} (or \texttt{histval}, \texttt{endval}) block + \item check that \texttt{varobs} is used if there is an estimation + \end{itemize} + \end{itemize} +\end{frame} + +\section{Computing pass} + +\begin{frame} + \frametitle{Overview of the computing pass} + \begin{itemize} + \item Computing pass implemented in \texttt{ModFile::computingPass()} + \item Begins with a determination of which derivatives to compute + \item Then, calls \texttt{ModelTree::computingPass()}, which computes: + \begin{itemize} + \item lead/lag variable incidence matrix + \item symbolic derivatives + \item equation normalization + block decomposition (only in \texttt{sparse\_dll} mode) + \item temporary terms + \item symbolic Gaussian elimination (only in \texttt{sparse\_dll} mode) \textit{(actually this is done in the output writing pass, but should be moved to the computing pass)} + \end{itemize} + \item Finally, calls \texttt{Statement::computingPass()} on all statements + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The 
variable table} + \begin{itemize} + \item In the context of class \texttt{ModelTree}, a \alert{variable} is a pair (symbol, lead/lag) + \item The symbol must correspond to an endogenous or exogenous variable (in the sense of the model) + \item The class \texttt{VariableTable} keeps track of those pairs + \item An instance of \texttt{ModelTree} contains an instance of \texttt{VariableTable} + \item Each pair (\texttt{symbol\_id}, lead/lag) is given a unique \texttt{variable\_id} + \item After the computing pass, the class \texttt{VariableTable} writes the lead/lag incidence matrix: + \begin{itemize} + \item endogenous symbols in rows + \item leads/lags in columns + \item elements of the matrix are either a variable ID or 0, depending on whether the pair (symbol, lead/lag) is used or not in the model + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Static versus dynamic model} + \begin{itemize} + \item The static model is simply the (dynamic) model from which the leads/lags have been omitted + \item Static model used to characterize the steady state + \item The jacobian of the static model is used in the (Matlab) solver for determining the steady state + \item No need to derive static and dynamic models independently: \\ + static derivatives can be easily deduced from dynamic derivatives + \end{itemize} + \begin{block}{Example} + \begin{itemize} + \item suppose dynamic model is $2x \cdot x_{-1} = 0$ + \item static model is $2x^2 = 0$, whose derivative w.r. to $x$ is $4x$ + \item dynamic derivative w.r. to $x$ is $2x_{-1}$, and w.r. to $x_{-1}$ is $2x$ + \item removing leads/lags from dynamic derivatives and summing over the two partial derivatives w.r. to $x$ and $x_{-1}$ gives $4x$ + \end{itemize} + \end{block} +\end{frame} + +\begin{frame} + \frametitle{Which derivatives to compute ?} + \begin{itemize} + \item In deterministic mode: + \begin{itemize} + \item static jacobian (w.r. 
to endogenous variables only) + \item dynamic jacobian (w.r. to endogenous variables only) + \end{itemize} + \item In stochastic mode: + \begin{itemize} + \item static jacobian (w.r. to endogenous variables only) + \item dynamic jacobian (w.r. to all variables) + \item possibly dynamic hessian (if \texttt{order} option $\geq 2$) + \item possibly dynamic 3rd derivatives (if \texttt{order} option $\geq 3$) + \end{itemize} + \item For Ramsey policy: the same as above, but with one further order of derivation than declared by the user with \texttt{order} option (the derivation order is determined in the check pass, see \texttt{RamseyPolicyStatement::checkPass()}) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Derivation algorithm (1/2)} + \begin{itemize} + \item Derivation of the model implemented in \texttt{ModelTree::derive()} + \item Simply calls \texttt{ExprNode::getDerivative(varID)} on each equation node + \item Use of polymorphism: + \begin{itemize} + \item for a constant or variable node, derivative is straightforward (0 or 1) + \item for a unary or binary op node, recursively calls method \texttt{getDerivative()} on children to construct derivative of parent, using usual derivation rules, such as: + \begin{itemize} + \item $(\log(e))' = \frac{e'}{e}$ + \item $(e_1 + e_2)' = e'_1 + e'_2$ + \item $(e_1 \cdot e_2)' = e'_1\cdot e_2 + e_1\cdot e'_2$ + \item $\ldots$ + \end{itemize} + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Derivation algorithm (2/2)} + \framesubtitle{Optimizations} + \begin{itemize} + \item Caching of derivation results + \begin{itemize} + \item method \texttt{ExprNode::getDerivative(varID)} memorizes its result in a member attribute the first time it is called + \item so that the second time it is called (with the same argument), simply returns the cached value without recomputation + \item caching is useful because of sub-expression sharing + \end{itemize} + \pause + \item Symbolic \textit{a priori} + 
\begin{itemize} + \item consider the expression $x+y^2$ + \item without any computation, you know its derivative w.r. to $z$ is zero + \item each node stores in an attribute the set of variables which appear in the expression it represents ($\{x,y\}$ in the example) + \item that set is computed in the constructor (straightforwardly for a variable or a constant, recursively for other nodes, using the sets of the children) + \item when \texttt{getDerivative(varID)} is called, immediately returns zero if \texttt{varID} is not in that set + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Temporary terms (1/2)} + \begin{itemize} + \item When the preprocessor writes equations and derivatives in its outputs, it takes advantage of sub-expression sharing + \item In Matlab static and dynamic output files, equations are preceded by a list of \alert{temporary terms} + \item Those terms are temporary variables containing expressions shared by several equations or derivatives + \item Doing so greatly enhances the computing speed of model residual, jacobian or hessian + \end{itemize} + \begin{block}{Example} + \begin{columns}[t] + \begin{column}{6cm} + The equations: +\begin{verbatim} +residual(0)=x+y^2-z^3; +residual(1)=3*(x+y^2)+1; +\end{verbatim} + \end{column} + \begin{column}{4.8cm} + Can be optimized into: +\begin{verbatim} +T01=x+y^2; +residual(0)=T01-z^3; +residual(1)=3*T01+1; +\end{verbatim} + \end{column} + \end{columns} + \end{block} +\end{frame} + +\begin{frame} + \frametitle{Temporary terms (2/2)} + \begin{itemize} + \item Expression storage in the preprocessor implements maximal sharing... 
+ \item ...but it is not optimal for the Matlab output files, because creating a temporary variable also has a cost (in terms of CPU and of memory) + \item Computation of temporary terms implements a trade-off between: + \begin{itemize} + \item cost of duplicating sub-expressions + \item cost of creating new variables + \end{itemize} + \item Algorithm uses a recursive cost calculation, which marks some nodes as being ``temporary'' + \item \textit{Problem}: redundant with optimizations done by the C/C++ compiler (when Dynare is in DLL mode) $\Rightarrow$ compilation very slow on big models + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{The special case of Ramsey policy} + \begin{itemize} + \item For most statements, the method \texttt{computingPass()} is a no-op... + \item ...except for \texttt{planner\_objective} statement, which serves to declare planner objective when doing optimal policy under commitment + \item Class \texttt{PlannerObjectiveStatement} contains an instance of \texttt{ModelTree}: used to store the objective (only one equation in the tree) + \item During the computing pass, triggers the computation of the first and second order (static) derivatives of the objective + \end{itemize} +\end{frame} + +\section{Writing outputs} + +\begin{frame} + \frametitle{Output overview} + \begin{itemize} + \item Implemented in \texttt{ModFile::writeOutputFiles()} + \item If \texttt{mod} file is \texttt{model.mod}, all created filenames will begin with \texttt{model} + \item Main output file is \texttt{model.m}, containing: + \begin{itemize} + \item general initialization commands + \item symbol table output (from \texttt{SymbolTable::writeOutput()}) + \item lead/lag incidence matrix (from \texttt{ModelTree::writeOutput()}) + \item call to Matlab functions corresponding to the statements of the \texttt{mod} file (written by calling \texttt{Statement::writeOutput()} on all statements through polymorphism) + \end{itemize} + \item Subsidiary output files: 
+ \begin{itemize} + \item one for the static model + \item one for the dynamic model + \item and one for the planner objective (if relevant) + \item written through \texttt{ModelTree} methods: \texttt{writeStaticFile()} and \texttt{writeDynamicFile()} + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Model output files} + Three possible modes for \texttt{ModelTree} (see \texttt{mode} attribute): + \begin{itemize} + \item Standard mode: static and dynamic files in Matlab + \item DLL mode: + \begin{itemize} + \item static and dynamic files in C++ source code (with corresponding headers) + \item compiled through \texttt{mex} to allow execution from within Matlab + \end{itemize} + \item Sparse DLL mode: + \begin{itemize} + \item static file in Matlab + \item two possibilities for dynamic file: + \begin{itemize} + \item by default, a C++ source file (with header) and a binary file, to be read from the C++ code + \item or, with \texttt{no\_compiler} option, a binary file in custom format, executed from Matlab through \texttt{simulate} DLL + \item the second option serves to bypass compilation of C++ file which can be very slow + \end{itemize} + \end{itemize} + \end{itemize} +\end{frame} + +\section{Conclusion} + +\begin{frame} + \frametitle{Future work (1/2)} + \framesubtitle{Enhancements, optimizations} + \begin{itemize} + \item Refactor and reorganize some portions of the code + \item Create a test suite (with unit tests) + \item Separate computation of temporary terms between static and dynamic outputs + \item Enhance sub-expression sharing algorithm (using associativity, commutativity and factorization rules) + \item Add many checks on the structure of the \texttt{mod} file + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Future work (2/2)} + \framesubtitle{Features} + \begin{itemize} + \item Add precompiler macros (\#include, \#define, \#if) + \item Add handling for several (sub-)models + \item Add indexed variables and 
control statements (if, loops) both in models and command language + \item Add sum, diff, prod operators + \item For unknown functions in the model: let user provide a derivative, or trigger numerical derivation + \item Generalize binary code output + \item Generalize block decomposition ? + \end{itemize} +\end{frame} + + + +\end{document}