Initial commit (TFM Carlos)

This commit is contained in:
Carlos Galindo 2019-10-18 08:54:33 +00:00
commit d7d30674fe
24 changed files with 889 additions and 0 deletions

13
Makefile Normal file
View File

@ -0,0 +1,13 @@
# Builds paper.pdf from paper.tex and the Graphviz figures rendered in img/.

# These targets are commands, not files. Declaring them phony keeps them
# working even if a file named `all`, `images` or `clean` ever appears.
.PHONY: all images clean

all: paper.pdf

# Render the figures by delegating to img/Makefile.
images:
	$(MAKE) -C img

# pdflatex is run twice (presumably so that references written by the first
# pass are resolved by the second). Because `images` is phony, this rule is
# re-run on every invocation.
paper.pdf: paper.tex images
	pdflatex -synctex=1 -interaction=nonstopmode paper.tex
	pdflatex -synctex=1 -interaction=nonstopmode paper.tex

# Remove LaTeX by-products, the tmp directory and the rendered figures.
clean:
	rm -f *.toc *.aux *.bbl *.blg *.fls *.out *.log *.synctex.gz *.dot
	rm -rf tmp
	$(MAKE) -C img clean

78
bibliography.tex Normal file
View File

@ -0,0 +1,78 @@
\begin{thebibliography}{99}
\bibitem{weiser79}
Mark D. Weiser.
\textsl{Program Slices: Formal, Psychological, and Practical Investigations of an Automatic Program Abstraction Method.}
1979.
\bibitem{sinha98}
Saurabh Sinha, Mary Jean Harrold.
\textsl{Analysis of Programs with Exception-Handling Constructs.}
1998.
\bibitem{sinha99}
Saurabh Sinha, Mary Jean Harrold, Gregg Rothermel.
\textsl{System-Dependence-Graph-Based Slicing of Programs With Arbitrary Interprocedural Control Flow.}
1999.
\bibitem{sinha00}
Saurabh Sinha, Mary Jean Harrold.
\textsl{Analysis and Testing of Programs with Exception-Handling Constructs.}
2000.
\bibitem{allen03}
Matthew Allen, Susan Horwitz.
\textsl{Slicing Java Programs That Throw and Catch Exceptions.}
2003.
\bibitem{jo04}
Jang-Wu Jo, Byeong-Mo Chang.
\textsl{Constructing Control Flow Graph for Java by Decoupling Exception Flow from Normal Flow.}
2004.
\bibitem{jiang06}
Shujuan Jiang, Shengwu Zhou, Yuqin Shi, Yuanpeng Jiang.
\textsl{Improving the Preciseness of Dependence Analysis using Exception Analysis.}
2006.
\bibitem{jiang07}
Shujuan Jiang, Yuanpeng Jiang.
\textsl{An Analysis Approach for Testing Exception Handling Programs.}
2007.
\bibitem{dooren07}
Marko van Dooren, Eric Steegmans.
\textsl{Combining the Robustness of Checked Exceptions with the Flexibility of Unchecked Exceptions using Anchored Exception Declarations.}
2007.
\bibitem{xu07}
Guoqing Xu, Atanas Rountev.
\textsl{Data-flow and Control-flow Analysis of AspectJ Software for Program Slicing.}
2007.
\bibitem{corbat08}
Thomas Corbat.
\textsl{Dependence Graphs for Slicing and Refactoring.}
2008.
\bibitem{qui10}
Xiang Qiu, Li Zhang, Xiaoli Lian.
\textsl{Static Analysis for Java Exception Propagation Structure.}
2010.
\bibitem{prabhu11}
Prakash Prabhu, Naoto Maeda, Gogul Balakrishnan, Franjo Ivančić, Aarti Gupta.
\textsl{Interprocedural Exception Analysis for C++.}
2011.
\bibitem{jie11}
Hao Jie, Jiang Shu-juan.
\textsl{An Approach of Slicing for Object-oriented Language with Exception Handling.}
2011.
\bibitem{chang15}
Byeong-Mo Chang, Kwanghoon Choi.
\textsl{A review on exception analysis.}
2015.
%\bibitem{citekey}
\end{thebibliography}

9
img/Makefile Normal file
View File

@ -0,0 +1,9 @@
# Renders every Graphviz source (*.dot) in this directory to a PDF figure.

# One PDF per .dot file. Built with make's own functions instead of
# `ls | sed`: evaluated once (`:=`), no subprocess, no error when the
# directory has no .dot files, and only the trailing .dot suffix is rewritten.
dotfiles := $(patsubst %.dot,%.pdf,$(wildcard *.dot))

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: $(dotfiles)

# $< is the .dot source, $@ the PDF being produced.
%.pdf: %.dot
	dot -Tpdf $< -o $@

# Removes every PDF in this directory, not only those rendered from .dot files.
clean:
	rm -f *.pdf

8
img/breakcfg.dot Normal file
View File

@ -0,0 +1,8 @@
// Control flow graph of f(): a while loop containing a conditional break.
digraph g {
Start [shape=box];
End [shape=box];
// Dashed edge straight from Start to End.
Start -> End [style=dashed];
// Chain that goes through the break: after it, control continues at print(a).
Start -> "int a = 1" -> "while (a > 0)" -> "if (a > 10)" -> "break" -> "print(a)";
// Dashed edge from the break to a++, the statement that follows it in the loop body.
"break" -> "a++" [style=dashed];
// Chain that skips the break: a++ returns to the loop condition, which also leads to print(a) and End.
"if (a > 10)" -> "a++" -> "while (a > 0)" -> "print(a)" -> End;
}

22
img/breakpdg.dot Normal file
View File

@ -0,0 +1,22 @@
// Program dependence graph of f(), the example with a break inside a loop.
// Bold edges are control dependencies, red edges are data dependencies.
digraph g {
    "f()" [shape=box];
    // Rank adjustment
    // `rank` only takes effect on a subgraph, so the root is pinned to the top
    // rank through one rather than through a node attribute.
    { rank = min; "f()"; }
    { rank = same; "int a = 1"; "while (a > 0)"; }
    { rank = same; "print(a)"; "a++"; }
    { rank = max; "a++"; "print(a)"; }
    // Control flow
    "f()" -> "int a = 1" [style=bold];
    "f()" -> "while (a > 0)" [style=bold];
    "while (a > 0)" -> "if (a > 10)" [style=bold];
    "if (a > 10)" -> "break" [style=bold];
    "break" -> "print(a)" [style=bold];
    "break" -> "a++" [style=bold];
    "break" -> "while (a > 0)" [style=bold];
    // Data flow
    "int a = 1" -> "while (a > 0)" [color=red];
    "int a = 1" -> "if (a > 10)" [color=red];
    "int a = 1" -> "print(a)" [color=red];
    "a++" -> "a++" -> "while (a > 0)" [color=red];
    "a++" -> "if (a > 10)" [color=red];
    "a++" -> "print(a)" [color=red];
}

6
img/cfgsimple.dot Normal file
View File

@ -0,0 +1,6 @@
// Control flow graph of main() in the simple example program.
digraph g {
    Start [shape=box];
    End [shape=box];
    // Call vertex: packs the arguments, performs the call, then unpacks the
    // result. x_out is unpacked into a, matching the "a = x_out" vertex in
    // the PDG and SDG of the same program.
    f [label=<x_in = a<br/>y_in = b<br/>f (a, b)<br/>a = x_out>]
    Start -> "a = 10" -> "b = 20" -> f -> "print(a)" -> End;
}

5
img/cfgsimple2.dot Normal file
View File

@ -0,0 +1,5 @@
// Control flow graph of a procedure whose Start and End boxes carry the
// parameter-passing assignments.
digraph g {
// Start box: x and y are read from x_in and y_in.
s [shape=box,label=<Start<br/>x = x_in<br/>y = y_in>];
// End box: x is written to x_out.
End [shape=box,label=<x_out = x<br/>End>];
// The loop body returns to the loop condition; the condition also leads on to print(x).
s -> "while (x > y)" -> "x = x - 1" -> "while (x > y)" -> "print(x)" -> End;
}

42
img/legendsimple.dot Normal file
View File

@ -0,0 +1,42 @@
// Legend for the edge styles: each row pairs a caption with a sample edge.
digraph g {
rankdir=LR
node [shape=plaintext]
subgraph cluster_01 {
label = "Legend";
// Left column: one caption per row, each row exposing a port (i1..i4).
key [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
<tr><td align="right" port="i1">Control dependency</td></tr>
<tr><td align="right" port="i2">Data dependency</td></tr>
<tr><td align="right" port="i3">Call, param-in, param-out</td></tr>
<tr><td align="right" port="i4">Summary</td></tr>
</table>>]
// Right column: blank cells with matching ports, used as edge endpoints.
key2 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
<tr><td port="i1">&nbsp;</td></tr>
<tr><td port="i2">&nbsp;</td></tr>
<tr><td port="i3">&nbsp;</td></tr>
<tr><td port="i4">&nbsp;</td></tr>
</table>>]
// One sample edge per row, from the east side of the caption to the west side of the blank cell.
key:i1:e -> key2:i1:w [style=bold]
key:i2:e -> key2:i2:w [color=red]
key:i3:e -> key2:i3:w [style=dashed]
key:i4:e -> key2:i4:w [color=blue]
}
}

5
img/multiplycfg.dot Normal file
View File

@ -0,0 +1,5 @@
// Control flow graph of the multiply example.
digraph g {
Start [shape=box];
End [shape=box];
// Single chain: the loop body (result += y, x--) returns to the loop condition,
// which also leads on to the println, the return and End.
Start -> "int result = 0" -> "while (x > 0)" -> "result += y" -> "x--" -> "while (x > 0)" -> "System.out.println(result)" -> "return result" -> "End";
}

26
img/multiplypdg.dot Normal file
View File

@ -0,0 +1,26 @@
// Program dependence graph of multiply().
// Bold edges are control dependencies, red edges are data dependencies.
digraph g {
    "multiply()" [shape=box];
    // Rank adjustment
    // `rank` only takes effect on a subgraph, so the root is pinned to the top
    // rank through one rather than through a node attribute.
    { rank = min; "multiply()"; }
    { rank = same; "int result = 0"; "while (x > 0)"; "System.out.println(result)"; "return result"; }
    { rank = same; "result += y"; "x--"; }
    // Control flow
    "multiply()" -> "int result = 0" [style=bold];
    "multiply()" -> "while (x > 0)" [style=bold];
    "multiply()" -> "System.out.println(result)" [style=bold];
    "multiply()" -> "return result" [style=bold];
    "while (x > 0)" -> "result += y" [style=bold];
    "while (x > 0)" -> "x--" [style=bold];
    // Data flow
    "int result = 0" -> "result += y" [color=red];
    "int result = 0" -> "System.out.println(result)" [color=red];
    "int result = 0" -> "return result" [color=red];
    "result += y" -> "result += y" [color=red];
    "result += y" -> "System.out.println(result)" [color=red];
    "result += y" -> "return result" [color=red];
    "x--" -> "x--" [color=red];
    "x--" -> "while (x > 0)" [color=red];
    // Order adjustment: invisible edges that only fix the left-to-right order
    "int result = 0" -> "while (x > 0)" [style=invis];
    "while (x > 0)" -> "System.out.println(result)" [style=invis];
    "System.out.println(result)" -> "return result" [style=invis];
    "result += y" -> "x--" [style=invis];
}

52
img/multiplysdg.dot Normal file
View File

@ -0,0 +1,52 @@
// System dependence graph of the multiply example: main() calling multiply().
// Bold edges are control dependencies, red edges data dependencies, dotted
// edges connect the call site with the callee, blue edges are summary edges.
digraph g {
// Caller side: main() and its call to multiply.
"main()" [shape=box];
"main()" -> "multiply(3, 2)" [style=bold];
// Keep the call's argument/result nodes on one rank, ordered x, y, out.
{ rank = same; "x"; "y"; "out" }
"x" -> "y" [style=invis];
"y" -> "out" [style=invis];
// Call edge from the call site to the callee's entry.
"multiply(3, 2)" -> "multiply()" [style=dotted];
// The call node owns its argument and result nodes.
"multiply(3, 2)" -> "x";
"multiply(3, 2)" -> "y";
"multiply(3, 2)" -> "out";
// Arguments flow into the callee's inputs; the callee's output flows back.
"x" -> "x_in" [style=dotted];
"y" -> "y_in" [style=dotted];
"output" -> "out" [style=dotted];
// Summary edges: the call's result depends on both arguments.
"x" -> "out" [style=bold, color=blue];
"y" -> "out" [style=bold, color=blue];
// Callee side: multiply().
"multiply()" [shape=box];
// Rank adjustment
{ rank = same; "x_in"; "y_in"; "output" }
{ rank = same; "int result = 0"; "while (x > 0)"; "System.out.println(result)"; "return result"; }
{ rank = same; "result += y"; "x--"; }
// Input/output
"multiply()" -> "x_in";
"multiply()" -> "y_in";
"multiply()" -> "output"
"x_in" -> "while (x > 0)" [color=red];
"x_in" -> "x--" [color=red];
"y_in" -> "result += y" [color=red];
"return result" -> "output" [color=red];
// Control flow
"multiply()" -> "int result = 0" [style=bold];
"multiply()" -> "while (x > 0)" [style=bold];
"multiply()" -> "System.out.println(result)" [style=bold];
"multiply()" -> "return result" [style=bold];
"while (x > 0)" -> "result += y" [style=bold];
"while (x > 0)" -> "x--" [style=bold];
// Data flow
"int result = 0" -> "result += y" [color=red];
"int result = 0" -> "System.out.println(result)" [color=red];
"int result = 0" -> "return result" [color=red];
"result += y" -> "result += y" [color=red];
"result += y" -> "System.out.println(result)" [color=red];
"result += y" -> "return result" [color=red];
"x--" -> "x--" [color=red];
"x--" -> "while (x > 0)" [color=red];
// Order adjustment: invisible edges that only fix the left-to-right order
"int result = 0" -> "while (x > 0)" [style=invis];
"while (x > 0)" -> "System.out.println(result)" [style=invis];
"System.out.println(result)" -> "return result" [style=invis];
"result += y" -> "x--" [style=invis];
"x_in" -> "y_in" [style=invis];
}

30
img/pdgsimple.dot Normal file
View File

@ -0,0 +1,30 @@
// Program dependence graph of the caller in the simple example: statements
// l2..l5 hang from Start, and the call l4 owns its parameter nodes p1..p3.
digraph g {
Start [shape=box];
l2 [label="a = 10"];
l3 [label="b = 20"];
l4 [label="f(a, b)"];
// Parameter passing around the call: two inputs and one output.
p1 [label="x_in = a"];
p2 [label="y_in = b"];
p3 [label="a = x_out"];
l5 [label="print(a)"];
// Rank: statements on one row, Start on top, parameter nodes on their own row
{ rank = same; l2; l3; l4; l5; }
{ rank = min; Start; }
{ rank = same; p1; p2; p3; }
// Control dependencies (bold)
{ edge [style = bold];
Start -> { l2 l3 l4 l5 };
l4 -> { p1 p2 p3 };
}
// Data dependencies (red)
{ edge [color = red];
l2 -> p1;
l3 -> p2;
p3 -> l5;
}
// Order: invisible edges that only fix the left-to-right order within each row
{ edge [style = invis];
l2 -> l3 -> l4 -> l5;
p1 -> p2 -> p3;
}
}

27
img/pdgsimple2.dot Normal file
View File

@ -0,0 +1,27 @@
// Program dependence graph of procedure f(x, y) in the simple example.
// Bold edges are control dependencies, red edges are data dependencies.
digraph g {
    Start [shape=box];
    s1 [label="x = x_in"];
    s0 [label="y = y_in"];
    s2 [label="while (x > y)"];
    // The loop body decrements x, as in the listing and in the CFG of f.
    s3 [label="x = x - 1"];
    s4 [label="print(x)"];
    s5 [label="x_out = x"];
    // Rank
    { rank=same; s0; s1; s5; }
    { rank=same; s2; s4; }
    { rank=min; Start; }
    // Invisible edge that only places the loop below the parameter row.
    s0 -> s2 [style=invis];
    // Control
    {
        edge [style = bold];
        Start -> {s0 s1 s2 s4 s5};
        s2 -> s3;
    }
    // Data (constraint=false: these edges do not influence node ranks)
    {
        edge [color = red];
        edge [constraint = false];
        {s1 s3} -> {s2 s3 s4 s5};
        s0 -> s2;
    }
}

50
img/sdgsimple.dot Normal file
View File

@ -0,0 +1,50 @@
// System dependence graph of the simple example: main() calling f().
// Bold = control dependency, red = data dependency,
// dashed = call / parameter-in / parameter-out, blue = summary.
digraph g {
    // Nodes of main() and of its call site.
    subgraph {
        l1; l2; l3; l4; l5;
        "x_in = a"; "y_in = b"; "a = x_out";
    }
    // Nodes of f().
    subgraph {
        l8; l9; l10; l12;
        "x = x_in"; "y = y_in"; "x_out = x";
    }
    l1 [label="main()"];
    l2 [label="a = 10"];
    l3 [label="b = 20"];
    l4 [label="f(a, b)"];
    l5 [label="print(a)"];
    l8 [label="f()"];
    l9 [label="while (x > y)"];
    // The loop body decrements x, as in the listing and in the CFG of f.
    l10 [label="x = x - 1"];
    l12 [label="print(x)"];
    // Rank
    { rank = same; l9; l12; }
    // Control
    {
        edge [style = bold];
        l1 -> {l2 l3 l4 l5};
        l4 -> {"x_in = a" "y_in = b" "a = x_out"};
        l8 -> {"x = x_in" "y = y_in" l9 l12 "x_out = x"};
        l9 -> l10;
    }
    // Data (constraint=false: these edges do not influence node ranks)
    {
        edge [color = red];
        edge [constraint = false];
        l2 -> "x_in = a";
        l3 -> "y_in = b";
        "a = x_out" -> l5;
        {"x = x_in" l10} -> {l9 l10 l12 "x_out = x"};
        "y = y_in" -> l9;
    }
    // Interprocedural edges; only the call edge l4 -> l8 constrains the ranking.
    {
        edge [style=dashed];
        edge [constraint=false];
        "x_in = a" -> "x = x_in";
        "y_in = b" -> "y = y_in";
        l4 -> l8 [constraint=true];
        "x_out = x" -> "a = x_out";
    }
    // Summary edges from the call's inputs to its output.
    {edge [color=blue,constraint=false]; {"x_in = a" "y_in = b"} -> "a = x_out"}
    // Invisible edges used only for layout.
    {edge [style=invis]; "y_in = b" -> l8; "y = y_in" -> l9; }
}

122
incremental_slicing.tex Normal file
View File

@ -0,0 +1,122 @@
\chapter{Main explanation?}
\section{First definition of the SDG}
The system dependence graph (SDG) is a method for program slicing that was first proposed by Horwitz, Reps and Binkley \cite{horwitz90}. It builds upon the existing control flow graph (CFG), defining dependencies between vertices of the CFG, and building a program dependence graph (PDG), which represents them. The system dependence graph (SDG) is then built from the assembly of the different PDGs (each representing a method of the program), linking each method call to its corresponding definition. Because each graph is built from the previous one, new constructs can be added to the CFG, without the need to alter the algorithm that converts CFG to PDG and then to SDG. The only modification possible is the redefinition of a dependency or the addition of new kinds of dependence.
The language covered by the initial proposal was a simple one, featuring procedures with modifiable parameters and basic instructions, including calls to procedures, variable assignments, arithmetic and logic operators and conditional instructions (branches and loops): the basic features of an imperative programming language. The control flow graph was as simple as the programs themselves, with each graph representing one procedure. The instructions of the program are represented as vertices of the graph and are split into two categories: statements, which have no effect on the control flow (assignments, procedure calls) and predicates, whose execution may lead to one of multiple ---though traditionally two--- paths (conditional instructions). Statements are connected sequentially to the next instruction. Predicates have two outgoing edges, each connected to the first statement that should be executed, according to the result of evaluating the conditional expression in the guard of the predicate.
To build the PDG and then the SDG, some dependencies must be extracted from the CFG, which are defined as follows:
\begin{definition}[Postdominance]
Vertex $b$ \textit{postdominates} vertex $a$ if and only if $a \neq b$ and $b$ is on every path from $a$ to the ``End'' vertex.
\end{definition}
\begin{definition}[Control dependency]
\label{def:ctrl-dep}
Vertex $b$ is \textit{control dependent} on vertex $a$ ($a \ctrldep b$) if and only if $b$ postdominates one but not all of $a$'s successors. It follows that a vertex with only one successor cannot be the source of control dependence.
\end{definition}
\begin{definition}[Data dependency]
Vertex $b$ is \textit{data dependent} on vertex $a$ ($a \datadep b$) if and only if $a$ may define a variable $x$, $b$ may use $x$ and there is an $x$-definition-free path from $a$ to $b$.\footnote{The initial definition of data dependency was further split into in-loop data dependencies and the rest, but the difference is not relevant for computing the slices in the SDG.}
\end{definition}
It should be noted that variable definitions and uses can be computed for each statement independently, analyzing the procedures called by it if necessary. In general, any instruction uses all variables that appear in it, save for the left-hand side of assignments. Similarly, no instruction defines variables, except those in the left-hand side of assignments. The variables used and defined by a procedure call are those used and defined by its body.
With the data and control dependencies, the PDG may be built, by replacing the edges from the CFG by data and control dependence edges. The first tends to be represented as a thin solid line, and the latter as a thick solid line. In the examples, data dependencies will be thin solid red lines.
The organization of the vertices of the PDG tends to resemble a tree graph, with the ``Start'' node in the position of the root (at the top), and the ``End'' node typically omitted. The control dependence edges structure the tree vertically. In the case that a vertex is control dependent on multiple vertices, it will be placed one level below the lowest source of control dependency. With a programming language this simple, cyclical control dependencies do not appear, but should they do so in further sections, the instructions are sorted top to bottom in the order they appear in the program. Horizontally, the vertices are sorted by their order in the program, left to right, in order to make the graph more readable. Data dependency edges are placed without reordering the nodes of the graph. In the examples given, edges like $a \datadep a$ or $b \ctrldep b$ may be omitted, as they are not relevant for later use of the graph. Please note that the location of the vertices is irrelevant for the slicing algorithm, and the aforementioned sorting rules are just for consistency with previous papers on the topic and to ease the visualization of programs.
Finally, the SDG is built from the combination of all the PDGs that compose the program. Each call vertex is connected to the ``Start'' of the corresponding procedure. All edges that connect PDGs are represented with dashed lines.
\begin{figure}
\begin{minipage}{0.3\linewidth}
\begin{lstlisting}
proc main() {
a = 10;
b = 20;
f(a, b);
print(a);
}
proc f(x, y) {
while (x > y) {
x = x - 1;
}
print(x);
}
\end{lstlisting}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\includegraphics[width=0.3\linewidth]{img/cfgsimple}
\includegraphics[width=0.65\linewidth]{img/cfgsimple2}
\end{minipage}
\includegraphics[width=0.5\linewidth]{img/pdgsimple}
\includegraphics[width=0.49\linewidth]{img/pdgsimple2}
\includegraphics[width=0.6\linewidth]{img/sdgsimple}
\includegraphics[width=0.4\linewidth]{img/legendsimple}
\caption{A simple program with its CFGs (top right), PDGs (center) and SDG (bottom).}
\label{fig:sdg-loop}
\end{figure}
\subsubsection{Procedures and data dependencies}
The only thing left to explain before introducing more constructs into the language is the passing of parameters. Most programming languages accept a variable number of input parameters and one output parameter. In the case of input parameters passed by reference, or constructs such as structs or classes, modifying a field of a parameter may modify the original variable. In order to deal with everything related to parameter passing, including global variables, class fields, etc. there is a small extension to be made to the CFG and PDG.
In the CFG, the ``Start'' and ``End'' nodes contain a list of assignments, inputting and outputting respectively the appropriate values, as can be seen in the example. Consequently, every vertex that contains a procedure or function call packs and unpacks the arguments. For every variable $x$ that is used in a procedure, every call to it must be preceded by $x_{in} = x$, and the procedure's ``Start'' vertex must contain $x = x_{in}$. The opposite happens when a variable must be ``outputted''\todo{replace}: before the ``End'' node, the value must be packed ($x_{out} = x$), and after each call, the value must be assigned to the corresponding variable ($x = x_{out}$). Parameters may be assigned as $par^i_{in} = expr_i$ (where $i$ is the index of the parameter in the procedure definition, $par^i$ is the name of the parameter and $expr_i$ is the expression in the $i^{th}$ position in the procedure call) in the call vertex, and parameters whose modifications inside the procedure are passed back to the calling procedure must be extracted as $var = par^i_{out}$ (where $var$ is the name of the variable ---passed by reference--- in the calling procedure).\todo{What if object/struct passed by value?} As an addition, in the SDG, an extra edge is added (summary edge), which represents the dependencies that the outputs have on the input variables. This allows the algorithm to know the dependencies without traversing the corresponding function.
All these additions are added as extra lines in the ``Start'', ``End'' and calling vertices.
When building the PDG, all additions (variable assignments) are split into their own vertices, and are control dependent on them.
Data dependencies no longer flow through the call vertex, but through the appropriate child, which minimizes the size of the slice produced.
As an example, figure~\ref{fig:sdg-loop} shows the three stages of a program, from CFG to SDG.
The construction of the CFG is straight-forward, save for the packing and unpacking of variables in the start, end and call vertices.
In the PDG, the statements are split, control and data dependencies replace the control flow edges.
Finally, both PDGs are linked via call and parameter (input and output) edges, forming the SDG.
Summary edges are placed according to the data and control flow of the method call, and the graph is complete.
\section{Unconditional control flow}
Even though the initial definition of the SDG was useful to compute slices, the language covered was not enough for the typical language of the 1980's, which included (in one form or another) unconditional control flow.
Therefore, one of the first additions contributed to the algorithm to build system dependence graphs was the inclusion of unconditional jumps, such as ``break'', ``continue'', ``goto'' and ``return'' statements (or any other equivalent).
A naive representation would be to treat them the same as any other statement, but with the outgoing edge landing in the corresponding instruction (outside the loop, at the loop condition, at the method's end, etc.).
An alternative approach is to represent the instruction as an edge, not a vertex, connecting the previous statement with the next to be executed.
Both of these approaches fail to generate a control dependence from the unconditional jump, as the definition of control dependence (see Definition~\ref{def:ctrl-dep}) requires a vertex to have more than one successor for it to be possible to be a source of control dependence.
A possible ---but difficult--- solution would be to redefine control dependence, as some\todo{citation-needed} have done.
The most popular solution was proposed by Ball and Horwitz\cite{ball??}, and represents unconditional jumps as a predicate.
The true edge would lead to the next instruction to be executed, and the false edge would be a non-executable or \textit{dummy} edge, connected to the instruction that would be executed were the unconditional jump a \textit{nop}.
The consequence of this solution is that every instruction placed after the unconditional jump is control dependent on the jump, as can be seen in Figure~\ref{fig:break-graphs}.
In the example, when slicing with respect to variable $a$ on line 5, every statement would be included, save for ``print(a)''.
Line 4 is not strictly necessary in this example ---in the context of weak slicing---, but is included nonetheless.
In the original paper, the transformation is proved to be complete, but not correct, as for some examples, the slice includes more unconditional jumps than would be strictly necessary, even for weak slicing.
Ball and Horwitz theorize that a more correct approach would be possible, if it weren't for the limitation of slices to be a subset of statements of the program, in the same order as in the original.
\begin{figure}
\centering
\begin{minipage}{0.3\linewidth}
\begin{lstlisting}
static void f() {
int a = 1;
while (a > 0) {
if (a > 10) break;
a++;
}
System.out.println(a);
}
\end{lstlisting}
\end{minipage}
\begin{minipage}{0.6\linewidth}
\includegraphics[width=0.4\linewidth]{img/breakcfg}
\includegraphics[width=0.59\linewidth]{img/breakpdg}
\end{minipage}
\caption{A program with unconditional control flow, its CFG (center) and PDG (right).}
\label{fig:break-graphs}
\end{figure}
\section{Exceptions}
As seen in section~\ref{sec:intro-exception}, exception handling adds two constructs: the \texttt{throw} and the \texttt{try-catch} statements. The first one resembles an unconditional control flow statement, with an unknown (at compile time) destination. The exception will be caught by a \texttt{catch} of the corresponding type or a supertype ---if it exists---, but polymorphism and inheritance make the analysis difficult.
\subsection{\texttt{throw} statement}
\subsection{\texttt{try-catch} statement}

97
introduction.tex Normal file
View File

@ -0,0 +1,97 @@
\chapter{Introduction}
\section{Program slicing}
\textsl{Program slicing} is a debugging technique which answers the question: ``which parts of a program affect a selected statement and variable?'' The statement and the variable are the basic input to create a slice and are called the \textsl{slicing criterion}. The criterion can be more complex, as different slicing techniques may require additional pieces of input.
There exist two dimensions along which the problem of slicing can be proposed:
\begin{itemize}
\item \textsl{Static} or \textsl{dynamic}: slicing can be performed statically (which is the default) or dynamically, which includes an execution log. A statement in the log is marked, along with a variable. The dynamic slice will only include statements from the execution log, even if in the general case more statements are required. This makes the slice more useful for the specific case, and may help solve a bug related with an indeterministic behaviour (such as a random or pseudo-random number generator), but must be recomputed for each case to be analyzed.
\item \textsl{Backward} or \textsl{forward}: the default tends to be backward slicing, which looks at which statements affect the selected one. Forward slicing obtains the statements that are affected by the chosen one. There also exists a mixed approach, which is used to find all the statements that affect or are affected by a specific line.
\end{itemize}
The default choice tends to be a \textsl{static backward slice}, which obtains the list of statements that affect the value of a variable in a given statement in all possible executions of the program.
The \textsl{slice} of a program is a list of statements from the original program which constitutes a valid program, whose execution will result in the same values for the variable being read by a debugger in the selected statement\cite{weiser79}.
Some definitions of slicing\todo{Citation needed} allow for the slice to continue producing values after the program has stopped, making the slices simpler to produce and smaller in size at the cost of different endings\footnotemark. We will name the exact slice ---one that produces exactly the same values--- a \textit{strong} slice, and the permissive one, a \textit{weak} slice. See table \ref{tab:slice-permissive} for an example; with each row showing the values logged at the slicing criterion from the execution of 4 different programs. The first is the original, which computes $3!$. Slice A is one slice, whose execution is identical and therefore is a strong slice. Slice B is correct but continues producing values after the original stops ---a weak slice. It would fit the relaxed definition but not a strict one. Slice C is incorrect, as the values differ from the original. Some data or control dependency has not been included in the slice and the program is behaving in a different way.
\footnotetext{POSSIBLE ADDITION: It could be argued that permissive or weak slicing is enough for most uses of slicing, as if we suppose that the bug is present before the end of the program, then the bug must show up in the slice as well, regardless of whether the sliced program continues producing extra values or not.}
\begin{table}
\centering
\begin{tabular}{r | r | r | r | r | r }
Iteration & \textbf{1} & \textbf{2} & \textbf{3} & \textbf{4} & \textbf{5} \\ \hline
Original & 1 & 2 & 6 & & \\ \hline
Slice A & 1 & 2 & 6 & & \\ \hline
Slice B & 1 & 2 & 6 & 24 & 120 \\ \hline
Slice C & 1 & 1 & 3 & 5 & 8 \\
\end{tabular}
\caption{Execution logs of different slices and their original program.}
\label{tab:slice-permissive}
\end{table}
The most efficient and broadly used tool for slicing is the system dependence graph (SDG), first introduced by Horwitz, Reps and Binkley\cite{horwitz90}. It represents the statements of a program as vertices, and their dependencies as directed edges. Method calls are connected to method definitions, and so are the corresponding input and output parameters. SDGs show two different kinds of dependencies: \textsl{data} and \textsl{control}. The first one connects nodes that write to variables to the nodes that use (or \textsl{may} use) the value, and it is represented as a dashed\todo{check} line. The latter represents which nodes have control over the execution of others (conditional jumps and loops, mainly), and its representation is a solid line. In order to obtain a slice of a program, its SDG must be built from the source code. Then a two pass search ($\mathcal{O}(n)$ each) is performed to obtain the slice. The SDG can be reused to obtain a different slice of the same program (with a different criterion or kind\footnotemark of slice). The efficiency derives from the linear cost of the search on the SDG, so most modifications\todo{citation needed} modify the complexity of the SDG's construction, but try to keep the slice process linear.
\footnotetext{TODO: change this word to the proper one.}
The SDG is built in 3 stages, each resulting in a different graph:
\begin{description}
\item[CFG] The control flow graph is the representation of the control dependencies in a method of a program. Every statement has an edge from itself to every statement that can immediately follow. This means that most will only have one outgoing edge, and conditional jumps and loops will have two. The graph starts in a ``Begin'' or ``Start'' node, and ends in an ``End'' node, to which the last statement and all return statements are connected. It is created directly from the source code, without any need for data dependency analysis.
\item[PDG] The program dependence graph is the result of restructuring and adding data dependencies to a CFG. All statements are placed below and connected to a ``Begin'' node, except those which are inside a loop or conditional block. Then data dependencies are added (red or dashed edges), adding an edge between two nodes if there is a data dependency. \todo{add definitions?}
\item[SDG] Finally, the system dependence graph is the interconnection of each method's PDG. When a call is made, the input arguments are passed to subnodes of the call, and the result is obtained in another subnode. There is an edge from the call to the beginning of the corresponding method, and an extra type of edge exists: \textsl{summary edges}, which summarize the data dependencies between input and output variables.
\end{description}
An example is provided in figure \ref{fig:basic-graphs}, where a simple multiplication program is converted to CFG, then PDG and finally SDG. For simplicity only the CFG and PDG of \texttt{multiply} are shown. Control dependencies are black, data dependencies red and summary edges blue.
\begin{figure}
\centering
% \lstinputlisting[firstline=8, lastline=16]{./dot/simple.java}
\includegraphics[width=0.5\linewidth]{img/multiplycfg}
\includegraphics[width=\linewidth]{img/multiplypdg}
\includegraphics[width=\linewidth]{img/multiplysdg}
\caption{A simple multiplication program, its CFG, PDG and SDG}
\label{fig:basic-graphs}
\end{figure}
The original proposal by Weiser\cite{weiser79} covers only the simplest features of an imperative programming language. The various iterations\todo{cite} until reaching the SDG\todo{cite} have added other elements, such as return statements\todo{cite}, global variables\todo{cite}, object oriented features\todo{cite} and finally exception handling\cite{allen03}.
\subsection{Metrics}
There are 4 metrics considered when evaluating a slicing algorithm:
\begin{description}
\item[Completeness] The solution includes all the statements that affect the slice. This is the most important feature, and almost all publications achieve at least completeness. Trivial completeness is easily achievable, as simple as including the whole program in the slice.
\item[Correctness] The solution excludes all statements that don't affect the slice. Most solutions are complete, but the degree of correctness is what sets them apart, as smaller slices will not execute unnecessary code to compute the values, decreasing the executing time.
\item[Features covered] Which features or language a slicing algorithm covers. Different approaches to slicing cover different programming languages and even paradigms. There are slicing techniques (published or commercially available) for most popular programming languages, from C++ to Erlang. Some slicing techniques only cover a subset of the targeted language, and as such are less useful for commercial applications, but can be a stepping stone in the betterment of the field.
\item[Speed] Speed of graph generation and slice creation. As previously commented, slicing is a two-step process: build a graph and traverse it. The traversal is linear in most proposals, with small variations. Graph generation tends to be longer and with higher variance, but it is not as relevant, because it is only done once (per program being analyzed). As such, this is the least important metric. Only proposals that deviate from the aforementioned schema show a wider variation in speed.
\end{description}
\subsection{Program slicing as a debugging technique}
Program slicing is first and foremost a debugging technique, with each variation serving a different purpose:
\begin{description}
\item[Backward static]
\end{description}
\section{Exception handling in Java}
\label{sec:intro-exception}
Exception handling is common in most modern programming languages. In Java, it consists of the following elements:
\begin{description}
\item[Throwable] A class that encompasses all the exceptions or errors that may be thrown. Its child classes are \texttt{Exception} for most errors and \texttt{Error} for internal errors in the Java Virtual Machine. Exceptions can be classified in two categories: \textsl{unchecked} (those inheriting from \texttt{RuntimeException} or \texttt{Error}) and \textsl{checked} (the rest). The first may be thrown anywhere, whereas the second, if thrown, must be caught or declared in the method header.
\item[throw] A statement that activates an exception, altering the normal control-flow of the method. If the statement is inside a \textsl{try} block with a \textsl{catch} clause for its type or any supertype, the control flow will continue in the first statement of such clause. Otherwise, the method is exited and the check performed again, until either the exception is caught or the last method in the stack (\textsl{main}) is popped, and the execution of the program ends abruptly.
\item[try] This statement is followed by a block of statements and by one or more \textsl{catch} clauses. All exceptions thrown in the statements contained or any methods called will be processed by the list of catches. Optionally, after the \textsl{catch} clauses a \textsl{finally} block may appear.
\item[catch] Contains two elements: a variable declaration (the type must be an exception) and a block of statements to be executed when an exception of the corresponding type (or a subtype) is thrown. \textsl{catch} clauses are processed sequentially, and if any matches the type of the thrown exception, its block is executed, and the rest are ignored. Variable declarations may be of multiple types \texttt{(T1|T2 exc)}, when two unrelated types of exception must be caught and the same code executed for both. When there is an inheritance relationship, the parent suffices.\footnotemark
\item[finally] Contains a block of statements that will always be executed if the \textsl{try} is entered. It is used to tidy up, for example closing I/O streams. The \textsl{finally} can be reached in two ways: with an exception pending (thrown in \textsl{try} and not captured by any \textsl{catch} or thrown inside a \textsl{catch}) or without it (when the \textsl{try} or \textsl{catch} block ends successfully). After the last instruction of the block is executed, if there is an exception pending, control will be passed to the corresponding \textsl{catch} or the program will end. Otherwise, the execution continues in the next statement after the \textsl{try-catch-finally} block.
\end{description}
\footnotetext{Introduced in Java 7, see \url{https://docs.oracle.com/javase/7/docs/technotes/guides/language/catch-multiple.html} for more details.}
\section{Exception handling in other programming languages}
In almost all programming languages, errors exist, and must be dealt with. Java's exception system is a common one among object-oriented programming languages, but not the only one.
Most of the popular object-oriented programming languages feature some kind of error system, normally very similar to Java's exceptions. In this section, we will perform a small survey on the most popular programming languages. The ``most popular'' list has been obtained from the Stack Overflow 2019 Developer Survey\footnotemark ($>5\%$ usage in the industry). The languages and their usage in the industry are shown in Figure~\ref{fig:languages}.
Most of them feature an exception system similar to the one appearing in Java, while others (bash, assembly, VBA, C) have no built-in mechanism, but allow errors to be handled by other means\todo{detail which}. Some check if the exception is of a given set of types for the catching mechanism (Java, C++, C\#), whilst others rely on a condition that includes the exception (Python, JavaScript, TypeScript). All of them have a mechanism that catches all exceptions ---either by catching the type from which all exceptions inherit or by providing no condition to check.
\footnotetext{\url{https://insights.stackoverflow.com/survey/2019/\#technology-\_-programming-scripting-and-markup-languages}}
Go doesn't have an exception system per se, but a simple one can be built by using the constructs ``panic'' (throws an exception with an associated value), ``defer'' (akin to \textsl{finally}; runs even when a panic is active) and ``recover'' (stops the panic state and retrieves the value associated with the panic). Deferred code is run when the function that deferred it returns, right before control goes back to its caller. Each deferred block is stored in a stack, so the execution order is LIFO. If a panic instruction is run, such code will still run, therefore acting as a \textsl{finally}. The panic can only be stopped via the ``recover'' instruction, which obtains the value associated with the panic. Then, the exception

28
listings-config.tex Normal file
View File

@ -0,0 +1,28 @@
\lstset{
% Numbering
numbers=left,
stepnumber=2,
numberstyle=\tiny,
numbersep=5pt,
% Style
tabsize=2,
basicstyle=\footnotesize\ttfamily,
% Others
language=Java,
% Escape sequences (UTF-8 support)
literate=
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
{Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
{à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
{À}{{\`A}}1 {È}{{\'E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
{ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
{Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
{â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
{Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
{Ã}{{\~A}}1 {ã}{{\~a}}1 {Õ}{{\~O}}1 {õ}{{\~o}}1
{œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
{ű}{{\H{u}}}1 {Ű}{{\H{U}}}1 {ő}{{\H{o}}}1 {Ő}{{\H{O}}}1
{ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
{}{{\euro}}1 {£}{{\pounds}}1 {«}{{\guillemotleft}}1
{»}{{\guillemotright}}1 {ñ}{{\~n}}1 {Ñ}{{\~N}}1 {¿}{{?`}}1,
}

58
paper.tex Normal file
View File

@ -0,0 +1,58 @@
\documentclass[a4paper,twoside]{report}
\usepackage[spanish,english]{babel}
\usepackage[utf8]{inputenc}
\usepackage{listings}
\usepackage{algorithm}
\usepackage{algorithmic}
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}
\usepackage{amsthm}
\usepackage{amssymb}
\theoremstyle{definition}
\newtheorem{definition}{Definition}
\usepackage{hyperref}
\usepackage{graphics}
\usepackage{title/mitssTitle}
\newcommand{\ctrldep}{\rightarrow^{ctrl}}
\newcommand{\datadep}{\rightarrow^{data}}
\usepackage{todonotes}
\usepackage{marginnote}
\title{Fragmentación de programas con excepciones}
%\title{Program slicing with exceptions}
\author{Carlos S. Galindo Jiménez}
\date{diciembre de 2019}
\supervisor{Josep Francesc Silva Galiana}
\begin{document}
\algsetup{linenodelimiter=.}
\include{listings-config}
\maketitle
\begin{abstract}
This must be filled \todo{complete}
\end{abstract}
\selectlanguage{spanish}
\begin{abstract}
A completar \todo{completar}
\end{abstract}
\selectlanguage{english}
\tableofcontents
\include{introduction}
\include{incremental_slicing}
\include{state_of_the_art}
\include{solution}
\input{bibliography}
%\bibliography{mybib}
%\bibliographystyle{plain}
\end{document}

107
solution.tex Normal file
View File

@ -0,0 +1,107 @@
\chapter{Proposed solution}
This solution is an extension of Allen's\cite{allen03}, with some modifications to solve the problem found. Before starting, we need to split all instructions into three categories:
\begin{description}
\item[statement] non-branching instruction, e.g. an assignment or method call.
\item[predicate] conditional branch, e.g. if statements and loops.
\item[pseudo-predicate] unconditional jump, e.g. break, continue, return, goto and throw instructions.
\end{description}
Pseudo-predicates have been previously used to model unconditional jumps with a counter-intuitive reasoning: if the pseudo-predicate were not there, the statement that follows it would be executed; therefore, that statement is control dependent on the pseudo-predicate. Going back to the definition of control dependency, one could argue that the real control dependency is on the conditional branch that led to the pseudo-predicate being executed\todo{complete this argument}.
\begin{figure}
\centering
\begin{lstlisting}
if (a) {
return a;
}
print(a);
\end{lstlisting}
\begin{lstlisting}
if (a) {
}
print(a);
\end{lstlisting}
\caption{Example of pseudo-predicates control dependencies}
\end{figure}
This is the process used to build the Program Dependence Graph.
\begin{description}
\item[Step 1 (static analysis):] Identify for each instruction the variables read and defined. Each method is annotated with the global variables that they access or modify.
\item[Step 2 (build CFGs):] Build a CFG for each method of the program. The start of all methods is a vertex labeled \textsl{enter}, which also contains the assignments for parameters and global variables used (\texttt{var = var\_in}). The \textsl{enter} node is connected to the first instruction of the method. In a similar fashion, all methods end in an \textsl{exit} vertex with the corresponding output variables. There exists one \textsl{normal exit} to which the last instruction and all return instructions are connected. If the method can throw any exceptions, there exists one \textsl{error exit} for each type of exception that may be thrown. The normal and erroneous exits are connected to the \textsl{exit} node.
Every normal statement is connected to the subsequent one by an unlabeled edge. Predicates have two outgoing edges, labeled \textsl{true} and \textsl{false}. Pseudo-predicates also have two outgoing edges. The \textsl{true} edge is connected to the destination of the jump (\textsl{normal exit} in the case of return, the begin or end of the loop in the case of continue and break, etc.). The \textsl{false} edge is a non-executable edge, marked with a dashed line, and it is connected to the next instruction that would be executed if the pseudo-predicate was a \textsl{nop}.
Nodes that represent a call to a method $M$ include the transfer of parameters and variables that may be read or written to, then execute the call, and finally the extraction of modified variables. Call nodes are an exception to the previous paragraph, as they can have an unlimited amount of outgoing edges. Each outgoing edge lands on a pseudo-predicate which indicates if the execution was correct or an exception was raised. The executable edge of each pseudo-predicate will lead to the next instruction to be executed, whereas the non-executable one will lead to the end of the try-catch block. All call nodes can lead to a \textsl{normal return} node, which is linked to the next instruction, and one error node for each type of exception that may be thrown. The erroneous returns are labeled \textsl{catch ExType}, and lead to the first instruction in the corresponding catch block\footnotemark. Any exception that may not be caught will lead to the erroneous exit node of the method it's in. See the example for more details.
\footnotetext{A problem presents itself here, as some exceptions may be able to trigger different catch blocks, due to the sequential nature of catches and polymorphism in Java. A way to fix this is to make catch blocks behave as a switch.} %TODO
\item[Step 3 (compute dependences):] For each node in the CFG, compute the control and data dependencies. Non-executable edges are only included when computing control dependencies.\\
\todo{put inside definition}
A node $a$ is \textsl{control dependent} on node $b$ iff $a$ post-dominates one but not all of $b$'s successors.\\
A node $a$ is \textsl{data dependent} on node $b$ iff $b$ defines or may define a variable $x$, $a$ uses or may use $x$, and there is an $x$-definition-free path in the CFG from $b$ to $a$.\\
\item[Step 4 (convert each CFG into a PDG):] each node of the CFG is one node of the PDG, with two exceptions. The first are the \textsl{enter}, \textsl{exit} and method call nodes, where the variable input and output assignments are split and placed as control-dependent on their original node. The second is the \textsl{exit} node, which is to be removed (the control dependencies from \textsl{exit} to the variable outputs are transferred to the \textsl{enter} node). Then all the dependencies computed in the previous step are drawn.
\item[Step 5 (connect PDGs to form a SDG):] each method call to $M$ must be connected to the \textsl{enter} node in $M$'s PDG, as a control dependence. Each variable input from the method call is connected to a variable input of the method definition via a data dependence. Each variable output from the method definition is connected to the variable output of the method call via a data dependence. Each method exit is connected \todo{complete}.
\end{description}
\begin{itemize}
\item An extra type of control dependency represented by an ``exception edge''. It will represent the need to include a \textsl{catch} clause when an exception can be thrown. It is represented with a dotted line (dashed line is for data dependency). These edges have a special characteristic: when one is traversed, only ``exception edges'' may be traversed from the new nodes included in the slice. If the same node is reached by another kind of edge, the restriction is lifted. The behavior is documented in Algorithm~\ref{alg:2pass}, where the changes from the original algorithm are \underline{underlined}.
\item Add an extra ``exception edge'' from each ``exit with exception of type \texttt{T}'' node to all the corresponding ``\texttt{throw e}'' statements, such that the type of \texttt{e} is or inherits from \texttt{T}.
\item Add an extra ``exception edge'' from each catch statement to every statement that can throw that error.
\item The exception edges will only be placed when the method or the try-catch statement are loop-carrier\footnote{Loop-carrier, when referring to a statement, is the property that in a CFG for the complete program, the node representing the statement is part of a loop, meaning that it could be executed again once it is executed.}.
\end{itemize}
\begin{algorithm} % generate slice
\caption{Two-pass algorithm to obtain a backward static slice with exceptions}
\label{alg:2pass}
\begin{algorithmic}[1]
\REQUIRE SDG $\mathcal{G}$ representing program P. $\mathcal{G} = \{\mathcal{S}, \mathcal{E}\}$, where $\mathcal{S}$ is a set of states (some are statements) connected by a set of edges $\mathcal{E}$. Each edge, is a triplet composed of the type of edge (control, data or \underline{exception} dependency, summary, param-in, param-out), the source and destination of the edge.
\REQUIRE A slicing criterion, composed of a statement $s \in \mathcal{S}$ and a variable $v$.
\ENSURE $\mathcal{S}' \subseteq \mathcal{S}$, representing the slice of P according to the criterion provided.
\medskip
\COMMENT{First pass (do not traverse output parameter edges).}
\STATE{$\mathcal{S}' \Leftarrow \emptyset$ (slice), $\mathcal{Q}\Leftarrow\{s\}$ (queue), $\mathcal{S}\Leftarrow \mathcal{S} - \{s\}$ (not visited), $\mathcal{R}\Leftarrow \emptyset$ (only visited via exception edge)}
\WHILE{$\mathcal{Q} \neq \emptyset$}
\STATE{$a \in \mathcal{Q}$} \COMMENT{Select an element from $\mathcal{Q}$}
\STATE{$\mathcal{Q} \Leftarrow \mathcal{Q} - \{a\}$}
\STATE{$\mathcal{S}' \Leftarrow \mathcal{S}' + \{a\}$}
\FORALL{$\mathcal{A}$ in $\{\{type, origin, a\} \in \mathcal{E}\}$}
\IF{$type \neq$ param-out \AND ($origin \notin \mathcal{S}'$ \OR ($origin \in \mathcal{R}$ \AND $a \notin \mathcal{R}$))} \label{line:param-out}
\IF{\underline{$a \in \mathcal{R}$}}
\IF{\underline{$type =$ exception}}
\STATE{\underline{$\mathcal{Q} \Leftarrow \mathcal{Q} + \{origin\}$}}
\STATE{\underline{$\mathcal{R} \Leftarrow \mathcal{R} + \{origin\}$}}
\ENDIF
\ELSE
\STATE{$\mathcal{Q} \Leftarrow \mathcal{Q} + \{origin\}$}
\ENDIF
\ENDIF
\ENDFOR
\ENDWHILE
\\
\medskip
\COMMENT{Second pass (very similar, do not traverse input parameter edges).}
\STATE $\mathcal{Q} \Leftarrow \mathcal{S}'$
\WHILE{$\mathcal{Q} \neq \emptyset$}
\STATE{$a \in \mathcal{Q}$} \COMMENT{Select an element from $\mathcal{Q}$}
\STATE{$\mathcal{Q} \Leftarrow \mathcal{Q} - \{a\}$}
\STATE{$\mathcal{S}' \Leftarrow \mathcal{S}' + \{a\}$}
\FORALL{$\mathcal{A}$ in $\{\{type, origin, a\} \in \mathcal{E}\}$}
\IF{$type \neq$ param-in \AND ($origin \notin \mathcal{S}'$ \OR ($origin \in \mathcal{R}$ \AND $a \notin \mathcal{R}$))}
\IF{\underline{$a \in \mathcal{R}$}}
\IF{\underline{$type =$ exception}}
\STATE{\underline{$\mathcal{Q} \Leftarrow \mathcal{Q} + \{origin\}$}}
\STATE{\underline{$\mathcal{R} \Leftarrow \mathcal{R} + \{origin\}$}}
\ENDIF
\ELSE
\STATE{$\mathcal{Q} \Leftarrow \mathcal{Q} + \{origin\}$}
\ENDIF
\ENDIF
\ENDFOR
\ENDWHILE
\end{algorithmic}
\end{algorithm}

64
state_of_the_art.tex Normal file
View File

@ -0,0 +1,64 @@
\chapter{State of the art}
Slicing was proposed\cite{weiser79} and improved until the proposal of the current system (the SDG) \todo{(citation)}. Specifically in the context of exceptions, multiple approaches have been attempted, with varying degrees of success. There exist commercial solutions for various programming languages: \todo{name them and link}.
In the realm of academia, there exists no definitive solution. One of the most relevant initial proposals is that of Allen and Horwitz\cite{allen03}, although it is not the first one\cite{sinha98,sinha99} to target Java specifically.
It uses the existing proposals for \textsl{return}, \textsl{goto} and other unconditional jumps to model the behavior of \textsl{throw} statements. Control flow inside \textsl{try-catch-finally} statements is simulated, both for explicit \textsl{throw} and those nested inside a method call. The base algorithm is presented, and then the proposal is detailed as changes. Unchecked exceptions are considered but regarded as ``worthless'' to include, due to the increase in size of the slices, which reduces their effectiveness as a debugging tool. This is due to the number of unchecked exceptions embedded in normal Java instructions, such as \texttt{NullPointerException} in any instance field access or method call, \texttt{IndexOutOfBoundsException} in array accesses and countless others. On top of that, handling \textsl{unchecked} exceptions opens the problem of calling an API to which there is no analyzable source code, either because the module was compiled beforehand or because it is part of a distributed system. The first should not be an obstacle, as class files can be easily decompiled. The only information that may be lost is variable names and comments, which don't affect a slice's precision, only its readability.
Jo and Chang\cite{jo04} present an alternative to the CFG by computing exception-induced control flow separately from the traditional control flow computation, but go no further into the ramifications it entails for the PDG and the SDG.
Jiang et al.\cite{jiang06} describe a solution specific for the exception system in C++, which differs from Java's implementation of exceptions. They reuse the idea of non-executable edges in \textsl{throw} nodes, and introduce handling \textsl{catch} nodes as a switch, each trying to catch the exception before deferring onto the next \textsl{catch} or propagating it to the calling method. Their proposal is centered around the IECFG (Improved Exception Control-Flow Graph), which propagates control dependencies onto the PDG and then the SDG. Finally, in their SDG, each normal and exceptional return and their data output are connected to all \textsl{catch} statements where the data may have arrived, which is fine for the example they propose, but could be inefficient if the method has many different call nodes.
Others\cite{prabhu11} have worked specifically on the C++ exception framework. \todo{remove or expand}.
Finally, Hao\cite{hao11} introduced an Object-Oriented System Dependence Graph with exception handling (EOSDG), which represented a generic object-oriented language, with exception handling capabilities. Its broadness allows for the EOSDG to fit into both Java and C++. It uses concepts from Jiang\cite{jiang06}, such as cascading \textsl{catch} statements, while adding explicit support for virtual calls, polymorphism and inheritance.
% TODO UNCOMPLETE
\hrulefill
\marginnote{Alternative explanation of \cite{allen03}, with counter example. Maybe should move the counter example backwards.}
In their paper, Allen and Horwitz suggest treating exceptions in the following way:
\begin{itemize}
\item Statements are divided into statements, predicates (loops and conditional blocks) and pseudo-predicates (return and throw statements). Statements only have one successor in the CFG, predicates have two (one when the condition is true and another when false), pseudo-predicates have two, but the one labeled ``false'' is non-executable. The non-executable edge connects to the statement that would be executed if the unconditional jump was replaced by a ``nop''.
\item \textsl{try-catch-finally} blocks are treated differently, but they have fewer dependencies than needed. Each catch block is control-dependent on any statement that may throw the corresponding exception. The
\end{itemize}
\begin{lstlisting}[title=Example]
void main() {
int x = 0;
while (true) {
try {
f(x);
} catch (ExceptionA e) {
x--;
} catch (ExceptionB e) {
System.err.println(x);
} catch (ExceptionC e) {
System.out.println(x);
}
System.out.println(x);
}
}
void f(int x) {
x--;
if (x > 10)
throw new ExceptionA();
else if (x == 0)
throw new ExceptionB();
else if (x > 0)
throw new ExceptionC();
x++;
System.out.println(x);
}
static class ExceptionA extends ExceptionC {}
static class ExceptionB extends Exception {}
static class ExceptionC extends Exception {}
\end{lstlisting}
In this example we can explore all the errors found with the current state of the art.
The first problem found is the lack of \texttt{catch} statements in the slice, as no edge is drawn from the catch. Some of the catch blocks will be included via data dependencies, but some may not be reached, though they are still necessary if the slice includes anything after a caught exception.
Therefore, an extra control dependency must be introduced, in order to always include a ``catch'' statement in the slice if the ``throw'' statement is in the slice. In the example, only the catch statement from line 20 will be included, and if ExceptionC or ExceptionB were thrown, they would not be caught. That would not be a problem if the function $f$ was not executed again, but it is, making the slice incorrect.

BIN
title/logo-dsic.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
title/logo-upv-cropped.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

BIN
title/logo-upv.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

40
title/mitssTitle.sty Normal file
View File

@ -0,0 +1,40 @@
\ProvidesPackage{mitssTitle}[2019/08/25]
\RequirePackage{graphicx}
\RequirePackage{geometry}
\RequirePackage[export]{adjustbox} % align images
\newcommand*{\supervisor}[1]{\gdef\@supervisor{#1}%
}
\renewcommand*{\maketitle}{%
\newgeometry{margin=1in}
\begin{titlepage}
%\pagestyle{empty}
\makeatletter
{\centering
\includegraphics[width=.5\linewidth,valign=t]{title/logo-upv-cropped} \hfill
\includegraphics[width=.23\linewidth,valign=t]{title/logo-dsic}
}
\vfill
\begin{center}
{\huge\bfseries \@title} \\
\vspace{2em}
{\Large Trabajo Fin de Máster} \\
\vspace{2em}
\Large
\textbf{Máster Universitario en Ingeniería \\ y Tecnología de Sistemas Software}
\end{center}
\vfill
{
\large
\raggedleft
\textbf{Autor}: \@author \\
\textbf{Tutor}: \@supervisor \\
Valencia, \@date \\
}
\end{titlepage}
\restoregeometry
} % maketitle
\endinput