WLP2024/aspUnify.tex

% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encondings may result in incorrect characters.
%
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
%\urlstyle{rm}
%
\include{prolog}
\usepackage{mathpartir}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumitem}
%\usepackage{amsthm}

\begin{document}
%
\title{Global Type Inference for Java using SAT Solvers}
%
%\titlerunning{Abbreviated paper title}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
\author{First Author\inst{1}\orcidID{0000-1111-2222-3333} \and
Second Author\inst{2,3}\orcidID{1111-2222-3333-4444} \and
Third Author\inst{3}\orcidID{2222--3333-4444-5555}}
%
\authorrunning{F. Author et al.}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{Princeton University, Princeton NJ 08544, USA \and
Springer Heidelberg, Tiergartenstr. 17, 69121 Heidelberg, Germany
\email{lncs@springer.com}\\
\url{http://www.springer.com/gp/computer-science/lncs} \and
ABC Institute, Rupert-Karls-University Heidelberg, Heidelberg, Germany\\
\email{\{abc,lncs\}@uni-heidelberg.de}}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
The abstract should briefly summarize the contents of the paper in
150--250 words.

\keywords{First keyword  \and Second keyword \and Another keyword.}
\end{abstract}
%
%
%
\section{Type Inference}
Every major typed programming language uses some form of type inference.
Rust, Java, C++, Haskell, etc... %(see: https://en.wikipedia.org/wiki/Type_inference)
Type inference adds alot of value to a programming language.
\begin{itemize}
\item Code is more readable.
\item Type inference usually does a better job at finding types than a programmer.
\item Type inference can use types that are not denotable by the programmer.
\item Better reusability.
\item Allows for faster development and the programmer to focus on the actual task instead of struggling with the type system.
\end{itemize}

Java has adopted more and more type inference features over time.
%TODO: list type inference additions
Currently Java only has local type inference.
We want to bring type inference for Java to the next level.

Our type inference algorithm for Java should be able to retrieve missing type annotations
in a Java program.
The main difference from existing type inference algorithms for Java is that we also allow method types to be omitted in the input program.

\begin{figure}
\begin{lstlisting}
public class Matrix extends Vector<Vector<Integer>> {

    public Matrix(vv) {
        Integer i;
        i = 0;
        while(i < vv.size()) {
            this.add(vv.elementAt(i));
            i=i+1;
        }
    }

    public mul(m) {
        var ret = new Matrix();
        var i = 0;
        while(i < size()) {
            var v1 = this.elementAt(i);
            var v2 = new Vector<Integer>();
            var j = 0;
            while(j < v1.size()) {
                var erg = 0;
                var k = 0;
                while(k < v1.size()) {
                    erg = erg + v1.get(k)
                             * m.get(k).get(j);
                    k++; }
                v2.addElement(erg);
                j++; }
            ret.addElement(v2);
            i++;
         }
        return ret;
    }
}
\end{lstlisting}
\end{figure}

\section{Subtyping}
\begin{mathpar}
\inferrule[Refl]{}{
\type{T} <: \type{T}
}
\and
\inferrule[Trans]{\type{T}_1 <: \type{T}_2 \\ \type{T}_2 <: \type{T}_3}{
\type{T}_1 <: \type{T}_3
}
\and
\inferrule[Var]{}{\type{A} <: \Delta(\type{A})}
\and
\inferrule[Class]{\texttt{class}\ \exptype{C}{\ol{X}} \triangleleft \type{N}}{
\exptype{C}{\ol{T}} <: [\ol{T}/\ol{X}]\type{N}
}
\end{mathpar}

\begin{mathpar}
\inferrule[N-Refl]{}{
\type{C} << \type{C}
}
\and
\inferrule[N-Trans]{\type{C}_1 << \type{C}_2 \\ \type{C}_2 << \type{C}_3}{
\type{C}_1 << \type{C}_3
}
\and
\inferrule[N-Class]{\texttt{class}\ \exptype{C}{\ldots} \triangleleft \exptype{D}{\ldots}}{
\type{C} << \type{D}
}
\end{mathpar}

\section{Unify}
Input: Every type placeholder must have an upper bound.
Output: Every $\tv{a} \lessdot \type{T}$ constraint gets a

The $\tv{a} \lessdot \type{Object}$ rule has to be ensured by the incoming constraints.
The need to have an upper bound to every type placeholder.

We have to formulate the unification algorithm with implication rules.
Those can be directly translated to ASP.
\begin{mathpar}
\inferrule[Subst]{
    \tv{a} \doteq \type{N}
}{
    \tv{a} \mapsto \type{N}
}
\and
\inferrule[Subst-L]{
    \tv{a} \mapsto \type{T}_1 \\
    \tv{a} \lessdot \type{T}_2
}{
    \type{T}_1 \lessdot \type{T}_2
}
\and
\inferrule[Subst-R]{
    \tv{a} \mapsto \type{T}_1 \\
    \type{T}_2 \lessdot \tv{a}
}{
    \type{T}_2 \lessdot \type{T}_1
}
\and
\inferrule[Subst-Equal]{
    \tv{a} \mapsto \type{T}_1 \\
    \tv{a} \doteq \type{T}_2
}{
    \type{T}_1 \doteq \type{T}_2
}
\and
\inferrule[Swap]{
    \type{T}_1 \doteq \type{T}_2
}{
    \type{T}_2 \doteq \type{T}_1
}
\and
\inferrule[Unfold]{
    \tv{b} \doteq \exptype{C}{\type{T}_1 \ldots \type{T}_n}
}{
    \type{T}_i \doteq \type{T}_i
}
\and
\inferrule[Subst-Param]{
    \tv{a} \mapsto \type{S} \\
    \type{T} \doteq \exptype{C}{\type{T}_1 \ldots, \tv{a}, \ldots \type{T}_n} \\
}{
    \type{T} \doteq \exptype{C}{\type{T}_1, \ldots \type{S}, \ldots \type{T}_n}
}
\and
\inferrule[S-Object]{}{\tv{a} \lessdot \type{Object}}
\and
\inferrule[Match]{
    \tv{a} \lessdot \type{N}_1 \\
    \tv{a} \lessdot \type{N}_2 \\
    \type{N}_1 << \type{N}_2
}{
    \type{T}_1 \lessdot \type{T}_2
}
\and
\inferrule[Adopt]{
    \tv{a} \lessdot \tv{b} \\
    \tv{b} \lessdot \type{T}
}{
    \tv{a} \lessdot \type{T}
}
\and
% \inferrule[Subst-Param]{
%     \tv{a} \doteq \type{N} \\
%     \tv{a} = \type{T}_i \\
%     \exptype{C}{\type{T}_1 \ldots \type{T}_n} \lessdot \type{T} \\
% }{
%     \type{T}_i \doteq \type{N} \\
% }
\and
\inferrule[Adapt]{
    \type{N}_1 \lessdot \exptype{C}{\type{T}_1 \ldots \type{T}_n} \\
    \type{N}_1 <: \exptype{C}{\type{S}_1 \ldots \type{S}_n} \\
}{
    \exptype{C}{\type{S}_1 \ldots \type{S}_n} \doteq \exptype{C}{\type{T}_1 \ldots \type{T}_n} \\
}
\and
\inferrule[Reduce]{
    \exptype{C}{\type{S}_1 \ldots \type{S}_n} \doteq \exptype{C}{\type{T}_1 \ldots \type{T}_n} \\
}{
    \type{S}_i \doteq \type{T}_i \\
}
\end{mathpar}

\begin{mathpar}
    \text{Apply only once per constraint:}\quad
    \inferrule[Super]{
        \type{T} \lessdot \tv{a}\\
        \type{T} <: \type{N}
    }{
        \tv{a} \doteq \type{N}
    }
\end{mathpar}

\begin{center}
    Apply one or the other:
\end{center}
\begin{mathpar}
    \inferrule[Split-L]{
        \tv{a} \lessdot \tv{b}\\
        \tv{a} \lessdot \type{N}\\
    }{
        \tv{b} \lessdot \type{N}
    }
    \quad \quad
    \vline
    \quad \quad
    \inferrule[Split-R]{
        \tv{a} \lessdot \tv{b}\\
        \tv{a} \lessdot \type{N}\\
    }{
        \type{N} \lessdot \tv{b}
    }
\end{mathpar}

Result:
\begin{mathpar}
% \inferrule[Solution]{
%     \tv{a} \doteq \type{N} \\
%     \tv{a} \notin \type{N}
% }{
%     \sigma(\tv{a}) = \type{N}
% }
\inferrule[Solution]{
    \tv{a} \doteq \type{G}
}{
    \sigma(\tv{a}) = \type{G}
}
% \and
% \inferrule[Generic]{
%     \tv{a} \lessdot \type{N} %, \ldots, \tv{a} \lessdot \exptype{C_n}{\ol{T_n}} \\
%     \\
%     \text{not}\ \tv{a} \doteq \type{N}'
% }{
% \tv{a} \mapsto \type{A}
% }
% \and
% \inferrule[Solution-Sub]{
%     \tv{a} \lessdot \exptype{C_1}{\ol{T_1}}, \ldots, \tv{a} \lessdot \exptype{C_n}{\ol{T_n}} \\
%     \forall i: \type{C_m} << \type{C_i} \\
%     \text{not}\ \tv{a} \doteq \type{N}
% }{
%     \Delta(\type{A}) = \exptype{C_m}{\ol{T_m}} \\ \sigma(\tv{a}) = \type{A}
% }
% \and
% \inferrule[Solution-Gen]{
%     \tv{a} \lessdot \type{G}_1, \ldots, \tv{a} \lessdot \type{G}_n \\
%     \forall i: \type{G} <: \type{G}_i \\
% }{
%     \Delta(\type{A}) = \type{G} \\ \sigma(\tv{a}) = \type{A}
% }
\and
\inferrule[Solution-Gen]{
    \tv{a} \lessdot \type{C}_1, \ldots, \tv{a} \lessdot \type{C}_n \\
    \forall i: \type{C}_m << \type{C}_i \\
}{
    \tv{a} \doteq \type{C}_m
}
\end{mathpar}

Fail:

\begin{mathpar}
% \inferrule[Fail]{
%     \type{T} \lessdot \type{N}\\
%     \type{T} \nless : \type{N}
% }{
%     \emptyset
% }
% \and
\inferrule[Fail]{
    \exptype{C}{\ldots} \doteq \exptype{D}{\ldots}\\
    \type{C} \neq \type{D}
}{
    \emptyset
}
\and
\inferrule[Fail-Sigma]{
    \tv{a} \doteq \type{N} \\
    \tv{a} \in \type{N}
}{
    \emptyset
}
\and
\inferrule[Fail]{
\tv{a} \lessdot \type{N}_1 \\
\tv{a} \lessdot \type{N}_2 \\
\text{not}\ \type{N}_1 << \type{N}_2 \\
\text{not}\ \type{N}_2 << \type{N}_1
}{
    \emptyset
}
\end{mathpar}

% Subst
% a =. N, a <. T, N <: T
% --------------
% N <. T

% a <. List<b>, b <. List<a>

% how to proof completeness and termination?
% TODO: how to proof termination?

The algorithm terminates if every type placeholder in the input constraint set has an assigned type.

\section{ASP Encoding}
\begin{tabular}{l | r}
$\exptype{C}{\ol{X}}$ & \texttt{type("C", paramX)}\\
\end{tabular}

\section{Completeness}
To proof completeness we have to show that every type can be replaced by a placeholder in a correct constraint set.

Completeness -> we never exclude a solution
Following constraints stay: $\tv{a} \lessdot \type{T}$ if $\tv{a}$ is never on a right side of another constraint.
Every other type placeholder will be reduced to $\tv{a} \doteq \type{T}$, if there is a solution.
Proof:
%Induction over every possible constraint variation:
a =. T -> induction start
a <. T -> if no other constraint then it can stay otherwise there is either a =. T or a <. T
in latter case: a <. T, a <. T'

Proof that every type can be replaced by a Type Placeholder.
% Whats with a =. T, can T be replaced by a Type Placeholder?
% What is our finish condition? a <. T constraints stay, a =. b constraints stay.
% Algorithm does not fail -> \emptyset if a solution exists
% Otherwise there exists a substitution. If the algorithm succeeds we have to pick one of the possible solutions
% by: a <. T -> a =.T
%     a =. b, b =. T -> use the solution generation from other paper
% TODO: try to include solution generation in the algorithm and proof that this solution is valid and will always occur as long as there is a solution

Soundness -> we never make a wrong implication
%$\tv{a} \doteq \type{T}$ means that $\[type{T}/\tv{a}]C$ is correct
If it succeeds then we can substitute all $\tv{a} \doteq \type{T}$
constraints in the original constraint set and
there exists a typing for the remaining type placeholders
so that the constraint set is satisfied.

\begin{theorem}{Soundness}\label{lemma:soundness}
if $\type{T} \lessdot \type{T'}$ and $\sigma(\tv{a}) = \type{N}$
then $[\type{N}/\tv{a}]\type{T} <: [\type{N}/\tv{a}]\type{T'}$.
\end{theorem}

\SetEnumitemKey{ncases}{itemindent=!,before=\let\makelabel\ncasesmakelabel}
\newcommand*\ncasesmakelabel[1]{Case #1}

\newenvironment{subproof}
  {\def\proofname{Subproof}%
   \def\qedsymbol{$\triangleleft$}%
   \proof}
  {\endproof}
Due to Match there must be $\type{N}_1 \lessdot \type{N}_2 \ldots \lessdot \type{N}_n$
\begin{proof}
  \begin{enumerate}[ncases]
    \item $\tv{a} \lessdot \exptype{C}{\ol{T}}$.
        Solution-Sub
    Let $\sigma(\tv{a}) = \type{N}$. Then $\type{N} <: \exptype{C}{[\type{N}/\tv{a}]}$
    \item $\tv{a} \doteq \type{N}$.
    Solution
    \item $\tv{a} \lessdot \tv{b}$.
    There must be a $\tv{a} \lessdot \type{N}$
    \begin{subproof}
    $\sigma(\tv{a}) = \type{Object}$,
    $\sigma(\tv{b}) = \type{Object}$.
    \end{subproof}
    \item $\type{N} \lessdot \tv{a}$.
    \begin{subproof}
    $2$
    \end{subproof}
  \end{enumerate}
  And more text.
\end{proof}

\begin{lemma}{Substitution}
    \begin{description}
        \item[If] $\tv{a} \doteq \type{N}$ with $\tv{a} \neq \type{N}$
        \item[Then] for every $\type{T} \doteq \type{T}$ there exists a $[\type{N}/\tv{a}]\type{T} \doteq [\type{N}/\tv{a}]\type{T}$
        \item[Then] for every $\type{T} \lessdot \type{T}$ there exists a $[\type{N}/\tv{a}]\type{T} \lessdot [\type{N}/\tv{a}]\type{T}$
    \end{description}
\end{lemma}
\textit{Proof:}
TODO

\begin{lemma} \label{lemma:subtypeOnly}
If $\sigma(\tv{a}) = \emptyset$ then $\tv{a}$ appears only on the left side of $\tv{a} \lessdot \type{T}$ constraints.
\end{lemma}
Proof:
Every type placeholder gets a solution, because there must be atleast one $\tv{a} \lessdot \type{N}$ constraint.
Then either the Solution-Sub generates a $\sigma$ or the Solution rule can be used TODO: Proof.

The Solution-Sub rule is always correct.

Proof:

\begin{theorem}{Termination}
    %jede nichtendliche Menge von Constraints bleibt endlich. Die Regeln können nicht unendlich oft angewendet werden
%Trivial. The only possibility would be if we allow a =. C<a> constraints!
\end{theorem}

TODO: For completeness we have to proof that not $\tv{a} \doteq \type{N}$ only is the case if $\tv{a}$ only appears on the left side of $\tv{a} \lessdot \type{T}$ constraints.
Problem: a <. List<a>
        a <. List<b>
        then a =. b
Solution: Keep the a <. N constraints and apply the Step 6 from the GTFGJ paper.
Then we have to proof that only a <. N constraints remain with sigma(a) = empty. Or Fail

\begin{theorem}{Completeness}
$\forall \tv{a} \in C_{input}: \sigma(\tv{a}) = \type{N}$, if there is a solution for $C_{input}$
and every type placeholder has an upper bound $\tv{a} \lessdot \type{N}$.
\end{theorem}
%Problem: We do not support multiple inheritance
\begin{proof}
  \begin{enumerate}[ncases]
    \item $\tv{a} \lessdot \type{N}$.
        Solution-Sub
    \item $\tv{a} \doteq \type{N}$.
    Solution
    \item $\tv{a} \lessdot \tv{b}$.
    There must be a $\tv{a} \lessdot \type{N}$
    \begin{subproof}
    $\sigma(\tv{a}) = \type{Object}$,
    $\sigma(\tv{b}) = \type{Object}$.
    \end{subproof}
    \item $\type{N} \lessdot \tv{a}$.
    \begin{subproof}
    $2$
    \end{subproof}
  \end{enumerate}
  And more text.
\end{proof}

\end{document}