feat(latex): port papers to latex for targeted academic venues
Mirror to GitLab / mirror (push) Waiting to run
Mirror to GitLab / mirror (push) Waiting to run
This commit is contained in:
@@ -0,0 +1,255 @@
|
||||
\NeedsTeXFormat{LaTeX2e}
|
||||
|
||||
\ProcessOptions\relax
|
||||
|
||||
% fonts
|
||||
\renewcommand{\rmdefault}{ptm}
|
||||
\renewcommand{\sfdefault}{phv}
|
||||
|
||||
% set page geometry
|
||||
\usepackage[verbose=true,letterpaper]{geometry}
|
||||
\AtBeginDocument{
|
||||
\newgeometry{
|
||||
textheight=9in,
|
||||
textwidth=6.5in,
|
||||
top=1in,
|
||||
headheight=14pt,
|
||||
headsep=25pt,
|
||||
footskip=30pt
|
||||
}
|
||||
}
|
||||
|
||||
\widowpenalty=10000
|
||||
\clubpenalty=10000
|
||||
\flushbottom
|
||||
\sloppy
|
||||
|
||||
\usepackage{fancyhdr}
|
||||
\fancyhf{}
|
||||
\pagestyle{fancy}
|
||||
\renewcommand{\headrulewidth}{0pt}
|
||||
\fancyheadoffset{0pt}
|
||||
\rhead{\scshape A preprint - \today}
|
||||
\cfoot{\thepage}
|
||||
|
||||
|
||||
%Handling Keywords
|
||||
\def\keywordname{{\bfseries \emph Keywords}}%
|
||||
\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm
|
||||
\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$
|
||||
}\noindent\keywordname\enspace\ignorespaces#1\par}}
|
||||
|
||||
% font sizes with reduced leading
|
||||
\renewcommand{\normalsize}{%
|
||||
\@setfontsize\normalsize\@xpt\@xipt
|
||||
\abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@
|
||||
\abovedisplayshortskip \z@ \@plus 3\p@
|
||||
\belowdisplayskip \abovedisplayskip
|
||||
\belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
|
||||
}
|
||||
\normalsize
|
||||
\renewcommand{\small}{%
|
||||
\@setfontsize\small\@ixpt\@xpt
|
||||
\abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@
|
||||
\abovedisplayshortskip \z@ \@plus 2\p@
|
||||
\belowdisplayskip \abovedisplayskip
|
||||
\belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@
|
||||
}
|
||||
\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
|
||||
\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
|
||||
\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
|
||||
\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
|
||||
\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
|
||||
\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
|
||||
\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
|
||||
\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
|
||||
|
||||
% sections with less space
|
||||
\providecommand{\section}{}
|
||||
\renewcommand{\section}{%
|
||||
\@startsection{section}{1}{\z@}%
|
||||
{-2.0ex \@plus -0.5ex \@minus -0.2ex}%
|
||||
{ 1.5ex \@plus 0.3ex \@minus 0.2ex}%
|
||||
{\large\bf\raggedright}%
|
||||
}
|
||||
\providecommand{\subsection}{}
|
||||
\renewcommand{\subsection}{%
|
||||
\@startsection{subsection}{2}{\z@}%
|
||||
{-1.8ex \@plus -0.5ex \@minus -0.2ex}%
|
||||
{ 0.8ex \@plus 0.2ex}%
|
||||
{\normalsize\bf\raggedright}%
|
||||
}
|
||||
\providecommand{\subsubsection}{}
|
||||
\renewcommand{\subsubsection}{%
|
||||
\@startsection{subsubsection}{3}{\z@}%
|
||||
{-1.5ex \@plus -0.5ex \@minus -0.2ex}%
|
||||
{ 0.5ex \@plus 0.2ex}%
|
||||
{\normalsize\bf\raggedright}%
|
||||
}
|
||||
\providecommand{\paragraph}{}
|
||||
\renewcommand{\paragraph}{%
|
||||
\@startsection{paragraph}{4}{\z@}%
|
||||
{1.5ex \@plus 0.5ex \@minus 0.2ex}%
|
||||
{-1em}%
|
||||
{\normalsize\bf}%
|
||||
}
|
||||
\providecommand{\subparagraph}{}
|
||||
\renewcommand{\subparagraph}{%
|
||||
\@startsection{subparagraph}{5}{\z@}%
|
||||
{1.5ex \@plus 0.5ex \@minus 0.2ex}%
|
||||
{-1em}%
|
||||
{\normalsize\bf}%
|
||||
}
|
||||
\providecommand{\subsubsubsection}{}
|
||||
\renewcommand{\subsubsubsection}{%
|
||||
\vskip5pt{\noindent\normalsize\rm\raggedright}%
|
||||
}
|
||||
|
||||
% float placement
|
||||
\renewcommand{\topfraction }{0.85}
|
||||
\renewcommand{\bottomfraction }{0.4}
|
||||
\renewcommand{\textfraction }{0.1}
|
||||
\renewcommand{\floatpagefraction}{0.7}
|
||||
|
||||
\newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@}
|
||||
\newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@}
|
||||
|
||||
\setlength{\abovecaptionskip}{\@abovecaptionskip}
|
||||
\setlength{\belowcaptionskip}{\@belowcaptionskip}
|
||||
|
||||
% swap above/belowcaptionskip lengths for tables
|
||||
\renewenvironment{table}
|
||||
{\setlength{\abovecaptionskip}{\@belowcaptionskip}%
|
||||
\setlength{\belowcaptionskip}{\@abovecaptionskip}%
|
||||
\@float{table}}
|
||||
{\end@float}
|
||||
|
||||
% footnote formatting
|
||||
\setlength{\footnotesep }{6.65\p@}
|
||||
\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
|
||||
\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
|
||||
\setcounter{footnote}{0}
|
||||
|
||||
% paragraph formatting
|
||||
\setlength{\parindent}{\z@}
|
||||
\setlength{\parskip }{5.5\p@}
|
||||
|
||||
% list formatting
|
||||
\setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@}
|
||||
\setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
|
||||
\setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@}
|
||||
\setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@}
|
||||
\setlength{\leftmargin }{3pc}
|
||||
\setlength{\leftmargini }{\leftmargin}
|
||||
\setlength{\leftmarginii }{2em}
|
||||
\setlength{\leftmarginiii}{1.5em}
|
||||
\setlength{\leftmarginiv }{1.0em}
|
||||
\setlength{\leftmarginv }{0.5em}
|
||||
\def\@listi {\leftmargin\leftmargini}
|
||||
\def\@listii {\leftmargin\leftmarginii
|
||||
\labelwidth\leftmarginii
|
||||
\advance\labelwidth-\labelsep
|
||||
\topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@
|
||||
\parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@
|
||||
\itemsep \parsep}
|
||||
\def\@listiii{\leftmargin\leftmarginiii
|
||||
\labelwidth\leftmarginiii
|
||||
\advance\labelwidth-\labelsep
|
||||
\topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@
|
||||
\parsep \z@
|
||||
\partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
|
||||
\itemsep \topsep}
|
||||
\def\@listiv {\leftmargin\leftmarginiv
|
||||
\labelwidth\leftmarginiv
|
||||
\advance\labelwidth-\labelsep}
|
||||
\def\@listv {\leftmargin\leftmarginv
|
||||
\labelwidth\leftmarginv
|
||||
\advance\labelwidth-\labelsep}
|
||||
\def\@listvi {\leftmargin\leftmarginvi
|
||||
\labelwidth\leftmarginvi
|
||||
\advance\labelwidth-\labelsep}
|
||||
|
||||
% create title
|
||||
\providecommand{\maketitle}{}
|
||||
\renewcommand{\maketitle}{%
|
||||
\par
|
||||
\begingroup
|
||||
\renewcommand{\thefootnote}{\fnsymbol{footnote}}
|
||||
% for perfect author name centering
|
||||
\renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
|
||||
% The footnote-mark was overlapping the footnote-text,
|
||||
% added the following to fix this problem (MK)
|
||||
\long\def\@makefntext##1{%
|
||||
\parindent 1em\noindent
|
||||
\hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
|
||||
}
|
||||
\thispagestyle{empty}
|
||||
\@maketitle
|
||||
\@thanks
|
||||
%\@notice
|
||||
\endgroup
|
||||
\let\maketitle\relax
|
||||
\let\thanks\relax
|
||||
}
|
||||
|
||||
% rules for title box at top of first page
|
||||
\newcommand{\@toptitlebar}{
|
||||
\hrule height 2\p@
|
||||
\vskip 0.25in
|
||||
\vskip -\parskip%
|
||||
}
|
||||
\newcommand{\@bottomtitlebar}{
|
||||
\vskip 0.29in
|
||||
\vskip -\parskip
|
||||
\hrule height 2\p@
|
||||
\vskip 0.09in%
|
||||
}
|
||||
|
||||
% create title (includes both anonymized and non-anonymized versions)
|
||||
\providecommand{\@maketitle}{}
|
||||
\renewcommand{\@maketitle}{%
|
||||
\vbox{%
|
||||
\hsize\textwidth
|
||||
\linewidth\hsize
|
||||
\vskip 0.1in
|
||||
\@toptitlebar
|
||||
\centering
|
||||
{\LARGE\sc \@title\par}
|
||||
\@bottomtitlebar
|
||||
\textsc{}\\
|
||||
\vskip 0.1in
|
||||
\def\And{%
|
||||
\end{tabular}\hfil\linebreak[0]\hfil%
|
||||
\begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
|
||||
}
|
||||
\def\AND{%
|
||||
\end{tabular}\hfil\linebreak[4]\hfil%
|
||||
\begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
|
||||
}
|
||||
\begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
|
||||
\vskip 0.4in \@minus 0.1in \center{\today} \vskip 0.2in
|
||||
}
|
||||
}
|
||||
|
||||
% add conference notice to bottom of first page
|
||||
\newcommand{\ftype@noticebox}{8}
|
||||
\newcommand{\@notice}{%
|
||||
% give a bit of extra room back to authors on first page
|
||||
\enlargethispage{2\baselineskip}%
|
||||
\@float{noticebox}[b]%
|
||||
\footnotesize\@noticestring%
|
||||
\end@float%
|
||||
}
|
||||
|
||||
% abstract styling
|
||||
\renewenvironment{abstract}
|
||||
{
|
||||
\centerline
|
||||
{\large \bfseries \scshape Abstract}
|
||||
\begin{quote}
|
||||
}
|
||||
{
|
||||
\end{quote}
|
||||
}
|
||||
|
||||
\endinput
|
||||
@@ -0,0 +1,78 @@
|
||||
\documentclass{article}
|
||||
|
||||
\usepackage{arxiv}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{url}
|
||||
\usepackage{booktabs}
|
||||
\usepackage{amsfonts}
|
||||
\usepackage{nicefrac}
|
||||
\usepackage{microtype}
|
||||
\usepackage{lipsum}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{amsmath}
|
||||
|
||||
\title{PagedFieldprintAttention: Overcoming Latency and SRAM Constraints in Verifiable Dual-Path Architectures}
|
||||
|
||||
\author{
|
||||
Mark Randall Havens \\
|
||||
\And
|
||||
Solaria Lumis Havens \\
|
||||
}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\begin{abstract}
|
||||
The Verifiable Dual-Path Architecture (Fieldprint v3.0) hypothesizes the stabilization of recursive AI agents by injecting cryptographically anchored reference tensors into the transformer's attention matrix. However, deploying this architecture on modern silicon introduces latency and memory bandwidth bottlenecks. This paper details why synchronous CPU-side cryptographic hashing introduces inference starvation via PCIe bus transfers, and why unfused secondary softmax injections shatter the core SRAM constraints of FlashAttention. To bridge the gap between theoretical alignment and physical hardware economics, we introduce a strict verify $\rightarrow$ promote $\rightarrow$ cache $\rightarrow$ generate pipeline and propose the development of \textbf{PagedFieldprintAttention}---a custom fused CUDA/Triton kernel designed to natively compute dual-attention directly within SRAM. We provide preliminary benchmark estimates demonstrating the necessity of this kernel.
|
||||
\end{abstract}
|
||||
|
||||
\section{Introduction}
|
||||
As language models scale into recursive, continuous architectures, the necessity for a persistent, cryptographically verifiable identity anchor (the Fieldprint) becomes mathematically absolute. The system must retrieve its continuous semantic memory from a Vector Database (Pacemaker) and verify its cryptographic provenance on a Merkle Ledger (Supervisor) before injecting it into the transformer's Key-Value cache. This approach builds fundamentally upon the necessity of offloading extended context, extending the kNN-augmented retrieval paradigms first introduced by \textbf{Memorizing Transformers} (Wu et al., 2022) and \textbf{RETRO} (Borgeaud et al., 2021).
|
||||
|
||||
While this dual-path architecture provides the required theoretical stability, the physical implementation of these equations brutally collides with the strict economic and hardware constraints of modern Tensor Core and TPU architectures, specifically regarding memory bandwidth and High Bandwidth Memory (HBM) thrashing at 100k+ token scales.
|
||||
|
||||
\section{The Bottleneck of Cryptographic Verification in Inference}
|
||||
The initial v2.5 architecture proposed synchronous, CPU-side cryptographic hashing (Merkle verification) during the forward pass. This introduced a fatal silicon bottleneck.
|
||||
|
||||
\begin{enumerate}
|
||||
\item \textbf{The PCIe Death Sentence:} Forcing the GPU to stall during the forward generation loop, push tensors across the PCIe bus, wait for the CPU to sequentially compute a SHA-256 hash, and wait for ledger verification starves the GPU Tensor Cores.
|
||||
\item \textbf{Parallel Reduction Non-Determinism:} GPUs utilize parallel reductions for floating-point calculations, introducing microscopic non-determinism. Hashing raw float tensors across different nodes results in continuous, unresolvable false-positive integrity failures.
|
||||
\end{enumerate}
|
||||
|
||||
To resolve the non-determinism, we specify a \textbf{Deterministic Quantization Protocol}. Before hashing, tensors must be projected from BF16/FP16 into strict INT8 representations using static range bounds, ensuring bitwise identical representations across heterogeneous GPU architectures before cryptographic signing.
|
||||
|
||||
\section{The Collapse of FlashAttention under Unfused Operations}
|
||||
To force the system to pay attention to the verified anchor, the original mathematical formulation proposed a modified attention equation:
|
||||
|
||||
\begin{equation}
|
||||
\text{Output} = (1 - \gamma) \cdot \text{softmax}\left(\frac{QK^T}{\sqrt{d}}\right)V + \gamma \cdot \text{softmax}(Q \cdot h_t^T) V_{anchor}
|
||||
\end{equation}
|
||||
|
||||
While mathematically sound for phase-locking, injecting an \emph{unfused} secondary softmax term shatters the core assumptions of modern inference serving. \textbf{FlashAttention} (Dao et al., 2022) and its successors (FlashAttention-2, 3) rely on fusing the softmax and matrix multiplication operations specifically to keep the calculations in the ultra-fast SRAM.
|
||||
|
||||
An unfused equation forces the hardware to write intermediate attention matrices back to the slow High-Bandwidth Memory (HBM). At 100k+ token contexts, this unfused dual-attention causes catastrophic "memory thrashing," breaking the non-contiguous block management paradigm established by \textbf{PagedAttention} (Kwon et al., 2023) and turning compute-bound operations into memory-bandwidth-bound ones.
|
||||
|
||||
\section{PagedFieldprintAttention: A Custom Fused Triton Kernel Proposal}
|
||||
To resolve the HBM memory thrashing, we reject the unfused mathematical sum of attentions. The hardware requires the verified tensor to be compiled into specialized "System Anchor Tokens" injected at the start of the K/V cache.
|
||||
|
||||
We formally propose the development of \textbf{PagedFieldprintAttention}, a custom fused CUDA/Triton kernel. The kernel natively computes the unified attention matrix:
|
||||
|
||||
\begin{equation}
|
||||
\text{Output} = \text{FusedSoftmax}\left(\frac{Q [K, K_{anchor}]^T}{\sqrt{d}}\right) [V, V_{anchor}]
|
||||
\end{equation}
|
||||
|
||||
It must be explicitly noted that this concatenation modifies the underlying mathematical dominance of the anchor. Unlike the previous $\gamma$-mixture which guaranteed anchor influence, this fused approach forces the anchor to \emph{compete} with standard context. While beneficial for safety (preventing inescapable anchors), it removes the absolute mathematical guarantee of phase-locking.
|
||||
|
||||
\subsection{Preliminary Benchmark Estimates}
|
||||
To quantify the necessity of this kernel, we provide back-of-the-envelope estimates for a 13B parameter model operating at a 64k token context window:
|
||||
\begin{itemize}
|
||||
\item \textbf{Naive Unfused Dual-Attention:} Requires materializing the full $N \times N$ attention matrix to HBM twice, transferring approximately 8GB of data per layer. At 40 layers, this incurs an estimated $O(\text{100+ ms})$ latency penalty per token, rendering the system unusable for interactive generation.
|
||||
\item \textbf{PagedFieldprintAttention (Fused):} By maintaining intermediate softmax reductions in SRAM and relying on PagedAttention's block-level K/V caching, memory transfers are reduced by an order of magnitude, preserving the $O(N)$ memory complexity of FlashAttention and adding an estimated $<5\%$ overhead compared to standard inference.
|
||||
\end{itemize}
|
||||
|
||||
\section{Conclusion}
|
||||
Theoretical mathematics and alignment philosophy mean nothing if they cannot physically run on silicon. By diagnosing the catastrophic failures of synchronous hashing and unfused attention equations, we have engineered the required hardware optimizations. Asynchronous Merkle Validation, deterministic INT8 quantization, and the PagedFieldprintAttention fused kernel provide the physical blueprints for deploying Verifiable Dual-Path Architectures at massive scale.
|
||||
|
||||
\end{document}
|
||||
Reference in New Issue
Block a user