From ff9637e884ac38f434c3d2aa261f53010a464756 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sat, 13 Jul 2019 20:24:58 +0200 Subject: [PATCH] Update DXT1 timings table. Clang is able to get much better times on ARM (around 430 us for both ARM32 and ARM64 NEON). The reference implementation is 1.13 ms on clang. --- manual/tracy.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/manual/tracy.tex b/manual/tracy.tex index bbddce9f..67512423 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -459,17 +459,17 @@ To further reduce image data size, frame images are internally compressed using \centering \begin{tabular}[h]{c|c|c} \textbf{Implementation} & \textbf{Required define} & \textbf{Time} \\ \hline -x86 Reference & --- & 228 \si{\micro\second} \\ -x86 SSE4.1\textsuperscript{a} & \texttt{\_\_SSE4\_1\_\_} & 35.8 \si{\micro\second} \\ -x86 AVX2 & \texttt{\_\_AVX2\_\_} & 26.5 \si{\micro\second} \\ +x86 Reference & --- & 218.7 \si{\micro\second} \\ +x86 SSE4.1\textsuperscript{a} & \texttt{\_\_SSE4\_1\_\_} & 33.9 \si{\micro\second} \\ +x86 AVX2 & \texttt{\_\_AVX2\_\_} & 23.8 \si{\micro\second} \\ ARM Reference & --- & 1.23 \si{\milli\second} \\ ARM32 NEON\textsuperscript{b} & \texttt{\_\_ARM\_NEON} & 561 \si{\micro\second} \\ -ARM64 NEON & \texttt{\_\_ARM\_NEON} & 473 \si{\micro\second} +ARM64 NEON & \texttt{\_\_ARM\_NEON} & 469 \si{\micro\second} \end{tabular} \vspace{1em} \textsuperscript{a)} VEX encoding; \hspace{0.5em} \textsuperscript{b)} ARM32 NEON code compiled for ARM64 -\caption{Compression time of $320\times180$ image. x86: i7 8700K; ARM: ODROID-C2.} +\caption{Compression time of $320\times180$ image. x86: i7 8700K (MSVC); ARM: ODROID-C2 (gcc).} \label{EtcSimd} \end{table}