Skip to content

Commit

Permalink
Merge pull request #1303 from SixLabors/js/jpeg-decode-perf
Browse files Browse the repository at this point in the history
Optimize jpeg decoder based on traces.
  • Loading branch information
JimBobSquarePants authored Aug 4, 2020
2 parents 521fae3 + 1eaaf1a commit a31a541
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 80 deletions.
31 changes: 23 additions & 8 deletions src/ImageSharp/Common/Helpers/SimdUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp
{
Expand All @@ -28,7 +32,7 @@ internal static partial class SimdUtils
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
var sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1));
Vector4 sign = Vector4Utilities.FastClamp(v, new Vector4(-1), new Vector4(1));

return v + (sign * 0.5f);
}
Expand All @@ -44,13 +48,24 @@ internal static Vector4 PseudoRound(this Vector4 v)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> v)
{
var magic0 = new Vector<int>(int.MinValue); // 0x80000000
Vector<float> sgn0 = Vector.AsVectorSingle(magic0);
Vector<float> and0 = Vector.BitwiseAnd(sgn0, v);
Vector<float> or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
Vector<float> add0 = Vector.Add(v, or0);
Vector<float> sub0 = Vector.Subtract(add0, or0);
return sub0;
#if SUPPORTS_RUNTIME_INTRINSICS

if (Avx2.IsSupported)
{
ref Vector256<float> v256 = ref Unsafe.As<Vector<float>, Vector256<float>>(ref v);
Vector256<float> vRound = Avx.RoundToNearestInteger(v256);
return Unsafe.As<Vector256<float>, Vector<float>>(ref vRound);
}
else
#endif
{
var magic0 = new Vector<int>(int.MinValue); // 0x80000000
var sgn0 = Vector.AsVectorSingle(magic0);
var and0 = Vector.BitwiseAnd(sgn0, v);
var or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
var add0 = Vector.Add(v, or0);
return Vector.Subtract(add0, or0);
}
}

/// <summary>
Expand Down
23 changes: 11 additions & 12 deletions src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,24 @@ public void FillBuffer()
public unsafe int DecodeHuffman(ref HuffmanTable h)
{
this.CheckBits();
int v = this.PeekBits(JpegConstants.Huffman.LookupBits);
int symbol = h.LookaheadValue[v];
int size = h.LookaheadSize[v];
int index = this.PeekBits(JpegConstants.Huffman.LookupBits);
int size = h.LookaheadSize[index];

if (size == JpegConstants.Huffman.SlowBits)
if (size < JpegConstants.Huffman.SlowBits)
{
ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits);
while (x > h.MaxCode[size])
{
size++;
}
this.remainingBits -= size;
return h.LookaheadValue[index];
}

v = (int)(x >> (JpegConstants.Huffman.RegisterSize - size));
symbol = h.Values[(h.ValOffset[size] + v) & 0xFF];
ulong x = this.data << (JpegConstants.Huffman.RegisterSize - this.remainingBits);
while (x > h.MaxCode[size])
{
size++;
}

this.remainingBits -= size;

return symbol;
return h.Values[(h.ValOffset[size] + (int)(x >> (JpegConstants.Huffman.RegisterSize - size))) & 0xFF];
}

[MethodImpl(InliningOptions.ShortMethod)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System.Drawing;
using System.IO;
using BenchmarkDotNet.Attributes;

Expand All @@ -15,7 +14,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
[Config(typeof(Config.ShortClr))]
public class DecodeJpegParseStreamOnly
{
[Params(TestImages.Jpeg.Baseline.Jpeg420Exif)]
[Params(TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr)]
public string TestImage { get; set; }

private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage);
Expand All @@ -37,7 +36,7 @@ public SDSize JpegSystemDrawing()
}

[Benchmark(Description = "JpegDecoderCore.ParseStream")]
public void ParseStreamPdfJs()
public void ParseStream()
{
using var memoryStream = new MemoryStream(this.jpegBytes);
using var bufferedStream = new BufferedReadStream(Configuration.Default, memoryStream);
Expand All @@ -46,22 +45,18 @@ public void ParseStreamPdfJs()
decoder.ParseStream(bufferedStream);
decoder.Dispose();
}

// RESULTS (2019 April 23):
//
// BenchmarkDotNet=v0.11.3, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5)
// Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores
// .NET Core SDK=2.2.202
// [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
// Clr : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3362.0
// Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
//
// | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----- |-------- |--------------------- |---------:|---------:|----------:|------:|--------:|---------:|------:|------:|----------:|
// | 'System.Drawing FULL' | Clr | Clr | Jpg/b(...)f.jpg [28] | 18.69 ms | 8.273 ms | 0.4535 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.89 KB |
// | JpegDecoderCore.ParseStream | Clr | Clr | Jpg/b(...)f.jpg [28] | 15.76 ms | 4.266 ms | 0.2339 ms | 0.84 | 0.03 | - | - | - | 11.83 KB |
// | | | | | | | | | | | | | |
// | 'System.Drawing FULL' | Core | Core | Jpg/b(...)f.jpg [28] | 17.68 ms | 2.711 ms | 0.1486 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB |
// | JpegDecoderCore.ParseStream | Core | Core | Jpg/b(...)f.jpg [28] | 14.27 ms | 3.671 ms | 0.2012 ms | 0.81 | 0.00 | - | - | - | 11.76 KB |
}

/*
| Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---------------------------- |----------- |-------------- |--------------------- |---------:|----------:|----------:|------:|--------:|------:|------:|----------:|
| 'System.Drawing FULL' | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.828 ms | 0.9885 ms | 0.0542 ms | 1.00 | 46.8750 | - | - | 211566 B |
| JpegDecoderCore.ParseStream | Job-HITJFX | .NET 4.7.2 | Jpg/b(...)e.jpg [21] | 5.833 ms | 0.2923 ms | 0.0160 ms | 1.00 | - | - | - | 12416 B |
| | | | | | | | | | | | |
| 'System.Drawing FULL' | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 6.018 ms | 2.1374 ms | 0.1172 ms | 1.00 | 46.8750 | - | - | 210768 B |
| JpegDecoderCore.ParseStream | Job-WPSKZD | .NET Core 2.1 | Jpg/b(...)e.jpg [21] | 4.382 ms | 0.9009 ms | 0.0494 ms | 0.73 | - | - | - | 12360 B |
| | | | | | | | | | | | |
| 'System.Drawing FULL' | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 5.714 ms | 0.4078 ms | 0.0224 ms | 1.00 | - | - | - | 176 B |
| JpegDecoderCore.ParseStream | Job-ZLSNRP | .NET Core 3.1 | Jpg/b(...)e.jpg [21] | 4.239 ms | 1.0943 ms | 0.0600 ms | 0.74 | - | - | - | 12406 B |
*/
}
52 changes: 12 additions & 40 deletions tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,45 +90,17 @@ public Size JpegImageSharp()
}
}

// RESULTS (2018 November 4):
//
// BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
// Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
// Frequency=2742191 Hz, Resolution=364.6719 ns, Timer=TSC
// .NET Core SDK=2.1.403
// [Host] : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
//
// Method | TestImage | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// ------------------------------- |-------------------------------------------- |-----------:|-----------:|----------:|-------:|---------:|----------:|---------:|---------:|------------:|
// 'Decode Jpeg - System.Drawing' | Jpg/baseline/Lake.jpg | 6.117 ms | 0.3923 ms | 0.0222 ms | 1.00 | 0.00 | 62.5000 | - | - | 205.83 KB |
// 'Decode Jpeg - ImageSharp' | Jpg/baseline/Lake.jpg | 18.126 ms | 0.6023 ms | 0.0340 ms | 2.96 | 0.01 | - | - | - | 19.97 KB |
// | | | | | | | | | | |
// 'Decode Jpeg - System.Drawing' | Jpg/baseline/jpeg420exif.jpg | 17.063 ms | 2.6096 ms | 0.1474 ms | 1.00 | 0.00 | 218.7500 | - | - | 757.04 KB |
// 'Decode Jpeg - ImageSharp' | Jpg/baseline/jpeg420exif.jpg | 41.366 ms | 1.0115 ms | 0.0572 ms | 2.42 | 0.02 | - | - | - | 21.94 KB |
// | | | | | | | | | | |
// 'Decode Jpeg - System.Drawing' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 428.282 ms | 94.9163 ms | 5.3629 ms | 1.00 | 0.00 | 2375.0000 | - | - | 7403.76 KB |
// 'Decode Jpeg - ImageSharp' | Jpg/issues/Issue518-Bad-RST-Progressive.jpg | 386.698 ms | 33.0065 ms | 1.8649 ms | 0.90 | 0.01 | 125.0000 | 125.0000 | 125.0000 | 35186.97 KB |
// | | | | | | | | | | |
// 'Decode Jpeg - System.Drawing' | Jpg/issues/issue750-exif-tranform.jpg | 95.192 ms | 3.1762 ms | 0.1795 ms | 1.00 | 0.00 | 1750.0000 | - | - | 5492.63 KB |
// 'Decode Jpeg - ImageSharp' | Jpg/issues/issue750-exif-tranform.jpg | 230.158 ms | 48.8128 ms | 2.7580 ms | 2.42 | 0.02 | 312.5000 | 312.5000 | 312.5000 | 58834.66 KB |

// RESULTS (2019 April 23):
//
// BenchmarkDotNet=v0.11.5, OS=Windows 10.0.17763.437 (1809/October2018Update/Redstone5)
// Intel Core i7-6600U CPU 2.60GHz (Skylake), 1 CPU, 4 logical and 2 physical cores
// .NET Core SDK=2.2.202
// [Host] : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
// Core : .NET Core 2.1.9 (CoreCLR 4.6.27414.06, CoreFX 4.6.27415.01), 64bit RyuJIT
//
// | Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------------------------- |--------------------- |-----------:|-----------:|-----------:|------:|--------:|----------:|------:|------:|------------:|
// | 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 6.957 ms | 9.618 ms | 0.5272 ms | 1.00 | 0.00 | 93.7500 | - | - | 205.83 KB |
// | 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 18.348 ms | 8.876 ms | 0.4865 ms | 2.65 | 0.23 | - | - | - | 14.49 KB |
// | | | | | | | | | | | |
// | 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 18.687 ms | 11.632 ms | 0.6376 ms | 1.00 | 0.00 | 343.7500 | - | - | 757.04 KB |
// | 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 41.990 ms | 25.514 ms | 1.3985 ms | 2.25 | 0.10 | - | - | - | 15.48 KB |
// | | | | | | | | | | | |
// | 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 477.265 ms | 732.126 ms | 40.1303 ms | 1.00 | 0.00 | 3000.0000 | - | - | 7403.76 KB |
// | 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 348.545 ms | 91.480 ms | 5.0143 ms | 0.73 | 0.06 | - | - | - | 35177.21 KB |
/*
| Method | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|------------------------------- |--------------------- |-----------:|------------:|-----------:|------:|--------:|------:|------:|------:|-----------:|
| 'Decode Jpeg - System.Drawing' | Jpg/b(...)e.jpg [21] | 5.122 ms | 1.3978 ms | 0.0766 ms | 1.00 | 0.00 | - | - | - | 176 B |
| 'Decode Jpeg - ImageSharp' | Jpg/b(...)e.jpg [21] | 11.991 ms | 0.2514 ms | 0.0138 ms | 2.34 | 0.03 | - | - | - | 15816 B |
| | | | | | | | | | | |
| 'Decode Jpeg - System.Drawing' | Jpg/b(...)f.jpg [28] | 14.943 ms | 1.8410 ms | 0.1009 ms | 1.00 | 0.00 | - | - | - | 176 B |
| 'Decode Jpeg - ImageSharp' | Jpg/b(...)f.jpg [28] | 29.759 ms | 1.5452 ms | 0.0847 ms | 1.99 | 0.01 | - | - | - | 16824 B |
| | | | | | | | | | | |
| 'Decode Jpeg - System.Drawing' | Jpg/i(...)e.jpg [43] | 388.229 ms | 382.8946 ms | 20.9877 ms | 1.00 | 0.00 | - | - | - | 216 B |
| 'Decode Jpeg - ImageSharp' | Jpg/i(...)e.jpg [43] | 276.490 ms | 195.5104 ms | 10.7166 ms | 0.71 | 0.01 | - | - | - | 36022368 B |
*/
}
}

0 comments on commit a31a541

Please sign in to comment.