TURBO-565-浮雕为3D算子

2026-04-20 09:56:46 +08:00
parent 4390ad1e9f
commit 78c21c21f0
24 changed files with 79115 additions and 42 deletions
@@ -32,7 +32,9 @@ public class SuperResolutionProcessor : ImageProcessorBase

    // 会话缓存，避免重复加载
    private static InferenceSession? _cachedSession;
+
    private static string _cachedModelKey = string.Empty;
+    private static readonly object _sessionLock = new();

    public SuperResolutionProcessor()
    {
@@ -89,32 +91,33 @@ public class SuperResolutionProcessor : ImageProcessorBase
        // 加载或复用会话
        string modelKey = $"{model}_{scale}";
        InferenceSession session;
-        if (_cachedModelKey == modelKey && _cachedSession != null)
+        lock (_sessionLock)
        {
-            session = _cachedSession;
-            _logger.Debug("Reusing cached session: {ModelKey}", modelKey);
-        }
-        else
-        {
-            _cachedSession?.Dispose();
-            var options = new SessionOptions();
-            options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
-            try
+            if (_cachedModelKey == modelKey && _cachedSession != null)
            {
-                options.AppendExecutionProvider_CUDA(0);
-                _logger.Information("Using CUDA GPU for inference");
+                session = _cachedSession;
+                _logger.Debug("Reusing cached session: {ModelKey}", modelKey);
            }
-            catch
+            else
            {
-                _logger.Warning("CUDA not available, falling back to CPU");
+                _cachedSession?.Dispose();
+                var options = new SessionOptions();
+                options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
+                bool cudaEnabled = false;
+                try
+                {
+                    options.AppendExecutionProvider_CUDA(0);
+                    cudaEnabled = true;
+                }
+                catch (Exception ex)
+                {
+                    _logger.Warning(ex, "CUDA EP unavailable (check CUDA/cuDNN version match), falling back to CPU");
+                }
+                session = new InferenceSession(modelPath, options);
+                _cachedSession = session;
+                _cachedModelKey = modelKey;
+                _logger.Information("Loaded ONNX model: {ModelPath}, CUDA={CudaEnabled}", modelPath, cudaEnabled);
            }
-            session = new InferenceSession(modelPath, options);
-            _cachedSession = session;
-            _cachedModelKey = modelKey;
-            // 记录实际使用的 Execution Provider
-            var providers = session.ModelMetadata?.CustomMetadataMap;
-            _logger.Information("Loaded ONNX model: {ModelPath}, Providers: {Providers}",
-                modelPath, string.Join(", ", session.GetType().Name));
        }

        int h = inputImage.Height;
@@ -178,7 +181,7 @@ public class SuperResolutionProcessor : ImageProcessorBase
                for (int x = 0; x < w; x++)
                {
                    int px = rowOffset + x * 3;
-                    buf[px]     = imgData[y, x, 0];
+                    buf[px] = imgData[y, x, 0];
                    buf[px + 1] = imgData[y, x, 1];
                    buf[px + 2] = imgData[y, x, 2];
                }
@@ -193,44 +196,42 @@ public class SuperResolutionProcessor : ImageProcessorBase
        };

        using var results = session.Run(inputs);
-        var outputTensor = results.First().AsTensor<float>();
+        var outputTensor = (DenseTensor<float>)results.First().AsTensor<float>();

-        // 输出 shape: [1, C, H*scale, W*scale] (NCHW，模型输出经过 Transpose)
+        // 输出 shape: [1, C, H*scale, W*scale] (NCHW)
        var shape = outputTensor.Dimensions;
        int outC = shape[1];
        int outH = shape[2];
        int outW = shape[3];
+        var outBuf = outputTensor.Buffer.ToArray(); // Span 不能跨 lambda 捕获，转为数组
+
+        Image<Gray, byte> result = new(outW, outH);
+        var outData = result.Data;
+        int planeSize = outH * outW;

-        // 转换为灰度图像
-        // 使用 Parallel.For + 直接内存操作
-        Image<Gray, byte> result;
        if (outC == 1)
        {
-            // FSRCNN: 单通道输出 [1, 1, outH, outW]
-            result = new Image<Gray, byte>(outW, outH);
-            var outData = result.Data;
+            // FSRCNN: [1, 1, outH, outW]
            Parallel.For(0, outH, y =>
            {
+                int rowOffset = y * outW;
                for (int x = 0; x < outW; x++)
-                    outData[y, x, 0] = (byte)Math.Clamp((int)outputTensor[0, 0, y, x], 0, 255);
+                    outData[y, x, 0] = (byte)Math.Clamp((int)outBuf[rowOffset + x], 0, 255);
            });
        }
        else
        {
-            // EDSR: 三通道输出 [1, 3, outH, outW] → 灰度
-            // 直接计算灰度值，跳过中间 BGR 图像分配
-            result = new Image<Gray, byte>(outW, outH);
-            var outData = result.Data;
+            // EDSR: [1, 3, outH, outW] → 灰度，BT.601
            Parallel.For(0, outH, y =>
            {
+                int rowOffset = y * outW;
                for (int x = 0; x < outW; x++)
                {
-                    float b = outputTensor[0, 0, y, x];
-                    float g = outputTensor[0, 1, y, x];
-                    float r = outputTensor[0, 2, y, x];
-                    // BT.601 灰度公式: 0.299*R + 0.587*G + 0.114*B
-                    int gray = (int)(0.299f * r + 0.587f * g + 0.114f * b);
-                    outData[y, x, 0] = (byte)Math.Clamp(gray, 0, 255);
+                    int i = rowOffset + x;
+                    float b = outBuf[i];
+                    float g = outBuf[planeSize + i];
+                    float r = outBuf[planeSize * 2 + i];
+                    outData[y, x, 0] = (byte)Math.Clamp((int)(0.299f * r + 0.587f * g + 0.114f * b), 0, 255);
                }
            });
        }