C产品在市场上有哪些独特优势?

2026-05-17 09:241阅读0评论SEO资讯
  • 内容介绍
  • 文章标签
  • 相关推荐

本文共计1432个文字,预计阅读时间需要6分钟。

我们现在想实现一些简单的批量运算,比如累乘求积、累加求和。下面是一个简单的C#类示例:

javapublic class NormalCalc { public static double Multiply(double[] nums) { double result=1.0d; for (int i=0; i

public static double Sum(double[] nums) { double total=0.0d; for (int i=0; i

我们现在想做一些简单的批量运算,比如累乘得积,累加求和

/// <summary>
/// Plain sequential (non-SIMD) implementations of product and sum over a double array.
/// </summary>
public class NormalCalc
{
    /// <summary>
    /// Returns the product of every element of <paramref name="nums"/>, multiplied in array order.
    /// An empty array yields 1.0.
    /// </summary>
    public static double Multiply(double[] nums)
    {
        double product = 1.0d;
        foreach (double value in nums)
        {
            product *= value;
        }
        return product;
    }

    /// <summary>
    /// Returns the sum of every element of <paramref name="nums"/>, added in array order.
    /// An empty array yields 0.0.
    /// </summary>
    public static double AddTotal(double[] nums)
    {
        double sum = 0.0d;
        foreach (double value in nums)
        {
            sum += value;
        }
        return sum;
    }
}

这种批量运算不正是指令集的优势么,那就试试吧

C#中可以使用Vector类来做宽位运算,我这里有avx2指令集,也就是256位,double是64位的,那就有4个,如果做int运算自然就有8个

在这里就是每4个元素放进一个Vector里一起做乘法运算,最后把累积得到的这4个值拷贝到数组中互乘,再把剩下不足4个的元素乘完就好了;乘法嘛,用1作为种子

/// <summary>
/// Multiplies all elements of <paramref name="nums"/> together, consuming
/// <c>Vector&lt;double&gt;.Count</c> elements per loop iteration.
/// </summary>
/// <param name="nums">Values to multiply. An empty array yields 1.0.</param>
/// <returns>
/// The product of all elements. Each vector lane accumulates its own partial product, so the
/// multiplication order differs from a sequential loop and the result may differ in the last bits.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public unsafe static double Multiply(double[] nums)
{
    ArgumentNullException.ThrowIfNull(nums);

    int vectorSize = Vector<double>.Count;
    var accVector = Vector<double>.One; // 1 is the multiplicative identity for every lane
    int i = 0;

    fixed (double* p = nums)
    {
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            // The array is only element-aligned, not vector-aligned, so use the unaligned read.
            var v = Unsafe.ReadUnaligned<Vector<double>>(p + i);
            accVector = Vector.Multiply(accVector, v);
        }
    }

    // Fold the per-lane partial products; the indexer avoids allocating a temporary array.
    double result = 1.0d;
    for (int lane = 0; lane < vectorSize; lane++)
    {
        result *= accVector[lane];
    }

    // Elements that did not fill a whole vector.
    for (; i < nums.Length; i++)
    {
        result *= nums[i];
    }
    return result;
}

下一个问题就是,我总不见得又得每种数据类型都写一遍吧,咱有没有办法用C#的各种新特性写成泛型?咱有Span有预览特性INumber,试了下还真可以

用new Vector构造,泛型T用INumber约束就有了T.One来表示数字1,并且能随便的做乘法运算了

/// <summary>
/// Multiplies all elements of <paramref name="nums"/> together for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Values to multiply. An empty array yields <c>T.One</c>.</param>
/// <returns>The product of all elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static T Multiply<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    T result = T.One;
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var accVector = Vector<T>.One;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            accVector = Vector.Multiply(accVector, new Vector<T>(nums, i));
        }

        // Fold the per-lane partial products without allocating a temporary array.
        for (int lane = 0; lane < vectorSize; lane++)
        {
            result *= accVector[lane];
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        result *= nums[i];
    }
    return result;
}

理论上Span速度不会比指针快,new Vector不会比Unsafe.Read快,但是差不了太多,就能写成泛型方法

来测试一下速度:

// Build the input array: 100000 random doubles in [0, 2.723).
double[] nums = new double[100000];
Random random = new Random();
for (int k = 0; k < nums.Length; k++)
{
    nums[k] = random.NextDouble() * 2.723;
}

// Plain sequential multiplication.
Stopwatch stopwatch = Stopwatch.StartNew();
for (int run = 0; run < 10000; run++)
{
    NormalCalc.Multiply(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

// Vector-based multiplication.
stopwatch.Restart();
for (int run = 0; run < 10000; run++)
{
    SIMD_Calc.Multiply(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

// Vector + Span + INumber generic version.
stopwatch.Restart();
for (int run = 0; run < 10000; run++)
{
    SIMD_Calc.MultiplySpan(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

结果为:

730

185

190

不错不错,效果还挺满意的

接下来来个泛型的累加

/// <summary>
/// Sums all elements of <paramref name="nums"/> for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Values to add. An empty array yields <c>T.Zero</c>.</param>
/// <returns>The sum of all elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static T AddTotal<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    T result = T.Zero;
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var accVector = Vector<T>.Zero;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            accVector = Vector.Add(accVector, new Vector<T>(nums, i));
        }

        // A dot product with an all-ones vector adds the lanes together.
        result = Vector.Dot(accVector, Vector<T>.One);
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        result += nums[i];
    }
    return result;
}

还有一种方式不使用Vector,而是直接使用Avx2类下的方法做运算,需要加个是否支持的判断

/// <summary>
/// Sums an int array with AVX2 intrinsics, eight 32-bit lanes per iteration.
/// </summary>
/// <param name="nums">Values to add.</param>
/// <returns>The sum of all elements.</returns>
/// <exception cref="NotSupportedException">AVX2 is not available on the current hardware.</exception>
public unsafe static int AddTotal_Avx2(int[] nums)
{
    if (!Avx2.IsSupported)
    {
        throw new NotSupportedException();
    }

    int lanes = Vector256<int>.Count; // 256 bits / 32-bit int = 8 lanes
    Vector256<int> sums = Vector256<int>.Zero;
    int index = 0;

    fixed (int* data = nums)
    {
        while (index <= nums.Length - lanes)
        {
            sums = Avx2.Add(sums, Avx2.LoadVector256(data + index));
            index += lanes;
        }
    }

    // Fold the per-lane partial sums.
    int total = 0;
    for (int lane = 0; lane < lanes; lane++)
    {
        total += sums.GetElement(lane);
    }

    // Elements that did not fill a whole vector.
    while (index < nums.Length)
    {
        total += nums[index];
        index++;
    }
    return total;
}

但是我们要做好回落,比如没有avx2就用sse,没有sse就用普通的

不过System.Runtime.Intrinsics.X86下面一堆这种,再说了还有arm的,所以通用性不如Vector方法

/// <summary>
/// Sums an int array with the widest available instruction set:
/// AVX2 first, then SSE2, otherwise the plain scalar implementation.
/// </summary>
public unsafe static int AddTotal_2(int[] nums)
{
    if (Avx2.IsSupported)
    {
        return AddTotal_Avx2(nums);
    }

    if (Sse2.IsSupported)
    {
        return AddTotal_Sse2(nums);
    }

    return NormalCalc.AddTotal(nums);
}

把Vector用在两个数组相加相乘上会更加简单

/// <summary>
/// Multiplies two equally long arrays element by element,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="numsl">Left operands.</param>
/// <param name="numsr">Right operands; must have the same length as <paramref name="numsl"/>.</param>
/// <returns>A new array where element <c>i</c> is <c>numsl[i] * numsr[i]</c>.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
public static T[] Multiply<T>(T[] numsl, T[] numsr) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(numsl);
    ArgumentNullException.ThrowIfNull(numsr);
    if (numsl.Length != numsr.Length)
    {
        throw new ArgumentException("Both arrays must have the same length.", nameof(numsr));
    }

    T[] result = new T[numsl.Length];
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        for (; i <= numsl.Length - vectorSize; i += vectorSize)
        {
            var l = new Vector<T>(numsl, i);
            var r = new Vector<T>(numsr, i);
            Vector.Multiply(l, r).CopyTo(result, i);
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < numsl.Length; i++)
    {
        result[i] = numsl[i] * numsr[i];
    }
    return result;
}

批量加1

普通方法:

/// <summary>
/// Increments every element of <paramref name="nums"/> in place, one element at a time.
/// </summary>
public static void AddOne(int[] nums)
{
    int index = 0;
    while (index < nums.Length)
    {
        nums[index] += 1;
        index++;
    }
}

SIMD:

/// <summary>
/// Adds one to every element of <paramref name="nums"/> in place for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Array whose elements are incremented in place.</param>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static void AddOne<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var ones = Vector<T>.One;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            Vector.Add(new Vector<T>(nums, i), ones).CopyTo(nums, i);
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        nums[i]++;
    }
}

跑分是 int类型 普通方法:390 SIMD:70

double类型 普通方法:578 SIMD:145

这对于我们平时普通计算的性能还是有帮助的,官方还用SIMD优化了Matrix的一些类,不过都是很小的二维矩阵,你可以根据自己的需要去设计更复杂的大矩阵运算类,如果需要更复杂的批量多维矩阵处理,推荐OpenCvSharp

代码下载:wwu.lanzoub.com/iglMD032ky0f

参考链接:

zhuanlan.zhihu.com/p/60171538

habr.com/en/post/467689

www.zhihu.com/question/266256257

本文共计1432个文字,预计阅读时间需要6分钟。

我们现在想实现一些简单的批量运算,比如累乘求积、累加求和。下面是一个简单的C#类示例:

javapublic class NormalCalc { public static double Multiply(double[] nums) { double result=1.0d; for (int i=0; i

public static double Sum(double[] nums) { double total=0.0d; for (int i=0; i

我们现在想做一些简单的批量运算,比如累乘得积,累加求和

/// <summary>
/// Plain sequential (non-SIMD) implementations of product and sum over a double array.
/// </summary>
public class NormalCalc
{
    /// <summary>
    /// Returns the product of every element of <paramref name="nums"/>, multiplied in array order.
    /// An empty array yields 1.0.
    /// </summary>
    public static double Multiply(double[] nums)
    {
        double product = 1.0d;
        foreach (double value in nums)
        {
            product *= value;
        }
        return product;
    }

    /// <summary>
    /// Returns the sum of every element of <paramref name="nums"/>, added in array order.
    /// An empty array yields 0.0.
    /// </summary>
    public static double AddTotal(double[] nums)
    {
        double sum = 0.0d;
        foreach (double value in nums)
        {
            sum += value;
        }
        return sum;
    }
}

这种批量运算不正是指令集的优势么,那就试试吧

C#中可以使用Vector类来做宽位运算,我这里有avx2指令集,也就是256位,double是64位的,那就有4个,如果做int运算自然就有8个

在这里就是每4个元素放进一个Vector里一起做乘法运算,最后把累积得到的这4个值拷贝到数组中互乘,再把剩下不足4个的元素乘完就好了;乘法嘛,用1作为种子

/// <summary>
/// Multiplies all elements of <paramref name="nums"/> together, consuming
/// <c>Vector&lt;double&gt;.Count</c> elements per loop iteration.
/// </summary>
/// <param name="nums">Values to multiply. An empty array yields 1.0.</param>
/// <returns>
/// The product of all elements. Each vector lane accumulates its own partial product, so the
/// multiplication order differs from a sequential loop and the result may differ in the last bits.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public unsafe static double Multiply(double[] nums)
{
    ArgumentNullException.ThrowIfNull(nums);

    int vectorSize = Vector<double>.Count;
    var accVector = Vector<double>.One; // 1 is the multiplicative identity for every lane
    int i = 0;

    fixed (double* p = nums)
    {
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            // The array is only element-aligned, not vector-aligned, so use the unaligned read.
            var v = Unsafe.ReadUnaligned<Vector<double>>(p + i);
            accVector = Vector.Multiply(accVector, v);
        }
    }

    // Fold the per-lane partial products; the indexer avoids allocating a temporary array.
    double result = 1.0d;
    for (int lane = 0; lane < vectorSize; lane++)
    {
        result *= accVector[lane];
    }

    // Elements that did not fill a whole vector.
    for (; i < nums.Length; i++)
    {
        result *= nums[i];
    }
    return result;
}

下一个问题就是,我总不见得又得每种数据类型都写一遍吧,咱有没有办法用C#的各种新特性写成泛型?咱有Span有预览特性INumber,试了下还真可以

用new Vector构造,泛型T用INumber约束就有了T.One来表示数字1,并且能随便的做乘法运算了

/// <summary>
/// Multiplies all elements of <paramref name="nums"/> together for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Values to multiply. An empty array yields <c>T.One</c>.</param>
/// <returns>The product of all elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static T Multiply<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    T result = T.One;
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var accVector = Vector<T>.One;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            accVector = Vector.Multiply(accVector, new Vector<T>(nums, i));
        }

        // Fold the per-lane partial products without allocating a temporary array.
        for (int lane = 0; lane < vectorSize; lane++)
        {
            result *= accVector[lane];
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        result *= nums[i];
    }
    return result;
}

理论上Span速度不会比指针快,new Vector不会比Unsafe.Read快,但是差不了太多,就能写成泛型方法

来测试一下速度:

// Build the input array: 100000 random doubles in [0, 2.723).
double[] nums = new double[100000];
Random random = new Random();
for (int k = 0; k < nums.Length; k++)
{
    nums[k] = random.NextDouble() * 2.723;
}

// Plain sequential multiplication.
Stopwatch stopwatch = Stopwatch.StartNew();
for (int run = 0; run < 10000; run++)
{
    NormalCalc.Multiply(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

// Vector-based multiplication.
stopwatch.Restart();
for (int run = 0; run < 10000; run++)
{
    SIMD_Calc.Multiply(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

// Vector + Span + INumber generic version.
stopwatch.Restart();
for (int run = 0; run < 10000; run++)
{
    SIMD_Calc.MultiplySpan(nums);
}
stopwatch.Stop();
Console.WriteLine(stopwatch.ElapsedMilliseconds);

结果为:

730

185

190

不错不错,效果还挺满意的

接下来来个泛型的累加

/// <summary>
/// Sums all elements of <paramref name="nums"/> for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Values to add. An empty array yields <c>T.Zero</c>.</param>
/// <returns>The sum of all elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static T AddTotal<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    T result = T.Zero;
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var accVector = Vector<T>.Zero;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            accVector = Vector.Add(accVector, new Vector<T>(nums, i));
        }

        // A dot product with an all-ones vector adds the lanes together.
        result = Vector.Dot(accVector, Vector<T>.One);
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        result += nums[i];
    }
    return result;
}

还有一种方式不使用Vector,而是直接使用Avx2类下的方法做运算,需要加个是否支持的判断

/// <summary>
/// Sums an int array with AVX2 intrinsics, eight 32-bit lanes per iteration.
/// </summary>
/// <param name="nums">Values to add.</param>
/// <returns>The sum of all elements.</returns>
/// <exception cref="NotSupportedException">AVX2 is not available on the current hardware.</exception>
public unsafe static int AddTotal_Avx2(int[] nums)
{
    if (!Avx2.IsSupported)
    {
        throw new NotSupportedException();
    }

    int lanes = Vector256<int>.Count; // 256 bits / 32-bit int = 8 lanes
    Vector256<int> sums = Vector256<int>.Zero;
    int index = 0;

    fixed (int* data = nums)
    {
        while (index <= nums.Length - lanes)
        {
            sums = Avx2.Add(sums, Avx2.LoadVector256(data + index));
            index += lanes;
        }
    }

    // Fold the per-lane partial sums.
    int total = 0;
    for (int lane = 0; lane < lanes; lane++)
    {
        total += sums.GetElement(lane);
    }

    // Elements that did not fill a whole vector.
    while (index < nums.Length)
    {
        total += nums[index];
        index++;
    }
    return total;
}

但是我们要做好回落,比如没有avx2就用sse,没有sse就用普通的

不过System.Runtime.Intrinsics.X86下面一堆这种,再说了还有arm的,所以通用性不如Vector方法

/// <summary>
/// Sums an int array with the widest available instruction set:
/// AVX2 first, then SSE2, otherwise the plain scalar implementation.
/// </summary>
public unsafe static int AddTotal_2(int[] nums)
{
    if (Avx2.IsSupported)
    {
        return AddTotal_Avx2(nums);
    }

    if (Sse2.IsSupported)
    {
        return AddTotal_Sse2(nums);
    }

    return NormalCalc.AddTotal(nums);
}

把Vector用在两个数组相加相乘上会更加简单

/// <summary>
/// Multiplies two equally long arrays element by element,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="numsl">Left operands.</param>
/// <param name="numsr">Right operands; must have the same length as <paramref name="numsl"/>.</param>
/// <returns>A new array where element <c>i</c> is <c>numsl[i] * numsr[i]</c>.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
public static T[] Multiply<T>(T[] numsl, T[] numsr) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(numsl);
    ArgumentNullException.ThrowIfNull(numsr);
    if (numsl.Length != numsr.Length)
    {
        throw new ArgumentException("Both arrays must have the same length.", nameof(numsr));
    }

    T[] result = new T[numsl.Length];
    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        for (; i <= numsl.Length - vectorSize; i += vectorSize)
        {
            var l = new Vector<T>(numsl, i);
            var r = new Vector<T>(numsr, i);
            Vector.Multiply(l, r).CopyTo(result, i);
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < numsl.Length; i++)
    {
        result[i] = numsl[i] * numsr[i];
    }
    return result;
}

批量加1

普通方法:

/// <summary>
/// Increments every element of <paramref name="nums"/> in place, one element at a time.
/// </summary>
public static void AddOne(int[] nums)
{
    int index = 0;
    while (index < nums.Length)
    {
        nums[index] += 1;
        index++;
    }
}

SIMD:

/// <summary>
/// Adds one to every element of <paramref name="nums"/> in place for any numeric value type,
/// using <see cref="Vector{T}"/> when the element type supports it.
/// </summary>
/// <typeparam name="T">Numeric element type.</typeparam>
/// <param name="nums">Array whose elements are incremented in place.</param>
/// <exception cref="ArgumentNullException"><paramref name="nums"/> is null.</exception>
public static void AddOne<T>(T[] nums) where T : struct, INumber<T>
{
    ArgumentNullException.ThrowIfNull(nums);

    int i = 0;

    // Vector<T> throws NotSupportedException for element types such as decimal,
    // so the SIMD path is taken only for supported types; others use the scalar loop below.
    if (Vector<T>.IsSupported)
    {
        int vectorSize = Vector<T>.Count;
        var ones = Vector<T>.One;
        for (; i <= nums.Length - vectorSize; i += vectorSize)
        {
            Vector.Add(new Vector<T>(nums, i), ones).CopyTo(nums, i);
        }
    }

    // Remaining elements (or all of them when Vector<T> is unsupported).
    for (; i < nums.Length; i++)
    {
        nums[i]++;
    }
}

跑分是 int类型 普通方法:390 SIMD:70

double类型 普通方法:578 SIMD:145

这对于我们平时普通计算的性能还是有帮助的,官方还用SIMD优化了Matrix的一些类,不过都是很小的二维矩阵,你可以根据自己的需要去设计更复杂的大矩阵运算类,如果需要更复杂的批量多维矩阵处理,推荐OpenCvSharp

代码下载:wwu.lanzoub.com/iglMD032ky0f

参考链接:

zhuanlan.zhihu.com/p/60171538

habr.com/en/post/467689

www.zhihu.com/question/266256257