z memcpy(for MSVC)小内存高速复制

来源:互联网 发布:linux内网建站 编辑:程序博客网 时间:2024/05/02 02:34


z memcpy(仅适用于 MSVC)小内存高速复制:即使在 Debug 模式下,复制得也比 memcpy 快,而且与 Release 模式下的速度差距也不大。

2016-3-6注意: 由于指令缓存命中、内联深度等方面的原因,此函数性能测试看起来很不错,插入到程序中实际应用时就不一定了,请测试对比后再决定使用

对 VC2008 及以上版本均测试通过。

注意:下面输出中标注的 "100M" 与测试代码不一致——代码里每个计时循环实际只执行 10000000(1000 万)次复制,因此各项耗时对应的是 1000 万次复制。

Release Mode
All time to memcpy 63 * 100M is 0.042s in 3GHz (zmemcopy template const size)
All time to memcpy 63 * 100M is 0.050s in 3GHz (zmemcopy static  const size)
All time to memcpy 63 * 100M is 0.147s in 3GHz (memcpy const size)
All time to memcpy 63 * 100M is 0.048s in 3GHz (zmemcopy const size)
All time to memcpy 63 * 100M is 0.050s in 3GHz (zmemcopy unknown array direct)
All time to memcpy 63 * 100M is 0.051s in 3GHz (zmemcopy unknown small size)
All time to memcpy 63 * 100M is 0.056s in 3GHz (zmemcopy unknown size)
All time to memcpy 63 * 100M is 0.140s in 3GHz (memcpy unknown size)


Debug Mode
All time to memcpy 63 * 100M is 0.056s in 3GHz (zmemcopy template const size)
All time to memcpy 63 * 100M is 0.055s in 3GHz (zmemcopy static  const size)
All time to memcpy 63 * 100M is 0.171s in 3GHz (memcpy const size)
All time to memcpy 63 * 100M is 0.093s in 3GHz (zmemcopy const size)
All time to memcpy 63 * 100M is 0.060s in 3GHz (zmemcopy unknown array direct)
All time to memcpy 63 * 100M is 0.086s in 3GHz (zmemcopy unknown small size)
All time to memcpy 63 * 100M is 0.100s in 3GHz (zmemcopy unknown size)
All time to memcpy 63 * 100M is 0.172s in 3GHz (memcpy unknown size)

使用方法:一般情况下调用 zmemcpy(dest, src, size);当复制长度已知且为常量表达式时,可以改用 ZMemoryCopy::copy<size>(dest, src)。

// zmemcpy.cpp : 定义控制台应用程序的入口点。//#include "stdafx.h"#include "zmemcpy.h"//#include <intrin.h>//#include <nmmintrin.h>//#include <windows.h>//#include <utility>__declspec(noinline) void* GetCurrentAddress(){return _ReturnAddress();}inline void* GetRetAddress(){return _ReturnAddress();}__declspec(noinline) bool IsReleaseMode(){return _ReturnAddress() == GetRetAddress();}bool g_IsReleaseMode = IsReleaseMode();// #pragma runtime_checks( "scu", restore )//#define pCopyFunc(x) copy<x>#define pCopyFunc(x) ZMemoryCopy::___copy_##xvoid(*const copys[129])(char* dest, const char* src) ={ZMemoryCopy::___copy_0,pCopyFunc(1), pCopyFunc(2), pCopyFunc(3), pCopyFunc(4), pCopyFunc(5), pCopyFunc(6), pCopyFunc(7), pCopyFunc(8), pCopyFunc(9), pCopyFunc(10),pCopyFunc(11), pCopyFunc(12), pCopyFunc(13), pCopyFunc(14), pCopyFunc(15), pCopyFunc(16), pCopyFunc(17), pCopyFunc(18), pCopyFunc(19), pCopyFunc(20),pCopyFunc(21), pCopyFunc(22), pCopyFunc(23), pCopyFunc(24), pCopyFunc(25), pCopyFunc(26), pCopyFunc(27), pCopyFunc(28), pCopyFunc(29), pCopyFunc(30),pCopyFunc(31), pCopyFunc(32), pCopyFunc(33), pCopyFunc(34), pCopyFunc(35), pCopyFunc(36), pCopyFunc(37), pCopyFunc(38), pCopyFunc(39), pCopyFunc(40),pCopyFunc(41), pCopyFunc(42), pCopyFunc(43), pCopyFunc(44), pCopyFunc(45), pCopyFunc(46), pCopyFunc(47), pCopyFunc(48), pCopyFunc(49), pCopyFunc(50),pCopyFunc(51), pCopyFunc(52), pCopyFunc(53), pCopyFunc(54), pCopyFunc(55), pCopyFunc(56), pCopyFunc(57), pCopyFunc(58), pCopyFunc(59), pCopyFunc(60),pCopyFunc(61), pCopyFunc(62), pCopyFunc(63), pCopyFunc(64), pCopyFunc(65), pCopyFunc(66), pCopyFunc(67), pCopyFunc(68), pCopyFunc(69), pCopyFunc(70),pCopyFunc(71), pCopyFunc(72), pCopyFunc(73), pCopyFunc(74), pCopyFunc(75), pCopyFunc(76), pCopyFunc(77), pCopyFunc(78), pCopyFunc(79), pCopyFunc(80),pCopyFunc(81), pCopyFunc(82), pCopyFunc(83), pCopyFunc(84), pCopyFunc(85), pCopyFunc(86), pCopyFunc(87), pCopyFunc(88), pCopyFunc(89), pCopyFunc(90),pCopyFunc(91), pCopyFunc(92), pCopyFunc(93), pCopyFunc(94), 
pCopyFunc(95), pCopyFunc(96), pCopyFunc(97), pCopyFunc(98), pCopyFunc(99), pCopyFunc(100),pCopyFunc(101), pCopyFunc(102), pCopyFunc(103), pCopyFunc(104), pCopyFunc(105), pCopyFunc(106), pCopyFunc(107), pCopyFunc(108), pCopyFunc(109), pCopyFunc(110),pCopyFunc(111), pCopyFunc(112), pCopyFunc(113), pCopyFunc(114), pCopyFunc(115), pCopyFunc(116), pCopyFunc(117), pCopyFunc(118), pCopyFunc(119), pCopyFunc(120),pCopyFunc(121), pCopyFunc(122), pCopyFunc(123), pCopyFunc(124), pCopyFunc(125), pCopyFunc(126), pCopyFunc(127), pCopyFunc(128),};#undef pCopyFunc#define pCopyFunc(x) ZMemoryCopy::copy<x>void(*const template_copys[129])(char* dest, const char* src) ={ZMemoryCopy::___copy_0,pCopyFunc(1), pCopyFunc(2), pCopyFunc(3), pCopyFunc(4), pCopyFunc(5), pCopyFunc(6), pCopyFunc(7), pCopyFunc(8), pCopyFunc(9), pCopyFunc(10),pCopyFunc(11), pCopyFunc(12), pCopyFunc(13), pCopyFunc(14), pCopyFunc(15), pCopyFunc(16), pCopyFunc(17), pCopyFunc(18), pCopyFunc(19), pCopyFunc(20),pCopyFunc(21), pCopyFunc(22), pCopyFunc(23), pCopyFunc(24), pCopyFunc(25), pCopyFunc(26), pCopyFunc(27), pCopyFunc(28), pCopyFunc(29), pCopyFunc(30),pCopyFunc(31), pCopyFunc(32), pCopyFunc(33), pCopyFunc(34), pCopyFunc(35), pCopyFunc(36), pCopyFunc(37), pCopyFunc(38), pCopyFunc(39), pCopyFunc(40),pCopyFunc(41), pCopyFunc(42), pCopyFunc(43), pCopyFunc(44), pCopyFunc(45), pCopyFunc(46), pCopyFunc(47), pCopyFunc(48), pCopyFunc(49), pCopyFunc(50),pCopyFunc(51), pCopyFunc(52), pCopyFunc(53), pCopyFunc(54), pCopyFunc(55), pCopyFunc(56), pCopyFunc(57), pCopyFunc(58), pCopyFunc(59), pCopyFunc(60),pCopyFunc(61), pCopyFunc(62), pCopyFunc(63), pCopyFunc(64), pCopyFunc(65), pCopyFunc(66), pCopyFunc(67), pCopyFunc(68), pCopyFunc(69), pCopyFunc(70),pCopyFunc(71), pCopyFunc(72), pCopyFunc(73), pCopyFunc(74), pCopyFunc(75), pCopyFunc(76), pCopyFunc(77), pCopyFunc(78), pCopyFunc(79), pCopyFunc(80),pCopyFunc(81), pCopyFunc(82), pCopyFunc(83), pCopyFunc(84), pCopyFunc(85), pCopyFunc(86), pCopyFunc(87), pCopyFunc(88), pCopyFunc(89), 
pCopyFunc(90),pCopyFunc(91), pCopyFunc(92), pCopyFunc(93), pCopyFunc(94), pCopyFunc(95), pCopyFunc(96), pCopyFunc(97), pCopyFunc(98), pCopyFunc(99), pCopyFunc(100),pCopyFunc(101), pCopyFunc(102), pCopyFunc(103), pCopyFunc(104), pCopyFunc(105), pCopyFunc(106), pCopyFunc(107), pCopyFunc(108), pCopyFunc(109), pCopyFunc(110),pCopyFunc(111), pCopyFunc(112), pCopyFunc(113), pCopyFunc(114), pCopyFunc(115), pCopyFunc(116), pCopyFunc(117), pCopyFunc(118), pCopyFunc(119), pCopyFunc(120),pCopyFunc(121), pCopyFunc(122), pCopyFunc(123), pCopyFunc(124), pCopyFunc(125), pCopyFunc(126), pCopyFunc(127), pCopyFunc(128),};#undef pCopyFunc#define pCopyFunc(x) ZMemoryCopy::___copy_##xstatic void(*const static_copys[129])(char* dest, const char* src) ={ZMemoryCopy::___copy_0,pCopyFunc(1), pCopyFunc(2), pCopyFunc(3), pCopyFunc(4), pCopyFunc(5), pCopyFunc(6), pCopyFunc(7), pCopyFunc(8), pCopyFunc(9), pCopyFunc(10),pCopyFunc(11), pCopyFunc(12), pCopyFunc(13), pCopyFunc(14), pCopyFunc(15), pCopyFunc(16), pCopyFunc(17), pCopyFunc(18), pCopyFunc(19), pCopyFunc(20),pCopyFunc(21), pCopyFunc(22), pCopyFunc(23), pCopyFunc(24), pCopyFunc(25), pCopyFunc(26), pCopyFunc(27), pCopyFunc(28), pCopyFunc(29), pCopyFunc(30),pCopyFunc(31), pCopyFunc(32), pCopyFunc(33), pCopyFunc(34), pCopyFunc(35), pCopyFunc(36), pCopyFunc(37), pCopyFunc(38), pCopyFunc(39), pCopyFunc(40),pCopyFunc(41), pCopyFunc(42), pCopyFunc(43), pCopyFunc(44), pCopyFunc(45), pCopyFunc(46), pCopyFunc(47), pCopyFunc(48), pCopyFunc(49), pCopyFunc(50),pCopyFunc(51), pCopyFunc(52), pCopyFunc(53), pCopyFunc(54), pCopyFunc(55), pCopyFunc(56), pCopyFunc(57), pCopyFunc(58), pCopyFunc(59), pCopyFunc(60),pCopyFunc(61), pCopyFunc(62), pCopyFunc(63), pCopyFunc(64), pCopyFunc(65), pCopyFunc(66), pCopyFunc(67), pCopyFunc(68), pCopyFunc(69), pCopyFunc(70),pCopyFunc(71), pCopyFunc(72), pCopyFunc(73), pCopyFunc(74), pCopyFunc(75), pCopyFunc(76), pCopyFunc(77), pCopyFunc(78), pCopyFunc(79), pCopyFunc(80),pCopyFunc(81), pCopyFunc(82), pCopyFunc(83), 
pCopyFunc(84), pCopyFunc(85), pCopyFunc(86), pCopyFunc(87), pCopyFunc(88), pCopyFunc(89), pCopyFunc(90),pCopyFunc(91), pCopyFunc(92), pCopyFunc(93), pCopyFunc(94), pCopyFunc(95), pCopyFunc(96), pCopyFunc(97), pCopyFunc(98), pCopyFunc(99), pCopyFunc(100),pCopyFunc(101), pCopyFunc(102), pCopyFunc(103), pCopyFunc(104), pCopyFunc(105), pCopyFunc(106), pCopyFunc(107), pCopyFunc(108), pCopyFunc(109), pCopyFunc(110),pCopyFunc(111), pCopyFunc(112), pCopyFunc(113), pCopyFunc(114), pCopyFunc(115), pCopyFunc(116), pCopyFunc(117), pCopyFunc(118), pCopyFunc(119), pCopyFunc(120),pCopyFunc(121), pCopyFunc(122), pCopyFunc(123), pCopyFunc(124), pCopyFunc(125), pCopyFunc(126), pCopyFunc(127), pCopyFunc(128),};#undef pCopyFuncchar dest[32000000];char dest2[32000000];const char pSource_[32000000] = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""12345678901234567890123456789012345678901234567
89012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""123456789012345678901234567-901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890""abcde67890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";const char * volatile pSource = pSource_;// __declspec(noinline) void __fastcall donothing(int v)// {//  __asm nop;// }// int _______reserved = (donothing((const volatile int&)(const int&)1), donothing((const volatile int&)(const int&)2), 1);// __declspec(noinline) void __fastcall donothing_(int v)// {// donothing(v);// }// // __declspec(noinline) bool testConstFunction()// {// unsigned char* ptr;// if (*(unsigned short*)((ptr = (unsigned char*)_ReturnAddress())- 15) == *(unsigned short*)"\xC7\x05")// {// DWORD p;// VirtualProtect(ptr - 15, 15, PAGE_EXECUTE_READWRITE, &p);// p = *(int*)(ptr - 15 + 2 + 4);// memcpy(ptr - 15, "\xB8\x90\x90\x90\x90\xB8\x01\x00\x00\x00\x90\x90\x90\x90\x90", 15);//mov eax, 1// *(int*)(ptr - 14) = p;// return true;// }// else// {// DWORD p;// VirtualProtect(ptr - 10, 10, PAGE_EXECUTE_READWRITE, &p);// memcpy(ptr - 10, "\xB8\x00\x00\x00\x00\xB8\x00\x00\x00\x00", 10);//mov eax, 0// return false;// }// }// // static int g_somewhere;// #define testConst(v) (_mm_pause(), g_somewhere = v, 
testConstFunction())void test();#define constsize 63//每次复制的内存大小#if 0//1 开启无缓存测试:通过不断改变地址使cache失效#define nocache + (i & 0xFFF) * 4096#else//0 开启有缓存测试:源数据和目标数据总在cache中#define nocache#endif#if constsize > 128#define copysize constsizenamespace ZMemoryCopy{#include "zmemcpyinc.h"}#endif#pragma runtime_checks( "s", restore ) int _tmain(int argc, _TCHAR* argv[]){if (g_IsReleaseMode)puts("Release Mode");elseputs("Debug Mode");char test[2000];for (int i = 1; i <= 128; ++i){memset(test, 0, 200);template_copys[i](test, pSource);if(memcmp(test, pSource, i) != 0)__debugbreak();if (test[i] != (char)0)__debugbreak();}for (int i = 1; i <= 499; ++i){memset(test, 0, 500);zmemcpy(test, pSource, i);if (memcmp(test, pSource, i) != 0)__debugbreak();if (test[i] != (char)0)__debugbreak();}for (int i = 1; i <= 128; ++i){memset(test, 0, 200);static_copys[i](test, pSource);if (memcmp(test, pSource, i) != 0)__debugbreak();if (test[i] != (char)0)__debugbreak();}memset(dest, 0, sizeof(dest));memset(dest2, 0, sizeof(dest2));memcpy(dest2, pSource, sizeof(dest2));int volatile unknownSize = constsize;for (int j = 0; j < 4; ++j){{__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){ZMemoryCopy::copy<constsize>(dest nocache/*+ 1*/, pSource nocache /*+ 1*/);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy template const size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}{__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){_COMBINE(ZMemoryCopy::___copy_, constsize)(dest nocache/*+ 1*/, pSource nocache /*+ 1*/);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy static  const size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}{__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){memcpy(dest nocache/*+ 1*/, pSource nocache/*+ 1*/, constsize);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (memcpy const size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}{__int64 t = 
__rdtsc();for (int i = 0; i < 10000000; ++i){zmemcpy(dest nocache/*+ 1*/, pSource nocache /*+ 1*/, constsize);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy const size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}if (unknownSize < 128){__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){copys[unknownSize](dest nocache/*+ 1*/, pSource nocache /*+ 1*/);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy unknown array direct)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}if (unknownSize < 128){__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){zmemcpy_max128(dest nocache/*+ 1*/, pSource nocache /*+ 1*/, unknownSize);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy unknown small size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}{__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){zmemcpy(dest nocache/*+ 1*/, pSource nocache /*+ 1*/, unknownSize);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (zmemcopy unknown size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}{__int64 t = __rdtsc();for (int i = 0; i < 10000000; ++i){memcpy(dest nocache/*+ 1*/, pSource nocache /*+ 1*/, unknownSize);}t = __rdtsc() - t;printf("All time to memcpy %d * %dM is %0.3fs in 3GHz (memcpy unknown size)\n", constsize, 100000000 / 1000000, t / 3000000000.0);}puts("");}return 0;} #pragma runtime_checks( "s", restore ) 


zmemcpy.h:

#pragma once#include <windows.h>#include <intrin.h>#ifndef _SAFEBUFFERS#if _MSC_VER >= 1600#define _SAFEBUFFERS __declspec(safebuffers)#else#define _SAFEBUFFERS#endif#endifnamespace z{#ifndef _Z_IF_DEFINED#define _Z_IF_DEFINED template<bool v> struct If {  enum{ True = 1 }; }; template<> struct If < false > {  enum{ False = 1 }; }; //强制使用z::If<false>和z::If<true> //这样__if_exists有效#if _MSC_VER >= 1600 static_assert(z::If<false>::False, ""); static_assert(z::If<true>::True, "");#else enum{ ___unknown = z::If<true>::True + z::If<false>::False };#endif#endif //_Z_IF_DEFINED#pragma runtime_checks( "s", off)  //由于#pragma runtime_checks必须在cpp末尾关闭,才能对模板生效,因此这里手工动态修改机器码移除stack check代码 //加速Debug模式下的函数执行 inline __declspec(noinline) _SAFEBUFFERS void RemoveCodeOf_InitESPBuffer() {  __asm pushad;  {   unsigned char* ptr;   if (*(unsigned int*)((ptr = (unsigned char*)_ReturnAddress() - 17) + 5) == *(unsigned int*)"\xB8\xCC\xCC\xCC")   {    DWORD p;    VirtualProtect(ptr, 17, PAGE_EXECUTE_READWRITE, &p);    //memset(ptr, 0x90, 17);    memcpy(ptr, "\xE9\x0C\x00\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90", 17);   }   else if (*(unsigned char*)(ptr = (unsigned char*)_ReturnAddress() - 5) == 0xE8u)   {    DWORD p;    VirtualProtect(ptr, 5, PAGE_EXECUTE_READWRITE, &p);    memset(ptr, 0x90, 5);   }  }  __asm popad; } inline __declspec(noinline) _SAFEBUFFERS void RemoveCodeOf_CheckESP() {  unsigned char* ptr;  if (*(unsigned char*)(ptr = (unsigned char*)_ReturnAddress() - 5) == 0xE8u)  {   DWORD p;   VirtualProtect(ptr, 22, PAGE_EXECUTE_READWRITE, &p);   memset(ptr, 0x90, 5);   if (*(unsigned int*)(ptr + 5 + 1 + 3 + 5) == *(unsigned int*)"\x00\x3B\xEC\xE8")   {    memset(ptr + 5 + 1 + 3 + 5 + 1, 0x90, 7);    memcpy(ptr + 5 + 1 + 3 + 5 + 1, "\x8B\xE5\x5D\xC3", 4);    //   memcpy(ptr + 5, "\x5F\x5E\x5B\x81\xC4\xC0\x00\x00\x00\x8B\xE5\x5D\xC3", 13);   }   //3B EC cmp         ebp, esp   //E8 xxxxxxxx call  __RTC_CheckEsp  }  else  {   __debugbreak();  } }#pragma runtime_checks( 
"s", restore ) }namespace ZMemoryCopy{#pragma runtime_checks( "s", off)  //成组地复制128字节 inline __declspec(noinline) _SAFEBUFFERS void __copy_group(char* dest, const char* src, int size) {  __asm  {   mov esi, dword ptr[src];   mov edi, dword ptr[dest];  }  while ((size -= 0x80) >= 0)  {   __asm   {    movdqu xmm0, xmmword ptr[esi + 0x00];    movdqu xmm1, xmmword ptr[esi + 0x10];    movdqu xmm2, xmmword ptr[esi + 0x20];    movdqu xmm3, xmmword ptr[esi + 0x30];    movdqu xmm4, xmmword ptr[esi + 0x40];    movdqu xmm5, xmmword ptr[esi + 0x50];    movdqu xmm6, xmmword ptr[esi + 0x60];    movdqu xmm7, xmmword ptr[esi + 0x70];    prefetchnta[esi + 0x80];    prefetchnta[esi + 0xC0];    movdqu xmmword ptr[edi + 0x00], xmm0;    movdqu xmmword ptr[edi + 0x10], xmm1;    movdqu xmmword ptr[edi + 0x20], xmm2;    movdqu xmmword ptr[edi + 0x30], xmm3;    movdqu xmmword ptr[edi + 0x40], xmm4;    movdqu xmmword ptr[edi + 0x50], xmm5;    movdqu xmmword ptr[edi + 0x60], xmm6;    movdqu xmmword ptr[edi + 0x70], xmm7;    add esi, 0x80;    add edi, 0x80;   }  } } //如果已知块大小(且是常量表达式),可以直接使用这个版本 template<int copysize> static _SAFEBUFFERS __forceinline void copy(char* dest, const char* src) {  //由于#pragma runtime_checks必须在cpp末尾关闭,才能对模板生效,因此这里手工动态修改机器码移除stack check代码  //加速Debug模式下的函数执行  z::RemoveCodeOf_InitESPBuffer();  __if_exists(z::If<(copysize >= 4000)>::True)  {   memcpy(dest, src, copysize);  }  __if_exists(z::If<(copysize >= 4000)>::False)  {   __asm   {    mov esi, dword ptr[src];    mov edi, dword ptr[dest];   }   __if_exists(z::If<(copysize >= 0x80 * 3)>::True)   {    __asm    {     prefetchnta[esi + 0x40];    }    int vsize = copysize;    while ((vsize -= 0x80) >= 0x80)   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize >= 0x80 * 2)>::True)   {    __asm    {     movdqu xmm0, xmmword ptr[esi + 0x00];     movdqu xmm1, xmmword ptr[esi + 0x10];     movdqu xmm2, xmmword ptr[esi + 0x20];     movdqu xmm3, xmmword ptr[esi + 0x30];   
  movdqu xmm4, xmmword ptr[esi + 0x40];     movdqu xmm5, xmmword ptr[esi + 0x50];     movdqu xmm6, xmmword ptr[esi + 0x60];     movdqu xmm7, xmmword ptr[esi + 0x70];     prefetchnta[esi + 0x80];     prefetchnta[esi + 0xC0];     movdqu xmmword ptr[edi + 0x00], xmm0;     movdqu xmmword ptr[edi + 0x10], xmm1;     movdqu xmmword ptr[edi + 0x20], xmm2;     movdqu xmmword ptr[edi + 0x30], xmm3;     movdqu xmmword ptr[edi + 0x40], xmm4;     movdqu xmmword ptr[edi + 0x50], xmm5;     movdqu xmmword ptr[edi + 0x60], xmm6;     movdqu xmmword ptr[edi + 0x70], xmm7;     add esi, 0x80;     add edi, 0x80;    }   }   enum { offset1 = 0 };   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize >= 0x80)>::True)   {    __asm    {     movdqu xmm0, xmmword ptr[esi + 0x00];     movdqu xmm1, xmmword ptr[esi + 0x10];     movdqu xmm2, xmmword ptr[esi + 0x20];     movdqu xmm3, xmmword ptr[esi + 0x30];     movdqu xmm4, xmmword ptr[esi + 0x40];     movdqu xmm5, xmmword ptr[esi + 0x50];     movdqu xmm6, xmmword ptr[esi + 0x60];     movdqu xmm7, xmmword ptr[esi + 0x70];    }    __if_exists(z::If<(copysize & 0x60)>::True)    {     __asm     {      prefetchnta[esi + 0x80];     }    }    __asm    {     movdqu xmmword ptr[edi + 0x00], xmm0;     movdqu xmmword ptr[edi + 0x10], xmm1;     movdqu xmmword ptr[edi + 0x20], xmm2;     movdqu xmmword ptr[edi + 0x30], xmm3;     movdqu xmmword ptr[edi + 0x40], xmm4;     movdqu xmmword ptr[edi + 0x50], xmm5;     movdqu xmmword ptr[edi + 0x60], xmm6;     movdqu xmmword ptr[edi + 0x70], xmm7;     //    add esi, 0x80;     //    add edi, 0x80;    }    enum { offset2 = 0x80 };   }   __if_exists(z::If<(copysize >= 0x80)>::False)   {    enum { offset2 = 0 };   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize & 0x40)>::True)   {    __asm    {     movdqu xmm0, xmmword ptr[esi + offset2 + 0x00];     movdqu xmm1, xmmword ptr[esi + offset2 + 0x10];     movdqu 
xmm2, xmmword ptr[esi + offset2 + 0x20];     movdqu xmm3, xmmword ptr[esi + offset2 + 0x30];     movdqu xmmword ptr[edi + offset2 + 0x00], xmm0;     movdqu xmmword ptr[edi + offset2 + 0x10], xmm1;     movdqu xmmword ptr[edi + offset2 + 0x20], xmm2;     movdqu xmmword ptr[edi + offset2 + 0x30], xmm3;    }    enum { offset3 = offset2 + 0x40 };   }   __if_exists(z::If<(copysize & 0x40)>::False)   {    enum { offset3 = offset2 };   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize & 0x20)>::True)   {    __asm    {     movdqu xmm4, xmmword ptr[esi + offset3 + 0x00];     movdqu xmm5, xmmword ptr[esi + offset3 + 0x10];     movdqu xmmword ptr[edi + offset3 + 0x00], xmm4;     movdqu xmmword ptr[edi + offset3 + 0x10], xmm5;    }    enum { offset4 = offset3 + 0x20 };   }   __if_exists(z::If<(copysize & 0x20)>::False)   {    enum { offset4 = offset3 };   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize & 0x10)>::True)   {    __asm    {     movdqu xmm6, xmmword ptr[esi + offset4 + 0x00];     movdqu xmmword ptr[edi + offset4 + 0x00], xmm6;    }    enum { offset5 = offset4 + 0x10 };   }   __if_exists(z::If<(copysize & 0x10)>::False)   {    enum { offset5 = offset4 };   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize & 0x8)>::True)   {    __asm    {     movlpd xmm7, qword ptr[esi + offset5];     movlpd qword ptr[edi + offset5], xmm7;    }    enum { offset6 = offset5 + 0x8 };   }   __if_exists(z::If<(copysize & 0x8)>::False)   {    enum { offset6 = offset5 };   }   //////////////////////////////////////////////////////////////////////////   __if_exists(z::If<(copysize & 0x7)>::True)   {    enum { copydone = false };    {     __if_exists(z::If < ((copysize & 0x7) > 4) && copysize >= 8 > ::True) // 5 6 7 //8字节移动版     {      enum{ copy_offset = (copysize & 0x7) - 8 };      __asm      {       movlpd xmm0, 
qword ptr[esi + offset6 + copy_offset];       movlpd qword ptr[edi + offset6 + copy_offset], xmm0;      }      enum { copydone = true };      //return;     }     __if_exists(z::If <!copydone && ((copysize & 0x7) >= 4)>::True) // 4 5 6 7        //缓冲区不够先移动4字节     {      __asm      {       mov eax, dword ptr[esi + offset6];       mov dword ptr[edi + offset6], eax;      }      enum{ offset6 = offset6 + 4 };     }     __if_exists(z::If <!copydone && ((copysize & 0x3) == 3) && (copysize >= 4)> ::True) //3     {      __asm      {       mov eax, dword ptr[esi + offset6 - 1];       mov dword ptr[edi + offset6 - 1], eax;      }      enum { copydone = true };      //return;     }     __if_exists(z::If <!copydone && ((copysize & 0x3) == 3)> ::True) //3     {      __asm      {       mov ax, word ptr[esi + offset6];       mov word ptr[edi + offset6], ax;       mov al, byte ptr[esi + offset6 + 2];       mov byte ptr[edi + offset6 + 2], al;      }      enum { copydone = true };      //return;     }     __if_exists(z::If <!copydone && ((copysize & 0x3) == 2) > ::True) //2     {      __asm      {       mov ax, word ptr[esi + offset6];       mov word ptr[edi + offset6], ax;      }      enum { copydone = true };      //return;     }     __if_exists(z::If <!copydone && ((copysize & 0x3) == 1) > ::True) //1     {      __asm      {       mov al, byte ptr[esi + offset6];       mov byte ptr[edi + offset6], al;      }      enum { copydone = true };      //return;     }     __if_exists(z::If <!copydone && ((copysize & 0x3) == 0) > ::True) //0     {      enum { copydone = true };      //return;     }     __if_exists(z::If<!copydone>::True)     {      static_assert(0, "");     }    }   }  }  z::RemoveCodeOf_CheckESP();  __asm nop; } inline void ___copy_0(char* dest, const char* src) {}#pragma runtime_checks( "s", restore)#define copysize 1#include "zmemcpyinc.h"#define copysize 2#include "zmemcpyinc.h"#define copysize 3#include "zmemcpyinc.h"#define copysize 4#include "zmemcpyinc.h"#define 
copysize 5#include "zmemcpyinc.h"#define copysize 6#include "zmemcpyinc.h"#define copysize 7#include "zmemcpyinc.h"#define copysize 8#include "zmemcpyinc.h"#define copysize 9#include "zmemcpyinc.h"#define copysize 10#include "zmemcpyinc.h"#define copysize 11#include "zmemcpyinc.h"#define copysize 12#include "zmemcpyinc.h"#define copysize 13#include "zmemcpyinc.h"#define copysize 14#include "zmemcpyinc.h"#define copysize 15#include "zmemcpyinc.h"#define copysize 16#include "zmemcpyinc.h"#define copysize 17#include "zmemcpyinc.h"#define copysize 18#include "zmemcpyinc.h"#define copysize 19#include "zmemcpyinc.h"#define copysize 20#include "zmemcpyinc.h"#define copysize 21#include "zmemcpyinc.h"#define copysize 22#include "zmemcpyinc.h"#define copysize 23#include "zmemcpyinc.h"#define copysize 24#include "zmemcpyinc.h"#define copysize 25#include "zmemcpyinc.h"#define copysize 26#include "zmemcpyinc.h"#define copysize 27#include "zmemcpyinc.h"#define copysize 28#include "zmemcpyinc.h"#define copysize 29#include "zmemcpyinc.h"#define copysize 30#include "zmemcpyinc.h"#define copysize 31#include "zmemcpyinc.h"#define copysize 32#include "zmemcpyinc.h"#define copysize 33#include "zmemcpyinc.h"#define copysize 34#include "zmemcpyinc.h"#define copysize 35#include "zmemcpyinc.h"#define copysize 36#include "zmemcpyinc.h"#define copysize 37#include "zmemcpyinc.h"#define copysize 38#include "zmemcpyinc.h"#define copysize 39#include "zmemcpyinc.h"#define copysize 40#include "zmemcpyinc.h"#define copysize 41#include "zmemcpyinc.h"#define copysize 42#include "zmemcpyinc.h"#define copysize 43#include "zmemcpyinc.h"#define copysize 44#include "zmemcpyinc.h"#define copysize 45#include "zmemcpyinc.h"#define copysize 46#include "zmemcpyinc.h"#define copysize 47#include "zmemcpyinc.h"#define copysize 48#include "zmemcpyinc.h"#define copysize 49#include "zmemcpyinc.h"#define copysize 50#include "zmemcpyinc.h"#define copysize 51#include "zmemcpyinc.h"#define copysize 52#include 
"zmemcpyinc.h"#define copysize 53#include "zmemcpyinc.h"#define copysize 54#include "zmemcpyinc.h"#define copysize 55#include "zmemcpyinc.h"#define copysize 56#include "zmemcpyinc.h"#define copysize 57#include "zmemcpyinc.h"#define copysize 58#include "zmemcpyinc.h"#define copysize 59#include "zmemcpyinc.h"#define copysize 60#include "zmemcpyinc.h"#define copysize 61#include "zmemcpyinc.h"#define copysize 62#include "zmemcpyinc.h"#define copysize 63#include "zmemcpyinc.h"#define copysize 64#include "zmemcpyinc.h"#define copysize 65#include "zmemcpyinc.h"#define copysize 66#include "zmemcpyinc.h"#define copysize 67#include "zmemcpyinc.h"#define copysize 68#include "zmemcpyinc.h"#define copysize 69#include "zmemcpyinc.h"#define copysize 70#include "zmemcpyinc.h"#define copysize 71#include "zmemcpyinc.h"#define copysize 72#include "zmemcpyinc.h"#define copysize 73#include "zmemcpyinc.h"#define copysize 74#include "zmemcpyinc.h"#define copysize 75#include "zmemcpyinc.h"#define copysize 76#include "zmemcpyinc.h"#define copysize 77#include "zmemcpyinc.h"#define copysize 78#include "zmemcpyinc.h"#define copysize 79#include "zmemcpyinc.h"#define copysize 80#include "zmemcpyinc.h"#define copysize 81#include "zmemcpyinc.h"#define copysize 82#include "zmemcpyinc.h"#define copysize 83#include "zmemcpyinc.h"#define copysize 84#include "zmemcpyinc.h"#define copysize 85#include "zmemcpyinc.h"#define copysize 86#include "zmemcpyinc.h"#define copysize 87#include "zmemcpyinc.h"#define copysize 88#include "zmemcpyinc.h"#define copysize 89#include "zmemcpyinc.h"#define copysize 90#include "zmemcpyinc.h"#define copysize 91#include "zmemcpyinc.h"#define copysize 92#include "zmemcpyinc.h"#define copysize 93#include "zmemcpyinc.h"#define copysize 94#include "zmemcpyinc.h"#define copysize 95#include "zmemcpyinc.h"#define copysize 96#include "zmemcpyinc.h"#define copysize 97#include "zmemcpyinc.h"#define copysize 98#include "zmemcpyinc.h"#define copysize 99#include "zmemcpyinc.h"#define 
copysize 100#include "zmemcpyinc.h"#define copysize 101#include "zmemcpyinc.h"#define copysize 102#include "zmemcpyinc.h"#define copysize 103#include "zmemcpyinc.h"#define copysize 104#include "zmemcpyinc.h"#define copysize 105#include "zmemcpyinc.h"#define copysize 106#include "zmemcpyinc.h"#define copysize 107#include "zmemcpyinc.h"#define copysize 108#include "zmemcpyinc.h"#define copysize 109#include "zmemcpyinc.h"#define copysize 110#include "zmemcpyinc.h"#define copysize 111#include "zmemcpyinc.h"#define copysize 112#include "zmemcpyinc.h"#define copysize 113#include "zmemcpyinc.h"#define copysize 114#include "zmemcpyinc.h"#define copysize 115#include "zmemcpyinc.h"#define copysize 116#include "zmemcpyinc.h"#define copysize 117#include "zmemcpyinc.h"#define copysize 118#include "zmemcpyinc.h"#define copysize 119#include "zmemcpyinc.h"#define copysize 120#include "zmemcpyinc.h"#define copysize 121#include "zmemcpyinc.h"#define copysize 122#include "zmemcpyinc.h"#define copysize 123#include "zmemcpyinc.h"#define copysize 124#include "zmemcpyinc.h"#define copysize 125#include "zmemcpyinc.h"#define copysize 126#include "zmemcpyinc.h"#define copysize 127#include "zmemcpyinc.h"#define copysize 128#include "zmemcpyinc.h"#pragma runtime_checks( "s", off )  __forceinline void zmemcpy(char* dest, const char* src, size_t size) {#define pCopyFunc(x) ZMemoryCopy::___copy_##x  static void(*const static_copys[129])(char* dest, const char* src) =  {   ZMemoryCopy::___copy_0,   pCopyFunc(1), pCopyFunc(2), pCopyFunc(3), pCopyFunc(4), pCopyFunc(5), pCopyFunc(6), pCopyFunc(7), pCopyFunc(8), pCopyFunc(9), pCopyFunc(10),   pCopyFunc(11), pCopyFunc(12), pCopyFunc(13), pCopyFunc(14), pCopyFunc(15), pCopyFunc(16), pCopyFunc(17), pCopyFunc(18), pCopyFunc(19), pCopyFunc(20),   pCopyFunc(21), pCopyFunc(22), pCopyFunc(23), pCopyFunc(24), pCopyFunc(25), pCopyFunc(26), pCopyFunc(27), pCopyFunc(28), pCopyFunc(29), pCopyFunc(30),   pCopyFunc(31), pCopyFunc(32), pCopyFunc(33), pCopyFunc(34), 
pCopyFunc(35), pCopyFunc(36), pCopyFunc(37), pCopyFunc(38), pCopyFunc(39), pCopyFunc(40),   pCopyFunc(41), pCopyFunc(42), pCopyFunc(43), pCopyFunc(44), pCopyFunc(45), pCopyFunc(46), pCopyFunc(47), pCopyFunc(48), pCopyFunc(49), pCopyFunc(50),   pCopyFunc(51), pCopyFunc(52), pCopyFunc(53), pCopyFunc(54), pCopyFunc(55), pCopyFunc(56), pCopyFunc(57), pCopyFunc(58), pCopyFunc(59), pCopyFunc(60),   pCopyFunc(61), pCopyFunc(62), pCopyFunc(63), pCopyFunc(64), pCopyFunc(65), pCopyFunc(66), pCopyFunc(67), pCopyFunc(68), pCopyFunc(69), pCopyFunc(70),   pCopyFunc(71), pCopyFunc(72), pCopyFunc(73), pCopyFunc(74), pCopyFunc(75), pCopyFunc(76), pCopyFunc(77), pCopyFunc(78), pCopyFunc(79), pCopyFunc(80),   pCopyFunc(81), pCopyFunc(82), pCopyFunc(83), pCopyFunc(84), pCopyFunc(85), pCopyFunc(86), pCopyFunc(87), pCopyFunc(88), pCopyFunc(89), pCopyFunc(90),   pCopyFunc(91), pCopyFunc(92), pCopyFunc(93), pCopyFunc(94), pCopyFunc(95), pCopyFunc(96), pCopyFunc(97), pCopyFunc(98), pCopyFunc(99), pCopyFunc(100),   pCopyFunc(101), pCopyFunc(102), pCopyFunc(103), pCopyFunc(104), pCopyFunc(105), pCopyFunc(106), pCopyFunc(107), pCopyFunc(108), pCopyFunc(109), pCopyFunc(110),   pCopyFunc(111), pCopyFunc(112), pCopyFunc(113), pCopyFunc(114), pCopyFunc(115), pCopyFunc(116), pCopyFunc(117), pCopyFunc(118), pCopyFunc(119), pCopyFunc(120),   pCopyFunc(121), pCopyFunc(122), pCopyFunc(123), pCopyFunc(124), pCopyFunc(125), pCopyFunc(126), pCopyFunc(127), pCopyFunc(128),  };#undef pCopyFunc  if (size >= 128)   __copy_group(dest, src, size);  if (size & 127)   static_copys[size & 127](dest + (size &~127), src + (size &~127)); } __forceinline void zmemcpy_max128(char* dest, const char* src, size_t size) {#define pCopyFunc(x) ZMemoryCopy::___copy_##x  static void(*const static_copys[129])(char* dest, const char* src) =  {   ZMemoryCopy::___copy_0,   pCopyFunc(1), pCopyFunc(2), pCopyFunc(3), pCopyFunc(4), pCopyFunc(5), pCopyFunc(6), pCopyFunc(7), pCopyFunc(8), pCopyFunc(9), pCopyFunc(10),   pCopyFunc(11), 
pCopyFunc(12), pCopyFunc(13), pCopyFunc(14), pCopyFunc(15), pCopyFunc(16), pCopyFunc(17), pCopyFunc(18), pCopyFunc(19), pCopyFunc(20),   pCopyFunc(21), pCopyFunc(22), pCopyFunc(23), pCopyFunc(24), pCopyFunc(25), pCopyFunc(26), pCopyFunc(27), pCopyFunc(28), pCopyFunc(29), pCopyFunc(30),   pCopyFunc(31), pCopyFunc(32), pCopyFunc(33), pCopyFunc(34), pCopyFunc(35), pCopyFunc(36), pCopyFunc(37), pCopyFunc(38), pCopyFunc(39), pCopyFunc(40),   pCopyFunc(41), pCopyFunc(42), pCopyFunc(43), pCopyFunc(44), pCopyFunc(45), pCopyFunc(46), pCopyFunc(47), pCopyFunc(48), pCopyFunc(49), pCopyFunc(50),   pCopyFunc(51), pCopyFunc(52), pCopyFunc(53), pCopyFunc(54), pCopyFunc(55), pCopyFunc(56), pCopyFunc(57), pCopyFunc(58), pCopyFunc(59), pCopyFunc(60),   pCopyFunc(61), pCopyFunc(62), pCopyFunc(63), pCopyFunc(64), pCopyFunc(65), pCopyFunc(66), pCopyFunc(67), pCopyFunc(68), pCopyFunc(69), pCopyFunc(70),   pCopyFunc(71), pCopyFunc(72), pCopyFunc(73), pCopyFunc(74), pCopyFunc(75), pCopyFunc(76), pCopyFunc(77), pCopyFunc(78), pCopyFunc(79), pCopyFunc(80),   pCopyFunc(81), pCopyFunc(82), pCopyFunc(83), pCopyFunc(84), pCopyFunc(85), pCopyFunc(86), pCopyFunc(87), pCopyFunc(88), pCopyFunc(89), pCopyFunc(90),   pCopyFunc(91), pCopyFunc(92), pCopyFunc(93), pCopyFunc(94), pCopyFunc(95), pCopyFunc(96), pCopyFunc(97), pCopyFunc(98), pCopyFunc(99), pCopyFunc(100),   pCopyFunc(101), pCopyFunc(102), pCopyFunc(103), pCopyFunc(104), pCopyFunc(105), pCopyFunc(106), pCopyFunc(107), pCopyFunc(108), pCopyFunc(109), pCopyFunc(110),   pCopyFunc(111), pCopyFunc(112), pCopyFunc(113), pCopyFunc(114), pCopyFunc(115), pCopyFunc(116), pCopyFunc(117), pCopyFunc(118), pCopyFunc(119), pCopyFunc(120),   pCopyFunc(121), pCopyFunc(122), pCopyFunc(123), pCopyFunc(124), pCopyFunc(125), pCopyFunc(126), pCopyFunc(127), pCopyFunc(128),  };#undef pCopyFunc  __assume(size <= 128);  static_copys[size](dest, src); }#pragma runtime_checks( "s", restore)}using ZMemoryCopy::zmemcpy;using ZMemoryCopy::zmemcpy_max128;

zmemcpyinc.h :

// zmemcpyinc.h -- code-generating include: every inclusion emits one fixed-size
// copy routine named ___copy_<copysize>.
//
// Usage (as done by the including header): #define copysize N, then include
// this file. copysize is #undef'd at the bottom, so it has to be defined again
// before the next inclusion.
//
// The body is MSVC-specific: __asm blocks using 32-bit registers (esi/edi/eax)
// and xmm moves, with the instruction sequence selected at compile time through
// __if_exists.
//
// NOTE(review): z::If<cond> is declared elsewhere. This file presumes it exposes
// a member `True` only when cond is non-zero and a member `False` only when cond
// is zero -- confirm against its definition.
// NOTE(review): enums declared inside __if_exists bodies (offset2 .. offset6) are
// used after those bodies close, so the code depends on __if_exists NOT opening
// a scope of its own -- confirm for the compiler version in use.
// NOTE(review): keep one instruction per line inside __asm blocks; per the MSVC
// inline-assembler documentation a ';' there starts an assembler comment that
// runs to the end of the line.
#ifndef END_WITH_copysize
#ifndef _COMBINE2
// Two-level paste so that `copysize` is macro-expanded before ## is applied.
#define _COMBINE2(x,y) x##y
#define _COMBINE(x,y) _COMBINE2(x,y)
#endif
// END_WITH_copysize(foo_) -> foo_<current value of copysize>
#define END_WITH_copysize(x) _COMBINE(x, copysize)
#endif
// Turn the "s" runtime check off for the generated function (restored below).
#pragma runtime_checks( "s", off )
// Copies exactly `copysize` bytes from src to dest.
inline void END_WITH_copysize(___copy_)(char* dest, const char* src)
{
    // Load both pointers once; every later __asm block addresses memory relative
    // to esi (source) and edi (destination).
    // NOTE(review): this assumes the compiler leaves esi/edi untouched between
    // the separate __asm blocks -- confirm in the generated code.
    __asm
    {
        mov esi, dword ptr[src];
        mov edi, dword ptr[dest];
    }
    // copysize >= 384: loop over 128-byte groups.
    __if_exists(z::If<(copysize >= 0x80 * 3)>::True)
    {
        __asm
        {
            prefetchnta[esi + 0x40];
        }
        int vsize = copysize;
        // Deliberately no loop body here: the next statement in the token stream
        // is the __asm block of the following __if_exists (whose condition,
        // copysize >= 256, holds whenever this one does), and that block becomes
        // the body of this while. It advances esi/edi by 0x80 per iteration; the
        // last 128-byte group is left for the non-advancing block further down.
        while ((vsize -= 0x80) >= 0x80)
    }
    //////////////////////////////////////////////////////////////////////////
    // copysize >= 256: copy one 128-byte group through xmm0..xmm7 and advance
    // both pointers (runs once, or repeatedly as the body of the while above).
    __if_exists(z::If<(copysize >= 0x80 * 2)>::True)
    {
        __asm
        {
            movdqu xmm0, xmmword ptr[esi + 0x00];
            movdqu xmm1, xmmword ptr[esi + 0x10];
            movdqu xmm2, xmmword ptr[esi + 0x20];
            movdqu xmm3, xmmword ptr[esi + 0x30];
            movdqu xmm4, xmmword ptr[esi + 0x40];
            movdqu xmm5, xmmword ptr[esi + 0x50];
            movdqu xmm6, xmmword ptr[esi + 0x60];
            movdqu xmm7, xmmword ptr[esi + 0x70];
            prefetchnta[esi + 0x80];
            prefetchnta[esi + 0xC0];
            movdqu xmmword ptr[edi + 0x00], xmm0;
            movdqu xmmword ptr[edi + 0x10], xmm1;
            movdqu xmmword ptr[edi + 0x20], xmm2;
            movdqu xmmword ptr[edi + 0x30], xmm3;
            movdqu xmmword ptr[edi + 0x40], xmm4;
            movdqu xmmword ptr[edi + 0x50], xmm5;
            movdqu xmmword ptr[edi + 0x60], xmm6;
            movdqu xmmword ptr[edi + 0x70], xmm7;
            add esi, 0x80;
            add edi, 0x80;
        }
    }
    // Not referenced anywhere in this file.
    enum { offset1 = 0 };
    //////////////////////////////////////////////////////////////////////////
    // copysize >= 128: copy one 128-byte group WITHOUT advancing esi/edi; the
    // remaining pieces are addressed through the compile-time offsets instead
    // (offset2 = 0x80 here, 0 otherwise).
    __if_exists(z::If<(copysize >= 0x80)>::True)
    {
        __asm
        {
            movdqu xmm0, xmmword ptr[esi + 0x00];
            movdqu xmm1, xmmword ptr[esi + 0x10];
            movdqu xmm2, xmmword ptr[esi + 0x20];
            movdqu xmm3, xmmword ptr[esi + 0x30];
            movdqu xmm4, xmmword ptr[esi + 0x40];
            movdqu xmm5, xmmword ptr[esi + 0x50];
            movdqu xmm6, xmmword ptr[esi + 0x60];
            movdqu xmm7, xmmword ptr[esi + 0x70];
        }
        // Prefetch the next line only when a 64- or 32-byte piece follows.
        __if_exists(z::If<(copysize & 0x60)>::True)
        {
            __asm
            {
                prefetchnta[esi + 0x80];
            }
        }
        __asm
        {
            movdqu xmmword ptr[edi + 0x00], xmm0;
            movdqu xmmword ptr[edi + 0x10], xmm1;
            movdqu xmmword ptr[edi + 0x20], xmm2;
            movdqu xmmword ptr[edi + 0x30], xmm3;
            movdqu xmmword ptr[edi + 0x40], xmm4;
            movdqu xmmword ptr[edi + 0x50], xmm5;
            movdqu xmmword ptr[edi + 0x60], xmm6;
            movdqu xmmword ptr[edi + 0x70], xmm7;
            // (disabled) pointer advance; offset2 is used instead:
            //    add esi, 0x80;
            //    add edi, 0x80;
        }
        enum { offset2 = 0x80 };
    }
    __if_exists(z::If<(copysize >= 0x80)>::False)
    {
        enum { offset2 = 0 };
    }
    //////////////////////////////////////////////////////////////////////////
    // Bit 0x40 of copysize set: copy a 64-byte piece at offset2.
    __if_exists(z::If<(copysize & 0x40)>::True)
    {
        __asm
        {
            movdqu xmm0, xmmword ptr[esi + offset2 + 0x00];
            movdqu xmm1, xmmword ptr[esi + offset2 + 0x10];
            movdqu xmm2, xmmword ptr[esi + offset2 + 0x20];
            movdqu xmm3, xmmword ptr[esi + offset2 + 0x30];
            movdqu xmmword ptr[edi + offset2 + 0x00], xmm0;
            movdqu xmmword ptr[edi + offset2 + 0x10], xmm1;
            movdqu xmmword ptr[edi + offset2 + 0x20], xmm2;
            movdqu xmmword ptr[edi + offset2 + 0x30], xmm3;
        }
        enum { offset3 = offset2 + 0x40 };
    }
    __if_exists(z::If<(copysize & 0x40)>::False)
    {
        enum { offset3 = offset2 };
    }
    //////////////////////////////////////////////////////////////////////////
    // Bit 0x20 set: copy a 32-byte piece at offset3.
    __if_exists(z::If<(copysize & 0x20)>::True)
    {
        __asm
        {
            movdqu xmm4, xmmword ptr[esi + offset3 + 0x00];
            movdqu xmm5, xmmword ptr[esi + offset3 + 0x10];
            movdqu xmmword ptr[edi + offset3 + 0x00], xmm4;
            movdqu xmmword ptr[edi + offset3 + 0x10], xmm5;
        }
        enum { offset4 = offset3 + 0x20 };
    }
    __if_exists(z::If<(copysize & 0x20)>::False)
    {
        enum { offset4 = offset3 };
    }
    //////////////////////////////////////////////////////////////////////////
    // Bit 0x10 set: copy a 16-byte piece at offset4.
    __if_exists(z::If<(copysize & 0x10)>::True)
    {
        __asm
        {
            movdqu xmm6, xmmword ptr[esi + offset4 + 0x00];
            movdqu xmmword ptr[edi + offset4 + 0x00], xmm6;
        }
        enum { offset5 = offset4 + 0x10 };
    }
    __if_exists(z::If<(copysize & 0x10)>::False)
    {
        enum { offset5 = offset4 };
    }
    //////////////////////////////////////////////////////////////////////////
    // Bit 0x8 set: copy an 8-byte piece at offset5 via the low half of xmm7.
    __if_exists(z::If<(copysize & 0x8)>::True)
    {
        __asm
        {
            movlpd xmm7, qword ptr[esi + offset5];
            movlpd qword ptr[edi + offset5], xmm7;
        }
        enum { offset6 = offset5 + 0x8 };
    }
    __if_exists(z::If<(copysize & 0x8)>::False)
    {
        enum { offset6 = offset5 };
    }
    //////////////////////////////////////////////////////////////////////////
    // 1..7 trailing bytes remain at offset6. Exactly one path below finishes the
    // copy; `copydone` is a compile-time flag that the chosen path re-declares as
    // true inside the nested block, which switches off the later alternatives.
    __if_exists(z::If<(copysize & 0x7)>::True)
    {
        enum { copydone = false };
        {
            // 5, 6 or 7 bytes left and at least 8 bytes in total: one 8-byte
            // move ending exactly at the end of the buffer (copy_offset is
            // negative, so it overlaps bytes that were already copied).
            __if_exists(z::If < ((copysize & 0x7) > 4) && copysize >= 8 > ::True) // 5 6 7: 8-byte move variant
            {
                enum{ copy_offset = (copysize & 0x7) - 8 };
                __asm
                {
                    movlpd xmm0, qword ptr[esi + offset6 + copy_offset];
                    movlpd qword ptr[edi + offset6 + copy_offset], xmm0;
                }
                enum { copydone = true };
                //return;
            }
            // 4..7 bytes left and not handled above: move 4 bytes first, then
            // shadow offset6 with offset6 + 4 for the alternatives that follow
            // (the initializer still refers to the outer offset6).
            __if_exists(z::If <!copydone && ((copysize & 0x7) >= 4)>::True) // 4 5 6 7: buffer too short for the 8-byte move, move 4 bytes first
            {
                __asm
                {
                    mov eax, dword ptr[esi + offset6];
                    mov dword ptr[edi + offset6], eax;
                }
                enum{ offset6 = offset6 + 4 };
            }
            // 3 bytes left with at least 4 bytes in total: one 4-byte move that
            // starts one byte early (overlapping the previous byte).
            __if_exists(z::If <!copydone && ((copysize & 0x3) == 3) && (copysize >= 4)> ::True) //3
            {
                __asm
                {
                    mov eax, dword ptr[esi + offset6 - 1];
                    mov dword ptr[edi + offset6 - 1], eax;
                }
                enum { copydone = true };
                //return;
            }
            // 3 bytes left and nothing before them (copysize == 3): word + byte.
            __if_exists(z::If <!copydone && ((copysize & 0x3) == 3)> ::True) //3
            {
                __asm
                {
                    mov ax, word ptr[esi + offset6];
                    mov word ptr[edi + offset6], ax;
                    mov al, byte ptr[esi + offset6 + 2];
                    mov byte ptr[edi + offset6 + 2], al;
                }
                enum { copydone = true };
                //return;
            }
            // 2 bytes left: one word move.
            __if_exists(z::If <!copydone && ((copysize & 0x3) == 2) > ::True) //2
            {
                __asm
                {
                    mov ax, word ptr[esi + offset6];
                    mov word ptr[edi + offset6], ax;
                }
                enum { copydone = true };
                //return;
            }
            // 1 byte left: one byte move.
            __if_exists(z::If <!copydone && ((copysize & 0x3) == 1) > ::True) //1
            {
                __asm
                {
                    mov al, byte ptr[esi + offset6];
                    mov byte ptr[edi + offset6], al;
                }
                enum { copydone = true };
                //return;
            }
            // Nothing left after the 4-byte move above.
            __if_exists(z::If <!copydone && ((copysize & 0x3) == 0) > ::True) //0
            {
                enum { copydone = true };
                //return;
            }
            // Compile-time safety net: fails the build if no path above matched.
            __if_exists(z::If<!copydone>::True)
            {
                static_assert(0, "");
            }
        }
    }
}
#pragma runtime_checks( "s", restore )
// Force the includer to define copysize again before the next inclusion.
#undef copysize







0 0
原创粉丝点击