E:/p4/sw/physx/PxShared/1.0/trunk/src/foundation/include/windows/PsWindowsIntrinsics.h
Go to the documentation of this file.00001 // This code contains NVIDIA Confidential Information and is disclosed to you 00002 // under a form of NVIDIA software license agreement provided separately to you. 00003 // 00004 // Notice 00005 // NVIDIA Corporation and its licensors retain all intellectual property and 00006 // proprietary rights in and to this software and related documentation and 00007 // any modifications thereto. Any use, reproduction, disclosure, or 00008 // distribution of this software and related documentation without an express 00009 // license agreement from NVIDIA Corporation is strictly prohibited. 00010 // 00011 // ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES 00012 // NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 00013 // THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, 00014 // MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 00015 // 00016 // Information and code furnished is believed to be accurate and reliable. 00017 // However, NVIDIA Corporation assumes no responsibility for the consequences of use of such 00018 // information or for any infringement of patents or other rights of third parties that may 00019 // result from its use. No license is granted by implication or otherwise under any patent 00020 // or patent rights of NVIDIA Corporation. Details are subject to change without notice. 00021 // This code supersedes and replaces all information previously supplied. 00022 // NVIDIA Corporation products are not authorized for use as critical 00023 // components in life support devices or systems without express written approval of 00024 // NVIDIA Corporation. 00025 // 00026 // Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. 00027 // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 00028 // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 00029 00030 #ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H 00031 #define PSFOUNDATION_PSWINDOWSINTRINSICS_H 00032 00033 #include "Ps.h" 00034 #include "foundation/PxAssert.h" 00035 00036 // this file is for internal intrinsics - that is, intrinsics that are used in 00037 // cross platform code but do not appear in the API 00038 00039 #if !(PX_WINDOWS_FAMILY || PX_WINRT) 00040 #error "This file should only be included by Windows or WIN8ARM builds!!" 00041 #endif 00042 00043 #pragma warning(push) 00044 //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' 00045 #pragma warning(disable : 4668) 00046 #if PX_VC == 10 00047 #pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)' 00048 #endif 00049 #include <intrin.h> 00050 #pragma warning(pop) 00051 00052 #pragma warning(push) 00053 #pragma warning(disable : 4985) // 'symbol name': attributes not present on previous declaration 00054 #include <math.h> 00055 #pragma warning(pop) 00056 00057 #include <float.h> 00058 #include <mmintrin.h> 00059 00060 #pragma intrinsic(_BitScanForward) 00061 #pragma intrinsic(_BitScanReverse) 00062 00063 namespace physx 00064 { 00065 namespace shdfnd 00066 { 00067 00068 /* 00069 * Implements a memory barrier 00070 */ 00071 PX_FORCE_INLINE void memoryBarrier() 00072 { 00073 _ReadWriteBarrier(); 00074 /* long Barrier; 00075 __asm { 00076 xchg Barrier, eax 00077 }*/ 00078 } 00079 00083 PX_FORCE_INLINE uint32_t highestSetBitUnsafe(uint32_t v) 00084 { 00085 unsigned long retval; 00086 _BitScanReverse(&retval, v); 00087 return retval; 00088 } 00089 00093 PX_FORCE_INLINE uint32_t lowestSetBitUnsafe(uint32_t v) 00094 { 00095 unsigned long retval; 00096 _BitScanForward(&retval, v); 00097 return retval; 00098 } 00099 00103 PX_FORCE_INLINE uint32_t countLeadingZeros(uint32_t v) 00104 { 00105 if(v) 00106 { 00107 unsigned long bsr = (unsigned long)-1; 00108 _BitScanReverse(&bsr, v); 00109 return 31 - bsr; 00110 } 00111 else 00112 return 32; 00113 } 00114 00118 #if !PX_ARM 00119 PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) 00120 { 00121 // cache line on X86/X64 is 64-bytes so a 128-byte prefetch would require 2 prefetches. 00122 // However, we can only dispatch a limited number of prefetch instructions so we opt to prefetch just 1 cache line 00123 /*_mm_prefetch(((const char*)ptr + offset), _MM_HINT_T0);*/ 00124 // We get slightly better performance prefetching to non-temporal addresses instead of all cache levels 00125 _mm_prefetch(((const char*)ptr + offset), _MM_HINT_NTA); 00126 } 00127 #else 00128 PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) 00129 { 00130 // arm does have 32b cache line size 00131 __prefetch(((const char*)ptr + offset)); 00132 } 00133 #endif 00134 00138 #if !PX_ARM 00139 PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) 00140 { 00141 const char* cp = (char*)ptr; 00142 uint64_t p = size_t(ptr); 00143 uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; 00144 uint64_t lines = endLine - startLine + 1; 00145 do 00146 { 00147 prefetchLine(cp); 00148 cp += 64; 00149 } while(--lines); 00150 } 00151 #else 00152 PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) 00153 { 00154 const char* cp = (char*)ptr; 00155 uint32_t p = size_t(ptr); 00156 uint32_t startLine = p >> 5, endLine = (p + count - 1) >> 5; 00157 uint32_t lines = endLine - startLine + 1; 00158 do 00159 { 00160 prefetchLine(cp); 00161 cp += 32; 00162 } while(--lines); 00163 } 00164 #endif 00165 00167 PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) 00168 { 00169 return 1.0f / a; 00170 } 00171 00173 PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) 00174 { 00175 return 1.0f / ::sqrtf(a); 00176 } 00177 00179 PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) 00180 { 00181 return ::floorf(x); 00182 } 00183 00184 #define NS_EXPECT_TRUE(x) x 00185 #define NS_EXPECT_FALSE(x) x 00186 00187 } // namespace shdfnd 00188 } // namespace physx 00189 00190 #endif // #ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H
Generated on Tue Jul 28 14:21:55 2015 for NVIDIA(R) PsFoundation Reference by
