My current attempt:
/**simplified from
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
**/
#include <stdio.h>
#include <stdint.h>
#if defined(_MSC_VER)
# include <intrin.h>
#endif
/**
 * Execute the CPUID instruction for one leaf / sub-leaf pair.
 *
 * @param eax   CPUID leaf; loaded into EAX before the instruction runs.
 * @param ecx   CPUID sub-leaf; loaded into ECX before the instruction runs.
 * @param abcd  Output array of four elements that receives the values of
 *              EAX, EBX, ECX and EDX (in that order) after CPUID.
 *              Must not be NULL.
 */
void get_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd){
#if defined(_MSC_VER)
    /* __cpuidex is declared with int parameters, so convert explicitly */
    __cpuidex((int *)abcd, (int)eax, (int)ecx);
#else
    uint32_t ebx = 0, edx = 0;
# if defined( __i386__ ) && defined ( __PIC__ )
    /* In case of PIC, under 32-bit EBX cannot be clobbered: park it in EDI,
     * run CPUID, then swap so EDI carries CPUID's EBX result and EBX holds
     * its original value again. */
    __asm__( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D"(ebx),
# else
    /* EBX is a pure output of CPUID, so it is bound write-only ("=b") */
    __asm__( "cpuid" : "=b"(ebx),
# endif
             "+a"(eax), "+c"(ecx), "=d"(edx));
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
/**
 * Report whether CPUID advertises Restricted Transactional Memory (RTM).
 *
 * @return 1 if CPUID.(EAX=07H,ECX=0):EBX.RTM [bit 11] is set, 0 otherwise,
 *         including when the processor does not implement leaf 07H.
 */
int has_RTM_support(void){
    uint32_t abcd[4] = {0, 0, 0, 0};

    /* Leaf 0 reports the highest supported basic leaf in EAX. Querying a
     * leaf above that maximum does not yield zeros on every processor, so
     * bit 11 is only meaningful once leaf 07H is known to exist. */
    get_cpuid(0x0, 0x0, abcd);
    if (abcd[0] < 0x7) {
        return 0;
    }

    /*processor supports RTM execution if CPUID.07H.EBX.RTM [bit 11] = 1*/
    get_cpuid(0x7, 0x0, abcd);
    return (abcd[1] & (UINT32_C(1) << 11)) != 0;
}
/* Entry point: writes one message to stdout stating whether
 * has_RTM_support() reported RTM, then exits with status 0.
 * The command-line arguments are not used. */
int main(int argc, char **argv){
    const char *verdict = has_RTM_support()
        ? "This CPU supports RTM."
        : "This CPU does NOT support RTM.";

    /* the messages contain no conversion specifiers, so they are written verbatim */
    fputs(verdict, stdout);
    return 0;
}
I have an Intel® Core™ i7-7600U (cpuinfo below), and according to its Intel ARK page it is supposed to support TSX-NI.
Still, the check above prints:
This CPU does NOT support RTM.
And the has_tsx implementation from tsx-tools agrees:
RTM: No
HLE: No
Yet at the same time, I can execute this snippet just fine...
#include <stdio.h>
/*
 * Demo: increment a counter between an xbegin and an xend instruction until
 * it reaches 100000000, then print it. The three asm statements in the loop
 * emit `xbegin ABORT`, `xend` and the `ABORT:` label verbatim into the
 * assembler output.
 */
int main()
{
/* volatile: every read and write of i is performed as an actual access */
volatile int i = 0;
while (i < 100000000) {
/* xbegin's operand is the ABORT label emitted at the end of the loop body */
__asm__ ("xbegin ABORT");
i++;
__asm__ ("xend");
/* NOTE(review): ABORT is an assembler-level label, not a C label, so the
 * compiler does not know about the branch target. Presumably this only
 * assembles while this statement is emitted exactly once -- confirm before
 * building with optimizations that could duplicate the loop body. */
__asm__ ("ABORT:");
}
printf("%d\n", i);
return 0;
}
My understanding was that these instructions "will generate a #UD exception when used on a processor that does not support RTM", or at least that's what the Intel manual says on the matter (page 387).
I checked the asm code, too, and these instructions are still there (see below for the content of the .s file).
So since these instructions appear to be executed, are these checks simply wrong?
If so, how would you properly test for RTM support?
ASM Code of the snippet
.file "rtm_simple.c"
# GNU C11 (Ubuntu 6.3.0-12ubuntu2) version 6.3.0 20170406 (x86_64-linux-gnu)
# compiled by GNU C version 6.3.0 20170406, GMP version 6.1.2, MPFR version 3.1.5, MPC version 1.0.3, isl version 0.15
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -imultiarch x86_64-linux-gnu rtm_simple.c -mtune=generic
# -march=x86-64 -fverbose-asm -fstack-protector-strong -Wformat
# -Wformat-security
# options enabled: -fPIC -fPIE -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type
# -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls
# -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds
# -fchkp-use-static-bounds -fchkp-use-static-const-bounds
# -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique -fident
# -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -flifetime-dse -flto-odr-type-merging -fmath-errno
# -fmerge-debug-strings -fpeephole -fplt -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
# -fsemantic-interposition -fshow-column -fsigned-zeros
# -fsplit-ivs-in-unroller -fssa-backprop -fstack-protector-strong
# -fstdarg-opt -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time
# -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -malign-stringops
# -mavx256-split-unaligned-load -mavx256-split-unaligned-store
# -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp
# -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2
# -mstv -mtls-direct-seg-refs -mvzeroupper
.section .rodata
.LC0:
.string "%d\n"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $16, %rsp #,
movl $0, -4(%rbp) #, i
jmp .L2 #
.L3:
#APP
# 7 "rtm_simple.c" 1
xbegin ABORT
# 0 "" 2
#NO_APP
movl -4(%rbp), %eax # i, i.0_5
addl $1, %eax #, i.1_6
movl %eax, -4(%rbp) # i.1_6, i
#APP
# 9 "rtm_simple.c" 1
xend
# 0 "" 2
# 10 "rtm_simple.c" 1
ABORT:
# 0 "" 2
#NO_APP
.L2:
movl -4(%rbp), %eax # i, i.2_4
cmpl $99999999, %eax #, i.2_4
jle .L3 #,
movl -4(%rbp), %eax # i, i.3_8
movl %eax, %esi # i.3_8,
leaq .LC0(%rip), %rdi #,
movl $0, %eax #,
call printf@PLT #
movl $0, %eax #, _10
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"
.section .note.GNU-stack,"",@progbits
CPUINFO
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 142
model name : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping : 9
cpu MHz : 2904.004
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 0
cpu cores : 2
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 22
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs :
bogomips : 5808.00
clflush size : 64
cache_alignment : 64
address sizes : 39 bits physical, 48 bits virtual
power management:
processor : 1
vendor_id : GenuineIntel
cpu family : 6
model : 142
model name : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
stepping : 9
cpu MHz : 2904.004
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 1
cpu cores : 2
apicid : 1
initial apicid : 1
fpu : yes
fpu_exception : yes
cpuid level : 22
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushopt
bugs :
bogomips : 5808.00
clflush size : 64
cache_alignment : 64
address sizes : 39 bits physical, 48 bits virtual
power management: