Skip to content

JIT: Intrinsify Task/ValueTask factory methods#129810

Draft
jakobbotsch wants to merge 1 commit into
dotnet:main from
jakobbotsch:optimize-async-task-valuetask
Draft

JIT: Intrinsify Task/ValueTask factory methods#129810
jakobbotsch wants to merge 1 commit into
dotnet:main from
jakobbotsch:optimize-async-task-valuetask

Conversation

@jakobbotsch

Copy link
Copy Markdown
Member

Intrinsify and recognize the following methods when they are used directly in a runtime async context:

  • Task.FromResult
  • Task.CompletedTask
  • ValueTask.FromResult
  • ValueTask.CompletedTask
  • new ValueTask()
  • ValueTask-typed default
  • new ValueTask<T>(T value)
Example

Before:

// Example driver: awaits each of the eight callees below once, in order.
// Each callee returns a Task/ValueTask built with one of the factory
// patterns listed above.
public static async Task Main()
{
    await TaskCallee1();
    await TaskCallee2();
    await ValueTaskCallee1();
    await ValueTaskCallee2();
    await ValueTaskCallee3();
    await ValueTaskCallee4();
    await ValueTaskCallee5();
    await ValueTaskCallee6();
}

// Pattern under test: Task.FromResult with a string literal.
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static Task<string> TaskCallee1()
{
    return Task.FromResult("abc");
}

// Pattern under test: the Task.CompletedTask property (non-generic Task).
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static Task TaskCallee2()
{
    return Task.CompletedTask;
}

// Pattern under test: the ValueTask.CompletedTask property.
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask ValueTaskCallee1()
{
    return ValueTask.CompletedTask;
}

// Pattern under test: a ValueTask-typed `default` expression (no explicit
// constructor or factory call).
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask ValueTaskCallee2()
{
    return default;
}

// Pattern under test: the parameterless `new ValueTask()` constructor.
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask ValueTaskCallee3()
{
    return new ValueTask();
}

// Pattern under test: ValueTask.FromResult with a string literal.
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask<string> ValueTaskCallee4()
{
    return ValueTask.FromResult("abc");
}

// Pattern under test: the `new ValueTask<T>(T value)` constructor with a
// reference-type result (a string literal).
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask<string> ValueTaskCallee5()
{
    return new ValueTask<string>("abc");
}

// Pattern under test: the `new ValueTask<T>(T value)` constructor with a
// struct result (Guid) that is produced by a call (Guid.NewGuid()) rather
// than a constant.
// NoInlining keeps this a standalone method instead of being inlined into
// its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private static ValueTask<Guid> ValueTaskCallee6()
{
    return new ValueTask<Guid>(Guid.NewGuid());
}
; Assembly listing for method Program:TaskCallee1():System.String (FullOpts)

G_M49327_IG01:  ;; offset=0x0000
       push     rbx
       sub      rsp, 32
						;; size=5 bbWeight=1 PerfScore 1.25
G_M49327_IG02:  ;; offset=0x0005
       test     rcx, rcx
       jne      G_M49327_IG11
       mov      rcx, 0x7FF9D8B84FF0      ; System.Threading.Tasks.Task`1[System.String]
       call     CORINFO_HELP_NEWSFAST
       mov      rbx, rax
       test     byte  ptr [(reloc 0x7ff9d8b220e8)], 1      ; global ptr
       je       SHORT G_M49327_IG08
						;; size=36 bbWeight=1 PerfScore 5.75
G_M49327_IG03:  ;; offset=0x0029
       mov      dword ptr [rbx+0x34], 0x1000000
       mov      rcx, 0x2A1BCEF3500      ; 'abc'
       mov      gword ptr [rbx+0x38], rcx
       mov      rcx, 0x7FF9D8B66530      ; System.Threading.Tasks.Task:FromResult[System.String](System.String):System.String
       mov      rcx, qword ptr [rcx+0x48]
       mov      rcx, qword ptr [rcx+0x10]
       test     rcx, rcx
       je       SHORT G_M49327_IG07
						;; size=44 bbWeight=1 PerfScore 7.75
G_M49327_IG04:  ;; offset=0x0055
       mov      r8, rbx
       xor      rdx, rdx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult[System.__Canon](System.Threading.Tasks.Task`1[System.__Canon]):System.__Canon]
       test     rcx, rcx
       jne      SHORT G_M49327_IG09
						;; size=16 bbWeight=1 PerfScore 4.75
G_M49327_IG05:  ;; offset=0x0065
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M49327_IG06:  ;; offset=0x0067
       add      rsp, 32
       pop      rbx
       ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M49327_IG07:  ;; offset=0x006D
       mov      rcx, 0x7FF9D8B66530      ; System.Threading.Tasks.Task:FromResult[System.String](System.String):System.String
       mov      rdx, 0x7FF9D8B978D8      ; global ptr
       call     [CORINFO_HELP_RUNTIMEHANDLE_METHOD]
       mov      rcx, rax
       jmp      SHORT G_M49327_IG04
						;; size=31 bbWeight=0.20 PerfScore 1.15
G_M49327_IG08:  ;; offset=0x008C
       mov      rcx, 0x7FF9D8B21DA0      ; System.Threading.Tasks.Task
       call     [CORINFO_HELP_GET_NONGCSTATIC_BASE]
       jmp      SHORT G_M49327_IG03
						;; size=18 bbWeight=0 PerfScore 0.00
G_M49327_IG09:  ;; offset=0x009E
       mov      rdx, 0x7FF9D8B88EC0      ; Continuation_16_0_2
       call     [CORINFO_HELP_ALLOC_CONTINUATION]
       mov      rbx, rax
       lea      rax, [reloc @RWD00]
       mov      qword ptr [rbx+0x10], rax
       mov      qword ptr [rbx+0x18], 0x808
       call     [System.Runtime.CompilerServices.AsyncHelpers:CaptureExecutionContext():System.Threading.ExecutionContext]
       mov      rdx, rax
       lea      rcx, bword ptr [rbx+0x20]
       call     CORINFO_HELP_ASSIGN_REF
       mov      rcx, rbx
       xor      eax, eax
						;; size=61 bbWeight=0 PerfScore 0.00
G_M49327_IG10:  ;; offset=0x00DB
       add      rsp, 32
       pop      rbx
       ret      
						;; size=6 bbWeight=0 PerfScore 0.00
G_M49327_IG11:  ;; offset=0x00E1
       mov      rax, gword ptr [rcx+0x28]
       xor      rdx, rdx
       mov      gword ptr [rcx+0x28], rdx
       jmp      G_M49327_IG05
						;; size=15 bbWeight=0 PerfScore 0.00
RWD00  	dq	(dynamicClass):IL_STUB_AsyncResume_TaskCallee1_Optimized(System.Object,byref):System.Object
	dq	G_M49327_IG09


; Total bytes of code 240, prolog size 5, PerfScore 22.65, instruction count 53, allocated bytes for code 240 (MethodHash=c5e23f50) for method Program:TaskCallee1():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:TaskCallee2() (FullOpts)

G_M59144_IG01:  ;; offset=0x0000
       push     rbx
       sub      rsp, 32
						;; size=5 bbWeight=1 PerfScore 1.25
G_M59144_IG02:  ;; offset=0x0005
       test     rcx, rcx
       jne      SHORT G_M59144_IG03
       mov      rdx, 0x291A6000258      ; const ptr
       mov      rdx, gword ptr [rdx]
       xor      rcx, rcx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult(System.Threading.Tasks.Task)]
       test     rcx, rcx
       jne      SHORT G_M59144_IG05
						;; size=31 bbWeight=1 PerfScore 8.00
G_M59144_IG03:  ;; offset=0x0024
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M59144_IG04:  ;; offset=0x0026
       add      rsp, 32
       pop      rbx
       ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M59144_IG05:  ;; offset=0x002C
       mov      rdx, 0x7FF9D8BB2CD0      ; Continuation_8_0_1
       call     [CORINFO_HELP_ALLOC_CONTINUATION]
       mov      rbx, rax
       lea      rax, [reloc @RWD00]
       mov      qword ptr [rbx+0x10], rax
       mov      qword ptr [rbx+0x18], 8
       call     [System.Runtime.CompilerServices.AsyncHelpers:CaptureExecutionContext():System.Threading.ExecutionContext]
       mov      rdx, rax
       lea      rcx, bword ptr [rbx+0x20]
       call     CORINFO_HELP_ASSIGN_REF
       mov      rcx, rbx
						;; size=59 bbWeight=0 PerfScore 0.00
G_M59144_IG06:  ;; offset=0x0067
       add      rsp, 32
       pop      rbx
       ret      
						;; size=6 bbWeight=0 PerfScore 0.00
RWD00  	dq	(dynamicClass):IL_STUB_AsyncResume_TaskCallee2_Optimized(System.Object,byref):System.Object
	dq	G_M59144_IG05


; Total bytes of code 109, prolog size 5, PerfScore 11.25, instruction count 28, allocated bytes for code 109 (MethodHash=f60418f7) for method Program:TaskCallee2() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee1() (FullOpts)

G_M60608_IG01:  ;; offset=0x0000
       push     rbx
       sub      rsp, 48
       xor      eax, eax
       mov      qword ptr [rsp+0x20], rax
						;; size=12 bbWeight=1 PerfScore 2.50
G_M60608_IG02:  ;; offset=0x000C
       test     rcx, rcx
       jne      SHORT G_M60608_IG03
       xor      rdx, rdx
       mov      gword ptr [rsp+0x20], rdx
       mov      word  ptr [rsp+0x28], 0
       mov      byte  ptr [rsp+0x2A], 0
       lea      rdx, [rsp+0x20]
       xor      rcx, rcx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult(System.Threading.Tasks.ValueTask)]
       test     rcx, rcx
       jne      SHORT G_M60608_IG05
						;; size=42 bbWeight=1 PerfScore 9.50
G_M60608_IG03:  ;; offset=0x0036
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M60608_IG04:  ;; offset=0x0038
       add      rsp, 48
       pop      rbx
       ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M60608_IG05:  ;; offset=0x003E
       mov      rdx, 0x7FF9D8BB2CD0      ; Continuation_8_0_1
       call     [CORINFO_HELP_ALLOC_CONTINUATION]
       mov      rbx, rax
       lea      rax, [reloc @RWD00]
       mov      qword ptr [rbx+0x10], rax
       mov      qword ptr [rbx+0x18], 8
       call     [System.Runtime.CompilerServices.AsyncHelpers:CaptureExecutionContext():System.Threading.ExecutionContext]
       mov      rdx, rax
       lea      rcx, bword ptr [rbx+0x20]
       call     CORINFO_HELP_ASSIGN_REF
       mov      rcx, rbx
						;; size=59 bbWeight=0 PerfScore 0.00
G_M60608_IG06:  ;; offset=0x0079
       add      rsp, 48
       pop      rbx
       ret      
						;; size=6 bbWeight=0 PerfScore 0.00
RWD00  	dq	(dynamicClass):IL_STUB_AsyncResume_ValueTaskCallee1_Optimized(System.Object,byref):System.Object
	dq	G_M60608_IG05


; Total bytes of code 127, prolog size 12, PerfScore 14.00, instruction count 33, allocated bytes for code 127 (MethodHash=fdfd133f) for method Program:ValueTaskCallee1() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee2() (FullOpts)

G_M59587_IG01:  ;; offset=0x0000
       sub      rsp, 56
       xor      eax, eax
       mov      qword ptr [rsp+0x28], rax
       mov      qword ptr [rsp+0x30], rax
						;; size=16 bbWeight=1 PerfScore 2.50
G_M59587_IG02:  ;; offset=0x0010
       xor      rdx, rdx
       mov      gword ptr [rsp+0x28], rdx
       mov      word  ptr [rsp+0x30], 0
       mov      byte  ptr [rsp+0x32], 0
       lea      rdx, [rsp+0x28]
       xor      rcx, rcx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult(System.Threading.Tasks.ValueTask)]
       test     rcx, rcx
       jne      SHORT G_M59587_IG04
       xor      ecx, ecx
						;; size=39 bbWeight=1 PerfScore 8.50
G_M59587_IG03:  ;; offset=0x0037
       add      rsp, 56
       ret      
						;; size=5 bbWeight=1 PerfScore 1.25
G_M59587_IG04:  ;; offset=0x003C
       add      rsp, 56
       ret      
						;; size=5 bbWeight=0 PerfScore 0.00

; Total bytes of code 65, prolog size 16, PerfScore 12.25, instruction count 18, allocated bytes for code 65 (MethodHash=fdfd173c) for method Program:ValueTaskCallee2() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee3() (FullOpts)

G_M62786_IG01:  ;; offset=0x0000
       sub      rsp, 56
       xor      eax, eax
       mov      qword ptr [rsp+0x28], rax
       mov      qword ptr [rsp+0x30], rax
						;; size=16 bbWeight=1 PerfScore 2.50
G_M62786_IG02:  ;; offset=0x0010
       xor      rdx, rdx
       mov      gword ptr [rsp+0x28], rdx
       mov      word  ptr [rsp+0x30], 0
       mov      byte  ptr [rsp+0x32], 0
       lea      rdx, [rsp+0x28]
       xor      rcx, rcx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult(System.Threading.Tasks.ValueTask)]
       test     rcx, rcx
       jne      SHORT G_M62786_IG04
       xor      ecx, ecx
						;; size=39 bbWeight=1 PerfScore 8.50
G_M62786_IG03:  ;; offset=0x0037
       add      rsp, 56
       ret      
						;; size=5 bbWeight=1 PerfScore 1.25
G_M62786_IG04:  ;; offset=0x003C
       add      rsp, 56
       ret      
						;; size=5 bbWeight=0 PerfScore 0.00

; Total bytes of code 65, prolog size 16, PerfScore 12.25, instruction count 18, allocated bytes for code 65 (MethodHash=fdfd0abd) for method Program:ValueTaskCallee3() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee4():System.String (FullOpts)

G_M17761_IG01:  ;; offset=0x0000
       push     rbx
       sub      rsp, 64
       vxorps   xmm4, xmm4, xmm4
       vmovdqu  xmmword ptr [rsp+0x28], xmm4
       xor      eax, eax
       mov      qword ptr [rsp+0x38], rax
						;; size=22 bbWeight=1 PerfScore 3.83
G_M17761_IG02:  ;; offset=0x0016
       test     rcx, rcx
       jne      G_M17761_IG09
       mov      rcx, 0x7FF9D8BB50C8      ; System.Threading.Tasks.ValueTask:FromResult[System.String](System.String):System.String
       mov      rcx, qword ptr [rcx+0x48]
       mov      rcx, qword ptr [rcx+0x10]
       test     rcx, rcx
       je       SHORT G_M17761_IG06
						;; size=32 bbWeight=1 PerfScore 6.75
G_M17761_IG03:  ;; offset=0x0036
       xor      r8, r8
       mov      gword ptr [rsp+0x28], r8
       mov      r8, 0x2A1BCEF3500      ; 'abc'
       mov      gword ptr [rsp+0x30], r8
       mov      word  ptr [rsp+0x38], 0
       mov      byte  ptr [rsp+0x3A], 1
       lea      r8, [rsp+0x28]
       xor      rdx, rdx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult[System.__Canon](System.Threading.Tasks.ValueTask`1[System.__Canon]):System.__Canon]
       test     rcx, rcx
       jne      SHORT G_M17761_IG07
						;; size=53 bbWeight=1 PerfScore 9.50
G_M17761_IG04:  ;; offset=0x006B
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M17761_IG05:  ;; offset=0x006D
       add      rsp, 64
       pop      rbx
       ret      
						;; size=6 bbWeight=1 PerfScore 1.75
G_M17761_IG06:  ;; offset=0x0073
       mov      rcx, 0x7FF9D8BB50C8      ; System.Threading.Tasks.ValueTask:FromResult[System.String](System.String):System.String
       mov      rdx, 0x7FF9D8BC17D0      ; global ptr
       call     CORINFO_HELP_RUNTIMEHANDLE_METHOD
       mov      rcx, rax
       jmp      SHORT G_M17761_IG03
						;; size=30 bbWeight=0.20 PerfScore 0.75
G_M17761_IG07:  ;; offset=0x0091
       mov      rdx, 0x7FF9D8B88EC0      ; Continuation_16_0_2
       call     [CORINFO_HELP_ALLOC_CONTINUATION]
       mov      rbx, rax
       lea      rax, [reloc @RWD00]
       mov      qword ptr [rbx+0x10], rax
       mov      qword ptr [rbx+0x18], 0x808
       call     [System.Runtime.CompilerServices.AsyncHelpers:CaptureExecutionContext():System.Threading.ExecutionContext]
       mov      rdx, rax
       lea      rcx, bword ptr [rbx+0x20]
       call     CORINFO_HELP_ASSIGN_REF
       mov      rcx, rbx
       xor      eax, eax
						;; size=61 bbWeight=0 PerfScore 0.00
G_M17761_IG08:  ;; offset=0x00CE
       add      rsp, 64
       pop      rbx
       ret      
						;; size=6 bbWeight=0 PerfScore 0.00
G_M17761_IG09:  ;; offset=0x00D4
       mov      rax, gword ptr [rcx+0x28]
       xor      rdx, rdx
       mov      gword ptr [rcx+0x28], rdx
       jmp      SHORT G_M17761_IG04
						;; size=12 bbWeight=0 PerfScore 0.00
RWD00  	dq	(dynamicClass):IL_STUB_AsyncResume_ValueTaskCallee4_Optimized(System.Object,byref):System.Object
	dq	G_M17761_IG07


; Total bytes of code 224, prolog size 22, PerfScore 22.83, instruction count 52, allocated bytes for code 224 (MethodHash=85dfba9e) for method Program:ValueTaskCallee4():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee5():System.String (FullOpts)

G_M17504_IG01:  ;; offset=0x0000
       sub      rsp, 56
       vxorps   xmm4, xmm4, xmm4
       vmovdqa  xmmword ptr [rsp+0x20], xmm4
       xor      eax, eax
       mov      qword ptr [rsp+0x30], rax
						;; size=21 bbWeight=1 PerfScore 2.83
G_M17504_IG02:  ;; offset=0x0015
       xor      r8, r8
       mov      gword ptr [rsp+0x20], r8
       mov      r8, 0x2A1BCEF3500      ; 'abc'
       mov      gword ptr [rsp+0x28], r8
       mov      word  ptr [rsp+0x30], 0
       mov      byte  ptr [rsp+0x32], 1
       lea      r8, [rsp+0x20]
       mov      rcx, 0x7FF9D8BB8000      ; System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult[System.String](System.Threading.Tasks.ValueTask`1[System.String]):System.String
       xor      rdx, rdx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult[System.__Canon](System.Threading.Tasks.ValueTask`1[System.__Canon]):System.__Canon]
       test     rcx, rcx
       jne      SHORT G_M17504_IG04
       xor      ecx, ecx
						;; size=65 bbWeight=1 PerfScore 10.00
G_M17504_IG03:  ;; offset=0x0056
       add      rsp, 56
       ret      
						;; size=5 bbWeight=1 PerfScore 1.25
G_M17504_IG04:  ;; offset=0x005B
       xor      eax, eax
						;; size=2 bbWeight=0 PerfScore 0.00
G_M17504_IG05:  ;; offset=0x005D
       add      rsp, 56
       ret      
						;; size=5 bbWeight=0 PerfScore 0.00

; Total bytes of code 98, prolog size 21, PerfScore 14.08, instruction count 23, allocated bytes for code 98 (MethodHash=0100bb9f) for method Program:ValueTaskCallee5():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee6():System.Guid (FullOpts)

G_M18761_IG01:  ;; offset=0x0000
       push     rbp
       push     r15
       push     r14
       push     r13
       push     r12
       push     rdi
       push     rsi
       push     rbx
       sub      rsp, 152
       vzeroupper 
       lea      rbp, [rsp+0xD0]
       xor      eax, eax
       mov      qword ptr [rbp-0x58], rax
       vxorps   xmm4, xmm4, xmm4
       vmovdqa  xmmword ptr [rbp-0x50], xmm4
       mov      qword ptr [rbp-0x40], rax
       mov      rbx, rcx
						;; size=52 bbWeight=1 PerfScore 13.58
G_M18761_IG02:  ;; offset=0x0034
       lea      rcx, [rbp-0xB0]
       call     CORINFO_HELP_INIT_PINVOKE_FRAME
       mov      rsi, rax
       mov      rcx, rsp
       mov      qword ptr [rbp-0x98], rcx
       mov      rcx, rbp
       mov      qword ptr [rbp-0x88], rcx
       lea      rcx, [rbp-0x78]
       mov      rax, 0x7FF9D8BBD8D8      ; function address
       mov      qword ptr [rbp-0xA0], rax
       lea      rax, G_M18761_IG04
       mov      qword ptr [rbp-0x90], rax
       lea      rax, [rbp-0xB0]
       mov      qword ptr [rsi+0x08], rax
       mov      byte  ptr [rsi+0x04], 0
						;; size=85 bbWeight=1 PerfScore 10.50
G_M18761_IG03:  ;; offset=0x0089
       call     [Interop+Ole32:CoCreateGuid(ptr):int]
						;; size=6 bbWeight=1 PerfScore 3.00
G_M18761_IG04:  ;; offset=0x008F
       mov      byte  ptr [rsi+0x04], 1
       cmp      dword ptr [(reloc 0x7ffa384dce38)], 0
       je       SHORT G_M18761_IG05
       call     [CORINFO_HELP_STOP_FOR_GC]
						;; size=19 bbWeight=1 PerfScore 7.00
G_M18761_IG05:  ;; offset=0x00A2
       mov      rcx, qword ptr [rbp-0xA8]
       mov      qword ptr [rsi+0x08], rcx
       test     eax, eax
       jne      SHORT G_M18761_IG07
       vmovups  xmm0, xmmword ptr [rbp-0x78]
       vmovups  xmmword ptr [rbp-0x4C], xmm0
       xor      rcx, rcx
       mov      gword ptr [rbp-0x58], rcx
       mov      byte  ptr [rbp-0x4E], 1
       mov      word  ptr [rbp-0x50], 0
       lea      rcx, [rbp-0x68]
       lea      r8, [rbp-0x58]
       xor      rdx, rdx
       call     [System.Runtime.CompilerServices.AsyncHelpers:TransparentAwaitWithResult[System.Guid](System.Threading.Tasks.ValueTask`1[System.Guid]):System.Guid]
       test     rcx, rcx
       jne      SHORT G_M18761_IG08
       vmovups  xmm0, xmmword ptr [rbp-0x68]
       vmovups  xmmword ptr [rbx], xmm0
       mov      rax, rbx
       xor      ecx, ecx
						;; size=76 bbWeight=1 PerfScore 16.50
G_M18761_IG06:  ;; offset=0x00EE
       add      rsp, 152
       pop      rbx
       pop      rsi
       pop      rdi
       pop      r12
       pop      r13
       pop      r14
       pop      r15
       pop      rbp
       ret      
						;; size=20 bbWeight=1 PerfScore 5.25
G_M18761_IG07:  ;; offset=0x0102
       mov      ecx, eax
       call     [System.Guid:ThrowForHr(int)]
       int3     
						;; size=9 bbWeight=0 PerfScore 0.00
G_M18761_IG08:  ;; offset=0x010B
       add      rsp, 152
       pop      rbx
       pop      rsi
       pop      rdi
       pop      r12
       pop      r13
       pop      r14
       pop      r15
       pop      rbp
       ret      
						;; size=20 bbWeight=0 PerfScore 0.00

; Total bytes of code 287, prolog size 49, PerfScore 55.83, instruction count 80, allocated bytes for code 287 (MethodHash=8d4cb6b6) for method Program:ValueTaskCallee6():System.Guid (FullOpts)
; ============================================================

After:

; Assembly listing for method Program:TaskCallee1():System.String (FullOpts)

G_M49327_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M49327_IG02:  ;; offset=0x0000
       mov      rax, 0x2E8B7A73500      ; 'abc'
       xor      ecx, ecx
						;; size=12 bbWeight=1 PerfScore 0.50
G_M49327_IG03:  ;; offset=0x000C
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 13, prolog size 0, PerfScore 1.50, instruction count 3, allocated bytes for code 13 (MethodHash=c5e23f50) for method Program:TaskCallee1():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:TaskCallee2() (FullOpts)

G_M59144_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M59144_IG02:  ;; offset=0x0000
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M59144_IG03:  ;; offset=0x0002
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 3, prolog size 0, PerfScore 1.25, instruction count 2, allocated bytes for code 3 (MethodHash=f60418f7) for method Program:TaskCallee2() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee1() (FullOpts)

G_M60608_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M60608_IG02:  ;; offset=0x0000
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M60608_IG03:  ;; offset=0x0002
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 3, prolog size 0, PerfScore 1.25, instruction count 2, allocated bytes for code 3 (MethodHash=fdfd133f) for method Program:ValueTaskCallee1() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee2() (FullOpts)

G_M59587_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M59587_IG02:  ;; offset=0x0000
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M59587_IG03:  ;; offset=0x0002
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 3, prolog size 0, PerfScore 1.25, instruction count 2, allocated bytes for code 3 (MethodHash=fdfd173c) for method Program:ValueTaskCallee2() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee3() (FullOpts)

G_M62786_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M62786_IG02:  ;; offset=0x0000
       xor      ecx, ecx
						;; size=2 bbWeight=1 PerfScore 0.25
G_M62786_IG03:  ;; offset=0x0002
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 3, prolog size 0, PerfScore 1.25, instruction count 2, allocated bytes for code 3 (MethodHash=fdfd0abd) for method Program:ValueTaskCallee3() (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee4():System.String (FullOpts)

G_M17761_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M17761_IG02:  ;; offset=0x0000
       mov      rax, 0x2E8B7A73500      ; 'abc'
       xor      ecx, ecx
						;; size=12 bbWeight=1 PerfScore 0.50
G_M17761_IG03:  ;; offset=0x000C
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 13, prolog size 0, PerfScore 1.50, instruction count 3, allocated bytes for code 13 (MethodHash=85dfba9e) for method Program:ValueTaskCallee4():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee5():System.String (FullOpts)

G_M17504_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M17504_IG02:  ;; offset=0x0000
       mov      rax, 0x2E8B7A73500      ; 'abc'
       xor      ecx, ecx
						;; size=12 bbWeight=1 PerfScore 0.50
G_M17504_IG03:  ;; offset=0x000C
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 13, prolog size 0, PerfScore 1.50, instruction count 3, allocated bytes for code 13 (MethodHash=0100bb9f) for method Program:ValueTaskCallee5():System.String (FullOpts)
; ============================================================

; Assembly listing for method Program:ValueTaskCallee6():System.Guid (FullOpts)

G_M18761_IG01:  ;; offset=0x0000
       push     rbp
       push     r15
       push     r14
       push     r13
       push     r12
       push     rdi
       push     rsi
       push     rbx
       sub      rsp, 104
       vzeroupper 
       lea      rbp, [rsp+0xA0]
       mov      rbx, rcx
						;; size=30 bbWeight=1 PerfScore 10.00
G_M18761_IG02:  ;; offset=0x001E
       lea      rcx, [rbp-0x80]
       call     CORINFO_HELP_INIT_PINVOKE_FRAME
       mov      rsi, rax
       mov      rcx, rsp
       mov      qword ptr [rbp-0x68], rcx
       mov      rcx, rbp
       mov      qword ptr [rbp-0x58], rcx
       lea      rcx, [rbp-0x48]
       mov      rax, 0x7FF9D8B7A8D8      ; function address
       mov      qword ptr [rbp-0x70], rax
       lea      rax, G_M18761_IG04
       mov      qword ptr [rbp-0x60], rax
       lea      rax, [rbp-0x80]
       mov      qword ptr [rsi+0x08], rax
       mov      byte  ptr [rsi+0x04], 0
						;; size=67 bbWeight=1 PerfScore 10.50
G_M18761_IG03:  ;; offset=0x0061
       call     [Interop+Ole32:CoCreateGuid(ptr):int]
						;; size=6 bbWeight=1 PerfScore 3.00
G_M18761_IG04:  ;; offset=0x0067
       mov      byte  ptr [rsi+0x04], 1
       cmp      dword ptr [(reloc 0x7ffa384dcdb8)], 0
       je       SHORT G_M18761_IG05
       call     [CORINFO_HELP_STOP_FOR_GC]
						;; size=19 bbWeight=1 PerfScore 7.00
G_M18761_IG05:  ;; offset=0x007A
       mov      rcx, qword ptr [rbp-0x78]
       mov      qword ptr [rsi+0x08], rcx
       mov      ecx, eax
       test     ecx, ecx
       jne      SHORT G_M18761_IG07
       vmovups  xmm0, xmmword ptr [rbp-0x48]
       vmovups  xmmword ptr [rbx], xmm0
       mov      rax, rbx
       xor      ecx, ecx
						;; size=28 bbWeight=1 PerfScore 6.00
G_M18761_IG06:  ;; offset=0x0096
       add      rsp, 104
       pop      rbx
       pop      rsi
       pop      rdi
       pop      r12
       pop      r13
       pop      r14
       pop      r15
       pop      rbp
       ret      
						;; size=17 bbWeight=1 PerfScore 5.25
G_M18761_IG07:  ;; offset=0x00A7
       call     [System.Guid:ThrowForHr(int)]
       int3     
						;; size=7 bbWeight=0 PerfScore 0.00

; Total bytes of code 174, prolog size 27, PerfScore 41.75, instruction count 53, allocated bytes for code 174 (MethodHash=8d4cb6b6) for method Program:ValueTaskCallee6():System.Guid (FullOpts)
; ============================================================

Intrinsify and recognize the following methods when used directly in
runtime async variants:
- `Task.FromResult`
- `Task.CompletedTask`
- `ValueTask.FromResult`
- `ValueTask.CompletedTask`
- `new ValueTask()`
- `ValueTask`-typed `default`
- `new ValueTask<T>(T value)`
Copilot AI review requested due to automatic review settings June 24, 2026 17:37
@github-actions github-actions Bot added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Jun 24, 2026
@dotnet-policy-service

Copy link
Copy Markdown
Contributor

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

Copilot AI left a comment

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

This PR extends the CoreCLR JIT’s “runtime async” optimization pipeline by marking common Task/ValueTask factory helpers as intrinsics and teaching the importer to fold/avoid AsyncHelpers.TransparentAwaitWithResult in more cases, reducing call/alloc overhead in optimized async-return paths.

Changes:

  • Mark Task.CompletedTask, Task.FromResult, ValueTask.CompletedTask, ValueTask.FromResult, and ValueTask<TResult>(TResult) with [Intrinsic] to enable JIT recognition.
  • Add new JIT NamedIntrinsic IDs plus importer expansion logic for async-callconv FromResult / CompletedTask, and add a new impFoldAwaitedTopOfStack fast-path for certain ValueTask return patterns.
  • Improve debug local naming by labeling ASYNC_CONTINUATION_ILNUM locals as AsyncCont.

Reviewed changes

Copilot reviewed 7 out of 7 changed files in this pull request and generated 6 comments.

Show a summary per file
File Description
src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/ValueTask.cs Adds [Intrinsic] to ValueTask.CompletedTask, ValueTask.FromResult, and ValueTask<TResult>(TResult) ctor to enable JIT recognition.
src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs Adds [Intrinsic] to Task.CompletedTask getter and Task.FromResult to enable async-callconv intrinsic expansion.
src/coreclr/jit/namedintrinsiclist.h Introduces new NamedIntrinsic entries for Task/ValueTask factory methods used by the importer.
src/coreclr/jit/importercalls.cpp Implements async-callconv intrinsic expansion/folding for FromResult/CompletedTask, and extends intrinsic lookup for Task/ValueTask methods.
src/coreclr/jit/importer.cpp Refactors return/await wrapping logic and adds impFoldAwaitedTopOfStack to avoid unnecessary TransparentAwaitWithResult calls.
src/coreclr/jit/gentree.cpp Improves local name printing by labeling async continuation locals as AsyncCont.
src/coreclr/jit/compiler.h Adds the impFoldAwaitedTopOfStack declaration.

Comment on lines +11796 to +11798
// Returns:
// True if the top of stack was folded and pushed on the stack.
//
impPushOnStack(gtNewLclVarNode(returnLcl), typeInfo(TYP_STRUCT));
}

JITDUMP("Optimized \"return new ValueTask() to return default\n");

StackEntry se = impPopStack();
impPushOnStack(value, se.seTypeInfo);
JITDUMP("Optimized \"return new ValueTask(value) to return value directly:\n");
Comment on lines +11846 to +11863
impPopStack();
if (info.compRetType == TYP_VOID)
{
lastTree->gtBashToNOP();
}
else if (info.compRetType != TYP_STRUCT)
{
impPushOnStack(gtNewZeroConNode(info.compRetType), typeInfo(info.compRetType));
lastTree->gtBashToNOP();
}
else
{
unsigned returnLcl = lvaGrabTemp(true DEBUGARG("Return temp"));
lvaSetStruct(returnLcl, info.compMethodInfo->args.retTypeClass, false);
// Reuse the ValueTask zeroing
lastTree->AsLclVar()->SetLclNum(returnLcl);
impPushOnStack(gtNewLclVarNode(returnLcl), typeInfo(TYP_STRUCT));
}
Comment on lines +5099 to +5109
case NI_System_Threading_Tasks_Task_FromResult:
case NI_System_Threading_Tasks_ValueTask_FromResult:
{
if ((sig->callConv & CORINFO_CALLCONV_ASYNCCALL) == 0)
{
break;
}

CORINFO_CLASS_HANDLE typeHnd = sig->sigInst.methInst[0];
ClassLayout* layout = nullptr;
var_types type = TypeHandleToVarType(typeHnd, &layout);
Comment on lines +11669 to +11683
else if (strcmp(className, "ValueTask") == 0)
{
if (strcmp(methodName, "FromResult") == 0)
{
result = NI_System_Threading_Tasks_ValueTask_FromResult;
}
else if (strcmp(methodName, "get_CompletedTask") == 0)
{
result = NI_System_Threading_Tasks_ValueTask_get_CompletedTask;
}
else if (strcmp(methodName, ".ctor") == 0)
{
result = NI_System_Threading_Tasks_ValueTask__ctor;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants