Golang 源码分析
2017-05-10
Golang 函数调用规约
汇编相关的寄存器
函数内存布局
几个重要的汇编
// mcall switches from the g to the g0 stack and invokes fn(g), // where g is the goroutine that made the call. // mcall saves g's current PC/SP in g->sched so that it can be restored later. // It is up to fn to arrange for that later execution, typically by recording // g in a data structure, causing something to call ready(g) later. // mcall returns to the original goroutine g later, when g has been rescheduled. // fn must not return at all; typically it ends by calling schedule, to let the m // run other goroutines. // // mcall can only be called from g stacks (not g0, not gsignal). // // This must NOT be go:noescape: if fn is a stack-allocated closure, // fn puts g on a run queue, and g executes before fn returns, the // closure will be invalidated while it is still executing. func mcall(fn func(*g))
mcall
会从 g
栈切换到 g0
调用函数 fn(g)
g
就是当前的 goroutine。
mcall 会保存 g 的当前 PC/SP
到 g->sched
,所以 g 可以在随后重新被加载。
fn 必需不能有返回值,一般情况下这个由调用 schedule 结束,这样可以让 m 运行其他 goroutines。
也就是说 mcall 使得当前 goroutines 放弃当前的运行。
mcall 只能在 g 栈被调用,不能在 g0,gsingal 栈调用
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
MOVQ fn+0(FP), DI //保存 fn 函数指针
get_tls(CX)
MOVQ g(CX), AX // save state in g->sched
MOVQ 0(SP), BX // caller's PC
MOVQ BX, (g_sched+gobuf_pc)(AX)
LEAQ fn+0(FP), BX // caller's SP
MOVQ BX, (g_sched+gobuf_sp)(AX)
MOVQ AX, (g_sched+gobuf_g)(AX)
MOVQ BP, (g_sched+gobuf_bp)(AX)
// switch to m->g0 & its stack, call fn
MOVQ g(CX), BX
MOVQ g_m(BX), BX
MOVQ m_g0(BX), SI
CMPQ SI, AX // if g == m->g0 call badmcall
JNE 3(PC)
MOVQ $runtime·badmcall(SB), AX
JMP AX
MOVQ SI, g(CX) // g = m->g0
MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
PUSHQ AX //切换栈指针后,将 g 参数压如栈
MOVQ DI, DX
MOVQ 0(DI), DI
CALL DI //调用函数 fn
POPQ AX
MOVQ $runtime·badmcall2(SB), AX
JMP AX
RET
// systemstack runs fn on a system stack.
// If systemstack is called from the per-OS-thread (g0) stack, or
// if systemstack is called from the signal handling (gsignal) stack,
// systemstack calls fn directly and returns.
// Otherwise, systemstack is being called from the limited stack
// of an ordinary goroutine. In this case, systemstack switches
// to the per-OS-thread stack, calls fn, and switches back.
// It is common to use a func literal as the argument, in order
// to share inputs and outputs with the code around the call
// to system stack:
//
// ... set up y ...
// systemstack(func() {
// x = bigcall(y)
// })
// ... use x ...
//
//go:noescape
func systemstack(fn func())
systemstack 运行 fn 在系统栈上,如果 systemstack 是在 g0 栈或者 gsignal 栈上,直接调用 fn 函数然后返回。 否则,systemstack 将切换到 g0 栈上调用 fn 然后切换回来。
// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
MOVQ fn+0(FP), DI // DI = fn
get_tls(CX)
MOVQ g(CX), AX // AX = g
MOVQ g_m(AX), BX // BX = m
MOVQ m_gsignal(BX), DX // DX = gsignal
CMPQ AX, DX
JEQ noswitch
MOVQ m_g0(BX), DX // DX = g0
CMPQ AX, DX
JEQ noswitch
MOVQ m_curg(BX), R8
CMPQ AX, R8
JEQ switch
// Bad: g is not gsignal, not g0, not curg. What is it?
MOVQ $runtime·badsystemstack(SB), AX
CALL AX
switch:
// save our state in g->sched. Pretend to
// be systemstack_switch if the G stack is scanned.
MOVQ $runtime·systemstack_switch(SB), SI
MOVQ SI, (g_sched+gobuf_pc)(AX)
MOVQ SP, (g_sched+gobuf_sp)(AX)
MOVQ AX, (g_sched+gobuf_g)(AX)
MOVQ BP, (g_sched+gobuf_bp)(AX)
// switch to g0
MOVQ DX, g(CX) //保存 g0 栈到 tls 中
MOVQ (g_sched+gobuf_sp)(DX), BX
// make it look like mstart called systemstack on g0, to stop traceback
SUBQ $8, BX
MOVQ $runtime·mstart(SB), DX
MOVQ DX, 0(BX)
MOVQ BX, SP
// call target function
MOVQ DI, DX
MOVQ 0(DI), DI
CALL DI
// switch back to g
get_tls(CX)
MOVQ g(CX), AX
MOVQ g_m(AX), BX
MOVQ m_curg(BX), AX
MOVQ AX, g(CX)
MOVQ (g_sched+gobuf_sp)(AX), SP
MOVQ $0, (g_sched+gobuf_sp)(AX)
RET
noswitch:
// already on m stack, just call directly
MOVQ DI, DX
MOVQ 0(DI), DI
CALL DI
RET
// goexit is the return stub at the top of every goroutine call stack.
// Each goroutine stack is constructed as if goexit called the
// goroutine's entry point function, so that when the entry point
// function returns, it will return to goexit, which will call goexit1
// to perform the actual exit.
//
// This function must never be called directly. Call goexit1 instead.
// gentraceback assumes that goexit terminates the stack. A direct
// call on the stack will cause gentraceback to stop walking the stack
// prematurely and if there are leftover stack barriers it may panic.
func goexit(neverCallThisFunction)
goexit 返回 goroutines 的栈的顶部。每一个 goroutines 栈构造时,会压入栈顶。 当 goroutines 返回时 goexit 实际上会执行。
goexit 不能直接调用。必需使用 goexit1 代替。
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
BYTE $0x90 // NOP
CALL runtime·goexit1(SB) // does not return
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
// getcallerpc returns the program counter (PC) of its caller's caller.
// getcallersp returns the stack pointer (SP) of its caller's caller.
// For both, the argp must be a pointer to the caller's first function argument.
// The implementation may or may not use argp, depending on
// the architecture.
//
// For example:
//
// func f(arg1, arg2, arg3 int) {
// pc := getcallerpc(unsafe.Pointer(&arg1))
// sp := getcallersp(unsafe.Pointer(&arg1))
// }
//
// These two lines find the PC and SP immediately following
// the call to f (where f will return).
//
// The call to getcallerpc and getcallersp must be done in the
// frame being asked about. It would not be correct for f to pass &arg1
// to another function g and let g call getcallerpc/getcallersp.
// The call inside g might return information about g's caller or
// information about f's caller or complete garbage.
//
// The result of getcallersp is correct at the time of the return,
// but it may be invalidated by any subsequent call to a function
// that might relocate the stack in order to grow or shrink it.
// A general rule is that the result of getcallersp should be used
// immediately and can only be passed to nosplit functions.
//go:noescape
func getcallerpc(argp unsafe.Pointer) uintptr
//go:noescape
func getcallersp(argp unsafe.Pointer) uintptr
func gogo(buf *gobuf)
// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
MOVQ buf+0(FP), BX // gobuf
MOVQ gobuf_g(BX), DX
MOVQ 0(DX), CX // make sure g != nil
get_tls(CX)
MOVQ DX, g(CX)
MOVQ gobuf_sp(BX), SP // restore SP
MOVQ gobuf_ret(BX), AX
MOVQ gobuf_ctxt(BX), DX
MOVQ gobuf_bp(BX), BP
MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
MOVQ $0, gobuf_ret(BX)
MOVQ $0, gobuf_ctxt(BX)
MOVQ $0, gobuf_bp(BX)
MOVQ gobuf_pc(BX), BX
JMP BX
gogo 函数在 m 上切换栈到 g 上,执行函数。 注意这里最后使用到 JMP , 那么函数在最后到 return 后 pc 是啥? 我们可以查看下构造 goroutines 是栈上到布局
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred.
//go:nosplit
func newproc(siz int32, fn *funcval) {
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
pc := getcallerpc(unsafe.Pointer(&siz))
systemstack(func() {
newproc1(fn, (*uint8)(argp), siz, 0, pc)
})
}
// Create a new g running fn with narg bytes of arguments starting
// at argp and returning nret bytes of results. callerpc is the
// address of the go statement that created this. The new g is put
// on the queue of g's waiting to run.
func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
_g_ := getg()
if fn == nil {
_g_.m.throwing = -1 // do not dump full stacks
throw("go of nil func value")
}
_g_.m.locks++ // disable preemption because it can be holding p in a local var
siz := narg + nret
siz = (siz + 7) &^ 7
// We could allocate a larger initial stack if necessary.
// Not worth it: this is almost always an error.
// 4*sizeof(uintreg): extra space added below
// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
throw("newproc: function arguments too large for new goroutine")
}
_p_ := _g_.m.p.ptr()
newg := gfget(_p_)
if newg == nil {
newg = malg(_StackMin)
casgstatus(newg, _Gidle, _Gdead)
newg.gcRescan = -1
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
}
if newg.stack.hi == 0 {
throw("newproc1: newg missing stack")
}
if readgstatus(newg) != _Gdead {
throw("newproc1: new g is not Gdead")
}
totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
totalSize += -totalSize & (sys.SpAlign - 1) // align to spAlign
sp := newg.stack.hi - totalSize
spArg := sp
if usesLR {
// caller's LR
*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
prepGoExitFrame(sp)
spArg += sys.MinFrameSize
}
memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
newg.sched.sp = sp
newg.stktopsp = sp
newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
gostartcallfn(&newg.sched, fn)
newg.gopc = callerpc
newg.startpc = fn.fn
if isSystemGoroutine(newg) {
atomic.Xadd(&sched.ngsys, +1)
}
// The stack is dirty from the argument frame, so queue it for
// scanning. Do this before setting it to runnable so we still
// own the G. If we're recycling a G, it may already be on the
// rescan list.
if newg.gcRescan == -1 {
queueRescan(newg)
} else {
// The recycled G is already on the rescan list. Just
// mark the stack dirty.
newg.gcscanvalid = false
}
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
_p_.goidcache -= _GoidCacheBatch - 1
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
}
newg.goid = int64(_p_.goidcache)
_p_.goidcache++
if raceenabled {
newg.racectx = racegostart(callerpc)
}
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
runqput(_p_, newg, true)
if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
wakep()
}
_g_.m.locks--
if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
_g_.stackguard0 = stackPreempt
}
return newg
}
// adjust Gobuf as it if executed a call to fn with context ctxt
// and then did an immediate gosave.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
sp := buf.sp
if sys.RegSize > sys.PtrSize {
sp -= sys.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = 0
}
sp -= sys.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = buf.pc
buf.sp = sp
buf.pc = uintptr(fn)
buf.ctxt = ctxt
}
func gosave(buf *gobuf)
// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
get_tls(R8)
MOVQ g(R8), R8
MOVQ 0(SP), R9
MOVQ R9, (g_sched+gobuf_pc)(R8)
LEAQ 8(SP), R9
MOVQ R9, (g_sched+gobuf_sp)(R8)
MOVQ $0, (g_sched+gobuf_ret)(R8)
MOVQ $0, (g_sched+gobuf_ctxt)(R8)
MOVQ BP, (g_sched+gobuf_bp)(R8)
RET
Goroutine 需要解决的问题
Metadata 组织
syscall 和 network 的问题
goroutine 之间的数据同步问题
##