Source file src/pkg/runtime/pprof/pprof.go
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package pprof writes runtime profiling data in the format expected
6 // by the pprof visualization tool.
7 // For more information about pprof, see
8 // http://code.google.com/p/google-perftools/.
9 package pprof
10
11 import (
12 "bufio"
13 "bytes"
14 "fmt"
15 "io"
16 "runtime"
17 "sort"
18 "strings"
19 "sync"
20 "text/tabwriter"
21 )
22
23 // BUG(rsc): A bug in the OS X Snow Leopard 64-bit kernel prevents
24 // CPU profiling from giving accurate results on that system.
25
26 // A Profile is a collection of stack traces showing the call sequences
27 // that led to instances of a particular event, such as allocation.
28 // Packages can create and maintain their own profiles; the most common
29 // use is for tracking resources that must be explicitly closed, such as files
30 // or network connections.
31 //
32 // A Profile's methods can be called from multiple goroutines simultaneously.
33 //
34 // Each Profile has a unique name. A few profiles are predefined:
35 //
36 // goroutine - stack traces of all current goroutines
37 // heap - a sampling of all heap allocations
38 // threadcreate - stack traces that led to the creation of new OS threads
39 //
40 // These predefine profiles maintain themselves and panic on an explicit
41 // Add or Remove method call.
42 //
43 // The CPU profile is not available as a Profile. It has a special API,
44 // the StartCPUProfile and StopCPUProfile functions, because it streams
45 // output to a writer during profiling.
46 //
47 type Profile struct {
48 name string
49 mu sync.Mutex
50 m map[interface{}][]uintptr
51 count func() int
52 write func(io.Writer, int) error
53 }
54
55 // profiles records all registered profiles.
56 var profiles struct {
57 mu sync.Mutex
58 m map[string]*Profile
59 }
60
61 var goroutineProfile = &Profile{
62 name: "goroutine",
63 count: countGoroutine,
64 write: writeGoroutine,
65 }
66
67 var threadcreateProfile = &Profile{
68 name: "threadcreate",
69 count: countThreadCreate,
70 write: writeThreadCreate,
71 }
72
73 var heapProfile = &Profile{
74 name: "heap",
75 count: countHeap,
76 write: writeHeap,
77 }
78
79 func lockProfiles() {
80 profiles.mu.Lock()
81 if profiles.m == nil {
82 // Initial built-in profiles.
83 profiles.m = map[string]*Profile{
84 "goroutine": goroutineProfile,
85 "threadcreate": threadcreateProfile,
86 "heap": heapProfile,
87 }
88 }
89 }
90
91 func unlockProfiles() {
92 profiles.mu.Unlock()
93 }
94
95 // NewProfile creates a new profile with the given name.
96 // If a profile with that name already exists, NewProfile panics.
97 // The convention is to use a 'import/path.' prefix to create
98 // separate name spaces for each package.
99 func NewProfile(name string) *Profile {
100 lockProfiles()
101 defer unlockProfiles()
102 if name == "" {
103 panic("pprof: NewProfile with empty name")
104 }
105 if profiles.m[name] != nil {
106 panic("pprof: NewProfile name already in use: " + name)
107 }
108 p := &Profile{
109 name: name,
110 m: map[interface{}][]uintptr{},
111 }
112 profiles.m[name] = p
113 return p
114 }
115
116 // Lookup returns the profile with the given name, or nil if no such profile exists.
117 func Lookup(name string) *Profile {
118 lockProfiles()
119 defer unlockProfiles()
120 return profiles.m[name]
121 }
122
123 // Profiles returns a slice of all the known profiles, sorted by name.
124 func Profiles() []*Profile {
125 lockProfiles()
126 defer unlockProfiles()
127
128 var all []*Profile
129 for _, p := range profiles.m {
130 all = append(all, p)
131 }
132
133 sort.Sort(byName(all))
134 return all
135 }
136
137 type byName []*Profile
138
139 func (x byName) Len() int { return len(x) }
140 func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
141 func (x byName) Less(i, j int) bool { return x[i].name < x[j].name }
142
143 // Name returns this profile's name, which can be passed to Lookup to reobtain the profile.
144 func (p *Profile) Name() string {
145 return p.name
146 }
147
148 // Count returns the number of execution stacks currently in the profile.
149 func (p *Profile) Count() int {
150 p.mu.Lock()
151 defer p.mu.Unlock()
152 if p.count != nil {
153 return p.count()
154 }
155 return len(p.m)
156 }
157
158 // Add adds the current execution stack to the profile, associated with value.
159 // Add stores value in an internal map, so value must be suitable for use as
160 // a map key and will not be garbage collected until the corresponding
161 // call to Remove. Add panics if the profile already contains a stack for value.
162 //
163 // The skip parameter has the same meaning as runtime.Caller's skip
164 // and controls where the stack trace begins. Passing skip=0 begins the
165 // trace in the function calling Add. For example, given this
166 // execution stack:
167 //
168 // Add
169 // called from rpc.NewClient
170 // called from mypkg.Run
171 // called from main.main
172 //
173 // Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient.
174 // Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run.
175 //
176 func (p *Profile) Add(value interface{}, skip int) {
177 if p.name == "" {
178 panic("pprof: use of uninitialized Profile")
179 }
180 if p.write != nil {
181 panic("pprof: Add called on built-in Profile " + p.name)
182 }
183
184 stk := make([]uintptr, 32)
185 n := runtime.Callers(skip+1, stk[:])
186
187 p.mu.Lock()
188 defer p.mu.Unlock()
189 if p.m[value] != nil {
190 panic("pprof: Profile.Add of duplicate value")
191 }
192 p.m[value] = stk[:n]
193 }
194
195 // Remove removes the execution stack associated with value from the profile.
196 // It is a no-op if the value is not in the profile.
197 func (p *Profile) Remove(value interface{}) {
198 p.mu.Lock()
199 defer p.mu.Unlock()
200 delete(p.m, value)
201 }
202
203 // WriteTo writes a pprof-formatted snapshot of the profile to w.
204 // If a write to w returns an error, WriteTo returns that error.
205 // Otherwise, WriteTo returns nil.
206 //
207 // The debug parameter enables additional output.
208 // Passing debug=0 prints only the hexadecimal addresses that pprof needs.
209 // Passing debug=1 adds comments translating addresses to function names
210 // and line numbers, so that a programmer can read the profile without tools.
211 //
212 // The predefined profiles may assign meaning to other debug values;
213 // for example, when printing the "goroutine" profile, debug=2 means to
214 // print the goroutine stacks in the same form that a Go program uses
215 // when dying due to an unrecovered panic.
216 func (p *Profile) WriteTo(w io.Writer, debug int) error {
217 if p.name == "" {
218 panic("pprof: use of zero Profile")
219 }
220 if p.write != nil {
221 return p.write(w, debug)
222 }
223
224 // Obtain consistent snapshot under lock; then process without lock.
225 var all [][]uintptr
226 p.mu.Lock()
227 for _, stk := range p.m {
228 all = append(all, stk)
229 }
230 p.mu.Unlock()
231
232 // Map order is non-deterministic; make output deterministic.
233 sort.Sort(stackProfile(all))
234
235 return printCountProfile(w, debug, p.name, stackProfile(all))
236 }
237
238 type stackProfile [][]uintptr
239
240 func (x stackProfile) Len() int { return len(x) }
241 func (x stackProfile) Stack(i int) []uintptr { return x[i] }
242 func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
243 func (x stackProfile) Less(i, j int) bool {
244 t, u := x[i], x[j]
245 for k := 0; k < len(t) && k < len(u); k++ {
246 if t[k] != u[k] {
247 return t[k] < u[k]
248 }
249 }
250 return len(t) < len(u)
251 }
252
253 // A countProfile is a set of stack traces to be printed as counts
254 // grouped by stack trace. There are multiple implementations:
255 // all that matters is that we can find out how many traces there are
256 // and obtain each trace in turn.
257 type countProfile interface {
258 Len() int
259 Stack(i int) []uintptr
260 }
261
262 // printCountProfile prints a countProfile at the specified debug level.
263 func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
264 b := bufio.NewWriter(w)
265 var tw *tabwriter.Writer
266 w = b
267 if debug > 0 {
268 tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
269 w = tw
270 }
271
272 fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len())
273
274 // Build count of each stack.
275 var buf bytes.Buffer
276 key := func(stk []uintptr) string {
277 buf.Reset()
278 fmt.Fprintf(&buf, "@")
279 for _, pc := range stk {
280 fmt.Fprintf(&buf, " %#x", pc)
281 }
282 return buf.String()
283 }
284 m := map[string]int{}
285 n := p.Len()
286 for i := 0; i < n; i++ {
287 m[key(p.Stack(i))]++
288 }
289
290 // Print stacks, listing count on first occurrence of a unique stack.
291 for i := 0; i < n; i++ {
292 stk := p.Stack(i)
293 s := key(stk)
294 if count := m[s]; count != 0 {
295 fmt.Fprintf(w, "%d %s\n", count, s)
296 if debug > 0 {
297 printStackRecord(w, stk, false)
298 }
299 delete(m, s)
300 }
301 }
302
303 if tw != nil {
304 tw.Flush()
305 }
306 return b.Flush()
307 }
308
309 // printStackRecord prints the function + source line information
310 // for a single stack trace.
311 func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
312 show := allFrames
313 for _, pc := range stk {
314 f := runtime.FuncForPC(pc)
315 if f == nil {
316 show = true
317 fmt.Fprintf(w, "#\t%#x\n", pc)
318 } else {
319 file, line := f.FileLine(pc)
320 name := f.Name()
321 // Hide runtime.goexit and any runtime functions at the beginning.
322 // This is useful mainly for allocation traces.
323 if name == "runtime.goexit" || !show && strings.HasPrefix(name, "runtime.") {
324 continue
325 }
326 show = true
327 fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, f.Name(), pc-f.Entry(), file, line)
328 }
329 }
330 if !show {
331 // We didn't print anything; do it again,
332 // and this time include runtime functions.
333 printStackRecord(w, stk, true)
334 return
335 }
336 fmt.Fprintf(w, "\n")
337 }
338
339 // Interface to system profiles.
340
341 type byInUseBytes []runtime.MemProfileRecord
342
343 func (x byInUseBytes) Len() int { return len(x) }
344 func (x byInUseBytes) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
345 func (x byInUseBytes) Less(i, j int) bool { return x[i].InUseBytes() > x[j].InUseBytes() }
346
347 // WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0).
348 // It is preserved for backwards compatibility.
349 func WriteHeapProfile(w io.Writer) error {
350 return writeHeap(w, 0)
351 }
352
353 // countHeap returns the number of records in the heap profile.
354 func countHeap() int {
355 n, _ := runtime.MemProfile(nil, false)
356 return n
357 }
358
359 // writeHeapProfile writes the current runtime heap profile to w.
360 func writeHeap(w io.Writer, debug int) error {
361 // Find out how many records there are (MemProfile(nil, false)),
362 // allocate that many records, and get the data.
363 // There's a race—more records might be added between
364 // the two calls—so allocate a few extra records for safety
365 // and also try again if we're very unlucky.
366 // The loop should only execute one iteration in the common case.
367 var p []runtime.MemProfileRecord
368 n, ok := runtime.MemProfile(nil, false)
369 for {
370 // Allocate room for a slightly bigger profile,
371 // in case a few more entries have been added
372 // since the call to MemProfile.
373 p = make([]runtime.MemProfileRecord, n+50)
374 n, ok = runtime.MemProfile(p, false)
375 if ok {
376 p = p[0:n]
377 break
378 }
379 // Profile grew; try again.
380 }
381
382 sort.Sort(byInUseBytes(p))
383
384 b := bufio.NewWriter(w)
385 var tw *tabwriter.Writer
386 w = b
387 if debug > 0 {
388 tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
389 w = tw
390 }
391
392 var total runtime.MemProfileRecord
393 for i := range p {
394 r := &p[i]
395 total.AllocBytes += r.AllocBytes
396 total.AllocObjects += r.AllocObjects
397 total.FreeBytes += r.FreeBytes
398 total.FreeObjects += r.FreeObjects
399 }
400
401 // Technically the rate is MemProfileRate not 2*MemProfileRate,
402 // but early versions of the C++ heap profiler reported 2*MemProfileRate,
403 // so that's what pprof has come to expect.
404 fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
405 total.InUseObjects(), total.InUseBytes(),
406 total.AllocObjects, total.AllocBytes,
407 2*runtime.MemProfileRate)
408
409 for i := range p {
410 r := &p[i]
411 fmt.Fprintf(w, "%d: %d [%d: %d] @",
412 r.InUseObjects(), r.InUseBytes(),
413 r.AllocObjects, r.AllocBytes)
414 for _, pc := range r.Stack() {
415 fmt.Fprintf(w, " %#x", pc)
416 }
417 fmt.Fprintf(w, "\n")
418 if debug > 0 {
419 printStackRecord(w, r.Stack(), false)
420 }
421 }
422
423 // Print memstats information too.
424 // Pprof will ignore, but useful for people
425 if debug > 0 {
426 s := new(runtime.MemStats)
427 runtime.ReadMemStats(s)
428 fmt.Fprintf(w, "\n# runtime.MemStats\n")
429 fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
430 fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
431 fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
432 fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
433 fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
434
435 fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
436 fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
437 fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
438 fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
439
440 fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
441 fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
442 fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
443 fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
444
445 fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
446 fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
447 fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
448 fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC)
449 fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
450 }
451
452 if tw != nil {
453 tw.Flush()
454 }
455 return b.Flush()
456 }
457
458 // countThreadCreate returns the size of the current ThreadCreateProfile.
459 func countThreadCreate() int {
460 n, _ := runtime.ThreadCreateProfile(nil)
461 return n
462 }
463
464 // writeThreadCreate writes the current runtime ThreadCreateProfile to w.
465 func writeThreadCreate(w io.Writer, debug int) error {
466 return writeRuntimeProfile(w, debug, "threadcreate", runtime.ThreadCreateProfile)
467 }
468
469 // countGoroutine returns the number of goroutines.
470 func countGoroutine() int {
471 return runtime.NumGoroutine()
472 }
473
474 // writeGoroutine writes the current runtime GoroutineProfile to w.
475 func writeGoroutine(w io.Writer, debug int) error {
476 if debug >= 2 {
477 return writeGoroutineStacks(w)
478 }
479 return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile)
480 }
481
482 func writeGoroutineStacks(w io.Writer) error {
483 // We don't know how big the buffer needs to be to collect
484 // all the goroutines. Start with 1 MB and try a few times, doubling each time.
485 // Give up and use a truncated trace if 64 MB is not enough.
486 buf := make([]byte, 1<<20)
487 for i := 0; ; i++ {
488 n := runtime.Stack(buf, true)
489 if n < len(buf) {
490 buf = buf[:n]
491 break
492 }
493 if len(buf) >= 64<<20 {
494 // Filled 64 MB - stop there.
495 break
496 }
497 buf = make([]byte, 2*len(buf))
498 }
499 _, err := w.Write(buf)
500 return err
501 }
502
503 func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error {
504 // Find out how many records there are (fetch(nil)),
505 // allocate that many records, and get the data.
506 // There's a race—more records might be added between
507 // the two calls—so allocate a few extra records for safety
508 // and also try again if we're very unlucky.
509 // The loop should only execute one iteration in the common case.
510 var p []runtime.StackRecord
511 n, ok := fetch(nil)
512 for {
513 // Allocate room for a slightly bigger profile,
514 // in case a few more entries have been added
515 // since the call to ThreadProfile.
516 p = make([]runtime.StackRecord, n+10)
517 n, ok = fetch(p)
518 if ok {
519 p = p[0:n]
520 break
521 }
522 // Profile grew; try again.
523 }
524
525 return printCountProfile(w, debug, name, runtimeProfile(p))
526 }
527
528 type runtimeProfile []runtime.StackRecord
529
530 func (p runtimeProfile) Len() int { return len(p) }
531 func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() }
532
533 var cpu struct {
534 sync.Mutex
535 profiling bool
536 done chan bool
537 }
538
539 // StartCPUProfile enables CPU profiling for the current process.
540 // While profiling, the profile will be buffered and written to w.
541 // StartCPUProfile returns an error if profiling is already enabled.
542 func StartCPUProfile(w io.Writer) error {
543 // The runtime routines allow a variable profiling rate,
544 // but in practice operating systems cannot trigger signals
545 // at more than about 500 Hz, and our processing of the
546 // signal is not cheap (mostly getting the stack trace).
547 // 100 Hz is a reasonable choice: it is frequent enough to
548 // produce useful data, rare enough not to bog down the
549 // system, and a nice round number to make it easy to
550 // convert sample counts to seconds. Instead of requiring
551 // each client to specify the frequency, we hard code it.
552 const hz = 100
553
554 // Avoid queueing behind StopCPUProfile.
555 // Could use TryLock instead if we had it.
556 if cpu.profiling {
557 return fmt.Errorf("cpu profiling already in use")
558 }
559
560 cpu.Lock()
561 defer cpu.Unlock()
562 if cpu.done == nil {
563 cpu.done = make(chan bool)
564 }
565 // Double-check.
566 if cpu.profiling {
567 return fmt.Errorf("cpu profiling already in use")
568 }
569 cpu.profiling = true
570 runtime.SetCPUProfileRate(hz)
571 go profileWriter(w)
572 return nil
573 }
574
575 func profileWriter(w io.Writer) {
576 for {
577 data := runtime.CPUProfile()
578 if data == nil {
579 break
580 }
581 w.Write(data)
582 }
583 cpu.done <- true
584 }
585
586 // StopCPUProfile stops the current CPU profile, if any.
587 // StopCPUProfile only returns after all the writes for the
588 // profile have completed.
589 func StopCPUProfile() {
590 cpu.Lock()
591 defer cpu.Unlock()
592
593 if !cpu.profiling {
594 return
595 }
596 cpu.profiling = false
597 runtime.SetCPUProfileRate(0)
598 <-cpu.done
599 }