src/pkg/strings/replace.go - The Go Programming Language

Golang

Source file src/pkg/strings/replace.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strings
     6	
     7	import "io"
     8	
     9	// A Replacer replaces a list of strings with replacements.
    10	type Replacer struct {
    11		r replacer
    12	}
    13	
    14	// replacer is the interface that a replacement algorithm needs to implement.
    15	type replacer interface {
    16		Replace(s string) string
    17		WriteString(w io.Writer, s string) (n int, err error)
    18	}
    19	
    20	// byteBitmap represents bytes which are sought for replacement.
    21	// byteBitmap is 256 bits wide, with a bit set for each old byte to be
    22	// replaced.
    23	type byteBitmap [256 / 32]uint32
    24	
    25	func (m *byteBitmap) set(b byte) {
    26		m[b>>5] |= uint32(1 << (b & 31))
    27	}
    28	
    29	// NewReplacer returns a new Replacer from a list of old, new string pairs.
    30	// Replacements are performed in order, without overlapping matches.
    31	func NewReplacer(oldnew ...string) *Replacer {
    32		if len(oldnew)%2 == 1 {
    33			panic("strings.NewReplacer: odd argument count")
    34		}
    35	
    36		// Possible implementations.
    37		var (
    38			bb  byteReplacer
    39			bs  byteStringReplacer
    40			gen genericReplacer
    41		)
    42	
    43		allOldBytes, allNewBytes := true, true
    44		for len(oldnew) > 0 {
    45			old, new := oldnew[0], oldnew[1]
    46			oldnew = oldnew[2:]
    47			if len(old) != 1 {
    48				allOldBytes = false
    49			}
    50			if len(new) != 1 {
    51				allNewBytes = false
    52			}
    53	
    54			// generic
    55			gen.p = append(gen.p, pair{old, new})
    56	
    57			// byte -> string
    58			if allOldBytes {
    59				bs.old.set(old[0])
    60				bs.new[old[0]] = []byte(new)
    61			}
    62	
    63			// byte -> byte
    64			if allOldBytes && allNewBytes {
    65				bb.old.set(old[0])
    66				bb.new[old[0]] = new[0]
    67			}
    68		}
    69	
    70		if allOldBytes && allNewBytes {
    71			return &Replacer{r: &bb}
    72		}
    73		if allOldBytes {
    74			return &Replacer{r: &bs}
    75		}
    76		return &Replacer{r: &gen}
    77	}
    78	
    79	// Replace returns a copy of s with all replacements performed.
    80	func (r *Replacer) Replace(s string) string {
    81		return r.r.Replace(s)
    82	}
    83	
    84	// WriteString writes s to w with all replacements performed.
    85	func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
    86		return r.r.WriteString(w, s)
    87	}
    88	
    89	// genericReplacer is the fully generic (and least optimized) algorithm.
    90	// It's used as a fallback when nothing faster can be used.
    91	type genericReplacer struct {
    92		p []pair
    93	}
    94	
    95	type pair struct{ old, new string }
    96	
    97	type appendSliceWriter struct {
    98		b []byte
    99	}
   100	
   101	func (w *appendSliceWriter) Write(p []byte) (int, error) {
   102		w.b = append(w.b, p...)
   103		return len(p), nil
   104	}
   105	
   106	func (r *genericReplacer) Replace(s string) string {
   107		// TODO(bradfitz): optimized version
   108		n, _ := r.WriteString(discard, s)
   109		w := appendSliceWriter{make([]byte, 0, n)}
   110		r.WriteString(&w, s)
   111		return string(w.b)
   112	}
   113	
   114	func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   115		lastEmpty := false // the last replacement was of the empty string
   116	Input:
   117		// TODO(bradfitz): optimized version
   118		for i := 0; i < len(s); {
   119			for _, p := range r.p {
   120				if p.old == "" && lastEmpty {
   121					// Don't let old match twice in a row.
   122					// (it doesn't advance the input and
   123					// would otherwise loop forever)
   124					continue
   125				}
   126				if HasPrefix(s[i:], p.old) {
   127					if p.new != "" {
   128						wn, err := w.Write([]byte(p.new))
   129						n += wn
   130						if err != nil {
   131							return n, err
   132						}
   133					}
   134					i += len(p.old)
   135					lastEmpty = p.old == ""
   136					continue Input
   137				}
   138			}
   139			wn, err := w.Write([]byte{s[i]})
   140			n += wn
   141			if err != nil {
   142				return n, err
   143			}
   144			i++
   145		}
   146	
   147		// Final empty match at end.
   148		for _, p := range r.p {
   149			if p.old == "" {
   150				if p.new != "" {
   151					wn, err := w.Write([]byte(p.new))
   152					n += wn
   153					if err != nil {
   154						return n, err
   155					}
   156				}
   157				break
   158			}
   159		}
   160	
   161		return n, nil
   162	}
   163	
   164	// byteReplacer is the implementation that's used when all the "old"
   165	// and "new" values are single ASCII bytes.
   166	type byteReplacer struct {
   167		// old has a bit set for each old byte that should be replaced.
   168		old byteBitmap
   169	
   170		// replacement byte, indexed by old byte. only valid if
   171		// corresponding old bit is set.
   172		new [256]byte
   173	}
   174	
   175	func (r *byteReplacer) Replace(s string) string {
   176		var buf []byte // lazily allocated
   177		for i := 0; i < len(s); i++ {
   178			b := s[i]
   179			if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
   180				if buf == nil {
   181					buf = []byte(s)
   182				}
   183				buf[i] = r.new[b]
   184			}
   185		}
   186		if buf == nil {
   187			return s
   188		}
   189		return string(buf)
   190	}
   191	
   192	func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   193		// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
   194		bufsize := 32 << 10
   195		if len(s) < bufsize {
   196			bufsize = len(s)
   197		}
   198		buf := make([]byte, bufsize)
   199	
   200		for len(s) > 0 {
   201			ncopy := copy(buf, s[:])
   202			s = s[ncopy:]
   203			for i, b := range buf[:ncopy] {
   204				if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
   205					buf[i] = r.new[b]
   206				}
   207			}
   208			wn, err := w.Write(buf[:ncopy])
   209			n += wn
   210			if err != nil {
   211				return n, err
   212			}
   213		}
   214		return n, nil
   215	}
   216	
   217	// byteStringReplacer is the implementation that's used when all the
   218	// "old" values are single ASCII bytes but the "new" values vary in
   219	// size.
   220	type byteStringReplacer struct {
   221		// old has a bit set for each old byte that should be replaced.
   222		old byteBitmap
   223	
   224		// replacement string, indexed by old byte. only valid if
   225		// corresponding old bit is set.
   226		new [256][]byte
   227	}
   228	
   229	func (r *byteStringReplacer) Replace(s string) string {
   230		newSize := 0
   231		anyChanges := false
   232		for i := 0; i < len(s); i++ {
   233			b := s[i]
   234			if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
   235				anyChanges = true
   236				newSize += len(r.new[b])
   237			} else {
   238				newSize++
   239			}
   240		}
   241		if !anyChanges {
   242			return s
   243		}
   244		buf := make([]byte, newSize)
   245		bi := buf
   246		for i := 0; i < len(s); i++ {
   247			b := s[i]
   248			if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
   249				n := copy(bi[:], r.new[b])
   250				bi = bi[n:]
   251			} else {
   252				bi[0] = b
   253				bi = bi[1:]
   254			}
   255		}
   256		return string(buf)
   257	}
   258	
   259	// WriteString maintains one buffer that's at most 32KB.  The bytes in
   260	// s are enumerated and the buffer is filled.  If it reaches its
   261	// capacity or a byte has a replacement, the buffer is flushed to w.
   262	func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   263		// TODO(bradfitz): use io.WriteString with slices of s instead.
   264		bufsize := 32 << 10
   265		if len(s) < bufsize {
   266			bufsize = len(s)
   267		}
   268		buf := make([]byte, bufsize)
   269		bi := buf[:0]
   270	
   271		for i := 0; i < len(s); i++ {
   272			b := s[i]
   273			var new []byte
   274			if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
   275				new = r.new[b]
   276			} else {
   277				bi = append(bi, b)
   278			}
   279			if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
   280				nw, err := w.Write(bi)
   281				n += nw
   282				if err != nil {
   283					return n, err
   284				}
   285				bi = buf[:0]
   286			}
   287			if len(new) > 0 {
   288				nw, err := w.Write(new)
   289				n += nw
   290				if err != nil {
   291					return n, err
   292				}
   293			}
   294		}
   295		if len(bi) > 0 {
   296			nw, err := w.Write(bi)
   297			n += nw
   298			if err != nil {
   299				return n, err
   300			}
   301		}
   302		return n, nil
   303	}
   304	
   305	// strings is too low-level to import io/ioutil
   306	var discard io.Writer = devNull(0)
   307	
   308	type devNull int
   309	
   310	func (devNull) Write(p []byte) (int, error) {
   311		return len(p), nil
   312	}