Source file src/pkg/strings/replace.go
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings 6 7 import "io" 8 9 // A Replacer replaces a list of strings with replacements. 10 type Replacer struct { 11 r replacer 12 } 13 14 // replacer is the interface that a replacement algorithm needs to implement. 15 type replacer interface { 16 Replace(s string) string 17 WriteString(w io.Writer, s string) (n int, err error) 18 } 19 20 // byteBitmap represents bytes which are sought for replacement. 21 // byteBitmap is 256 bits wide, with a bit set for each old byte to be 22 // replaced. 23 type byteBitmap [256 / 32]uint32 24 25 func (m *byteBitmap) set(b byte) { 26 m[b>>5] |= uint32(1 << (b & 31)) 27 } 28 29 // NewReplacer returns a new Replacer from a list of old, new string pairs. 30 // Replacements are performed in order, without overlapping matches. 31 func NewReplacer(oldnew ...string) *Replacer { 32 if len(oldnew)%2 == 1 { 33 panic("strings.NewReplacer: odd argument count") 34 } 35 36 // Possible implementations. 37 var ( 38 bb byteReplacer 39 bs byteStringReplacer 40 gen genericReplacer 41 ) 42 43 allOldBytes, allNewBytes := true, true 44 for len(oldnew) > 0 { 45 old, new := oldnew[0], oldnew[1] 46 oldnew = oldnew[2:] 47 if len(old) != 1 { 48 allOldBytes = false 49 } 50 if len(new) != 1 { 51 allNewBytes = false 52 } 53 54 // generic 55 gen.p = append(gen.p, pair{old, new}) 56 57 // byte -> string 58 if allOldBytes { 59 bs.old.set(old[0]) 60 bs.new[old[0]] = []byte(new) 61 } 62 63 // byte -> byte 64 if allOldBytes && allNewBytes { 65 bb.old.set(old[0]) 66 bb.new[old[0]] = new[0] 67 } 68 } 69 70 if allOldBytes && allNewBytes { 71 return &Replacer{r: &bb} 72 } 73 if allOldBytes { 74 return &Replacer{r: &bs} 75 } 76 return &Replacer{r: &gen} 77 } 78 79 // Replace returns a copy of s with all replacements performed. 80 func (r *Replacer) Replace(s string) string { 81 return r.r.Replace(s) 82 } 83 84 // WriteString writes s to w with all replacements performed. 85 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { 86 return r.r.WriteString(w, s) 87 } 88 89 // genericReplacer is the fully generic (and least optimized) algorithm. 90 // It's used as a fallback when nothing faster can be used. 91 type genericReplacer struct { 92 p []pair 93 } 94 95 type pair struct{ old, new string } 96 97 type appendSliceWriter struct { 98 b []byte 99 } 100 101 func (w *appendSliceWriter) Write(p []byte) (int, error) { 102 w.b = append(w.b, p...) 103 return len(p), nil 104 } 105 106 func (r *genericReplacer) Replace(s string) string { 107 // TODO(bradfitz): optimized version 108 n, _ := r.WriteString(discard, s) 109 w := appendSliceWriter{make([]byte, 0, n)} 110 r.WriteString(&w, s) 111 return string(w.b) 112 } 113 114 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { 115 lastEmpty := false // the last replacement was of the empty string 116 Input: 117 // TODO(bradfitz): optimized version 118 for i := 0; i < len(s); { 119 for _, p := range r.p { 120 if p.old == "" && lastEmpty { 121 // Don't let old match twice in a row. 122 // (it doesn't advance the input and 123 // would otherwise loop forever) 124 continue 125 } 126 if HasPrefix(s[i:], p.old) { 127 if p.new != "" { 128 wn, err := w.Write([]byte(p.new)) 129 n += wn 130 if err != nil { 131 return n, err 132 } 133 } 134 i += len(p.old) 135 lastEmpty = p.old == "" 136 continue Input 137 } 138 } 139 wn, err := w.Write([]byte{s[i]}) 140 n += wn 141 if err != nil { 142 return n, err 143 } 144 i++ 145 } 146 147 // Final empty match at end. 148 for _, p := range r.p { 149 if p.old == "" { 150 if p.new != "" { 151 wn, err := w.Write([]byte(p.new)) 152 n += wn 153 if err != nil { 154 return n, err 155 } 156 } 157 break 158 } 159 } 160 161 return n, nil 162 } 163 164 // byteReplacer is the implementation that's used when all the "old" 165 // and "new" values are single ASCII bytes. 166 type byteReplacer struct { 167 // old has a bit set for each old byte that should be replaced. 168 old byteBitmap 169 170 // replacement byte, indexed by old byte. only valid if 171 // corresponding old bit is set. 172 new [256]byte 173 } 174 175 func (r *byteReplacer) Replace(s string) string { 176 var buf []byte // lazily allocated 177 for i := 0; i < len(s); i++ { 178 b := s[i] 179 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 180 if buf == nil { 181 buf = []byte(s) 182 } 183 buf[i] = r.new[b] 184 } 185 } 186 if buf == nil { 187 return s 188 } 189 return string(buf) 190 } 191 192 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { 193 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. 194 bufsize := 32 << 10 195 if len(s) < bufsize { 196 bufsize = len(s) 197 } 198 buf := make([]byte, bufsize) 199 200 for len(s) > 0 { 201 ncopy := copy(buf, s[:]) 202 s = s[ncopy:] 203 for i, b := range buf[:ncopy] { 204 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 205 buf[i] = r.new[b] 206 } 207 } 208 wn, err := w.Write(buf[:ncopy]) 209 n += wn 210 if err != nil { 211 return n, err 212 } 213 } 214 return n, nil 215 } 216 217 // byteStringReplacer is the implementation that's used when all the 218 // "old" values are single ASCII bytes but the "new" values vary in 219 // size. 220 type byteStringReplacer struct { 221 // old has a bit set for each old byte that should be replaced. 222 old byteBitmap 223 224 // replacement string, indexed by old byte. only valid if 225 // corresponding old bit is set. 226 new [256][]byte 227 } 228 229 func (r *byteStringReplacer) Replace(s string) string { 230 newSize := 0 231 anyChanges := false 232 for i := 0; i < len(s); i++ { 233 b := s[i] 234 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 235 anyChanges = true 236 newSize += len(r.new[b]) 237 } else { 238 newSize++ 239 } 240 } 241 if !anyChanges { 242 return s 243 } 244 buf := make([]byte, newSize) 245 bi := buf 246 for i := 0; i < len(s); i++ { 247 b := s[i] 248 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 249 n := copy(bi[:], r.new[b]) 250 bi = bi[n:] 251 } else { 252 bi[0] = b 253 bi = bi[1:] 254 } 255 } 256 return string(buf) 257 } 258 259 // WriteString maintains one buffer that's at most 32KB. The bytes in 260 // s are enumerated and the buffer is filled. If it reaches its 261 // capacity or a byte has a replacement, the buffer is flushed to w. 262 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { 263 // TODO(bradfitz): use io.WriteString with slices of s instead. 264 bufsize := 32 << 10 265 if len(s) < bufsize { 266 bufsize = len(s) 267 } 268 buf := make([]byte, bufsize) 269 bi := buf[:0] 270 271 for i := 0; i < len(s); i++ { 272 b := s[i] 273 var new []byte 274 if r.old[b>>5]&uint32(1<<(b&31)) != 0 { 275 new = r.new[b] 276 } else { 277 bi = append(bi, b) 278 } 279 if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { 280 nw, err := w.Write(bi) 281 n += nw 282 if err != nil { 283 return n, err 284 } 285 bi = buf[:0] 286 } 287 if len(new) > 0 { 288 nw, err := w.Write(new) 289 n += nw 290 if err != nil { 291 return n, err 292 } 293 } 294 } 295 if len(bi) > 0 { 296 nw, err := w.Write(bi) 297 n += nw 298 if err != nil { 299 return n, err 300 } 301 } 302 return n, nil 303 } 304 305 // strings is too low-level to import io/ioutil 306 var discard io.Writer = devNull(0) 307 308 type devNull int 309 310 func (devNull) Write(p []byte) (int, error) { 311 return len(p), nil 312 }