Source file src/pkg/strings/replace.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package strings
6
7 import "io"
8
9 // A Replacer replaces a list of strings with replacements.
10 type Replacer struct {
11 r replacer
12 }
13
14 // replacer is the interface that a replacement algorithm needs to implement.
15 type replacer interface {
16 Replace(s string) string
17 WriteString(w io.Writer, s string) (n int, err error)
18 }
19
20 // byteBitmap represents bytes which are sought for replacement.
21 // byteBitmap is 256 bits wide, with a bit set for each old byte to be
22 // replaced.
23 type byteBitmap [256 / 32]uint32
24
25 func (m *byteBitmap) set(b byte) {
26 m[b>>5] |= uint32(1 << (b & 31))
27 }
28
29 // NewReplacer returns a new Replacer from a list of old, new string pairs.
30 // Replacements are performed in order, without overlapping matches.
31 func NewReplacer(oldnew ...string) *Replacer {
32 if len(oldnew)%2 == 1 {
33 panic("strings.NewReplacer: odd argument count")
34 }
35
36 // Possible implementations.
37 var (
38 bb byteReplacer
39 bs byteStringReplacer
40 gen genericReplacer
41 )
42
43 allOldBytes, allNewBytes := true, true
44 for len(oldnew) > 0 {
45 old, new := oldnew[0], oldnew[1]
46 oldnew = oldnew[2:]
47 if len(old) != 1 {
48 allOldBytes = false
49 }
50 if len(new) != 1 {
51 allNewBytes = false
52 }
53
54 // generic
55 gen.p = append(gen.p, pair{old, new})
56
57 // byte -> string
58 if allOldBytes {
59 bs.old.set(old[0])
60 bs.new[old[0]] = []byte(new)
61 }
62
63 // byte -> byte
64 if allOldBytes && allNewBytes {
65 bb.old.set(old[0])
66 bb.new[old[0]] = new[0]
67 }
68 }
69
70 if allOldBytes && allNewBytes {
71 return &Replacer{r: &bb}
72 }
73 if allOldBytes {
74 return &Replacer{r: &bs}
75 }
76 return &Replacer{r: &gen}
77 }
78
79 // Replace returns a copy of s with all replacements performed.
80 func (r *Replacer) Replace(s string) string {
81 return r.r.Replace(s)
82 }
83
84 // WriteString writes s to w with all replacements performed.
85 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
86 return r.r.WriteString(w, s)
87 }
88
89 // genericReplacer is the fully generic (and least optimized) algorithm.
90 // It's used as a fallback when nothing faster can be used.
91 type genericReplacer struct {
92 p []pair
93 }
94
95 type pair struct{ old, new string }
96
97 type appendSliceWriter struct {
98 b []byte
99 }
100
101 func (w *appendSliceWriter) Write(p []byte) (int, error) {
102 w.b = append(w.b, p...)
103 return len(p), nil
104 }
105
106 func (r *genericReplacer) Replace(s string) string {
107 // TODO(bradfitz): optimized version
108 n, _ := r.WriteString(discard, s)
109 w := appendSliceWriter{make([]byte, 0, n)}
110 r.WriteString(&w, s)
111 return string(w.b)
112 }
113
114 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
115 lastEmpty := false // the last replacement was of the empty string
116 Input:
117 // TODO(bradfitz): optimized version
118 for i := 0; i < len(s); {
119 for _, p := range r.p {
120 if p.old == "" && lastEmpty {
121 // Don't let old match twice in a row.
122 // (it doesn't advance the input and
123 // would otherwise loop forever)
124 continue
125 }
126 if HasPrefix(s[i:], p.old) {
127 if p.new != "" {
128 wn, err := w.Write([]byte(p.new))
129 n += wn
130 if err != nil {
131 return n, err
132 }
133 }
134 i += len(p.old)
135 lastEmpty = p.old == ""
136 continue Input
137 }
138 }
139 wn, err := w.Write([]byte{s[i]})
140 n += wn
141 if err != nil {
142 return n, err
143 }
144 i++
145 }
146
147 // Final empty match at end.
148 for _, p := range r.p {
149 if p.old == "" {
150 if p.new != "" {
151 wn, err := w.Write([]byte(p.new))
152 n += wn
153 if err != nil {
154 return n, err
155 }
156 }
157 break
158 }
159 }
160
161 return n, nil
162 }
163
164 // byteReplacer is the implementation that's used when all the "old"
165 // and "new" values are single ASCII bytes.
166 type byteReplacer struct {
167 // old has a bit set for each old byte that should be replaced.
168 old byteBitmap
169
170 // replacement byte, indexed by old byte. only valid if
171 // corresponding old bit is set.
172 new [256]byte
173 }
174
175 func (r *byteReplacer) Replace(s string) string {
176 var buf []byte // lazily allocated
177 for i := 0; i < len(s); i++ {
178 b := s[i]
179 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
180 if buf == nil {
181 buf = []byte(s)
182 }
183 buf[i] = r.new[b]
184 }
185 }
186 if buf == nil {
187 return s
188 }
189 return string(buf)
190 }
191
192 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
193 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
194 bufsize := 32 << 10
195 if len(s) < bufsize {
196 bufsize = len(s)
197 }
198 buf := make([]byte, bufsize)
199
200 for len(s) > 0 {
201 ncopy := copy(buf, s[:])
202 s = s[ncopy:]
203 for i, b := range buf[:ncopy] {
204 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
205 buf[i] = r.new[b]
206 }
207 }
208 wn, err := w.Write(buf[:ncopy])
209 n += wn
210 if err != nil {
211 return n, err
212 }
213 }
214 return n, nil
215 }
216
217 // byteStringReplacer is the implementation that's used when all the
218 // "old" values are single ASCII bytes but the "new" values vary in
219 // size.
220 type byteStringReplacer struct {
221 // old has a bit set for each old byte that should be replaced.
222 old byteBitmap
223
224 // replacement string, indexed by old byte. only valid if
225 // corresponding old bit is set.
226 new [256][]byte
227 }
228
229 func (r *byteStringReplacer) Replace(s string) string {
230 newSize := 0
231 anyChanges := false
232 for i := 0; i < len(s); i++ {
233 b := s[i]
234 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
235 anyChanges = true
236 newSize += len(r.new[b])
237 } else {
238 newSize++
239 }
240 }
241 if !anyChanges {
242 return s
243 }
244 buf := make([]byte, newSize)
245 bi := buf
246 for i := 0; i < len(s); i++ {
247 b := s[i]
248 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
249 n := copy(bi[:], r.new[b])
250 bi = bi[n:]
251 } else {
252 bi[0] = b
253 bi = bi[1:]
254 }
255 }
256 return string(buf)
257 }
258
259 // WriteString maintains one buffer that's at most 32KB. The bytes in
260 // s are enumerated and the buffer is filled. If it reaches its
261 // capacity or a byte has a replacement, the buffer is flushed to w.
262 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
263 // TODO(bradfitz): use io.WriteString with slices of s instead.
264 bufsize := 32 << 10
265 if len(s) < bufsize {
266 bufsize = len(s)
267 }
268 buf := make([]byte, bufsize)
269 bi := buf[:0]
270
271 for i := 0; i < len(s); i++ {
272 b := s[i]
273 var new []byte
274 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
275 new = r.new[b]
276 } else {
277 bi = append(bi, b)
278 }
279 if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
280 nw, err := w.Write(bi)
281 n += nw
282 if err != nil {
283 return n, err
284 }
285 bi = buf[:0]
286 }
287 if len(new) > 0 {
288 nw, err := w.Write(new)
289 n += nw
290 if err != nil {
291 return n, err
292 }
293 }
294 }
295 if len(bi) > 0 {
296 nw, err := w.Write(bi)
297 n += nw
298 if err != nil {
299 return n, err
300 }
301 }
302 return n, nil
303 }
304
305 // strings is too low-level to import io/ioutil
306 var discard io.Writer = devNull(0)
307
308 type devNull int
309
310 func (devNull) Write(p []byte) (int, error) {
311 return len(p), nil
312 }