src/pkg/net/http/sniff.go - The Go Programming Language

Golang

Source file src/pkg/net/http/sniff.go

     1	// Copyright 2011 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package http
     6	
     7	import (
     8		"bytes"
     9		"encoding/binary"
    10	)
    11	
    12	// The algorithm uses at most sniffLen bytes to make its decision.
    13	const sniffLen = 512
    14	
    15	// DetectContentType implements the algorithm described
    16	// at http://mimesniff.spec.whatwg.org/ to determine the
    17	// Content-Type of the given data.  It considers at most the
    18	// first 512 bytes of data.  DetectContentType always returns
    19	// a valid MIME type: if it cannot determine a more specific one, it
    20	// returns "application/octet-stream".
    21	func DetectContentType(data []byte) string {
    22		if len(data) > sniffLen {
    23			data = data[:sniffLen]
    24		}
    25	
    26		// Index of the first non-whitespace byte in data.
    27		firstNonWS := 0
    28		for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ {
    29		}
    30	
    31		for _, sig := range sniffSignatures {
    32			if ct := sig.match(data, firstNonWS); ct != "" {
    33				return ct
    34			}
    35		}
    36	
    37		return "application/octet-stream" // fallback
    38	}
    39	
    40	func isWS(b byte) bool {
    41		return bytes.IndexByte([]byte("\t\n\x0C\r "), b) != -1
    42	}
    43	
    44	type sniffSig interface {
    45		// match returns the MIME type of the data, or "" if unknown.
    46		match(data []byte, firstNonWS int) string
    47	}
    48	
    49	// Data matching the table in section 6.
    50	var sniffSignatures = []sniffSig{
    51		htmlSig("<!DOCTYPE HTML"),
    52		htmlSig("<HTML"),
    53		htmlSig("<HEAD"),
    54		htmlSig("<SCRIPT"),
    55		htmlSig("<IFRAME"),
    56		htmlSig("<H1"),
    57		htmlSig("<DIV"),
    58		htmlSig("<FONT"),
    59		htmlSig("<TABLE"),
    60		htmlSig("<A"),
    61		htmlSig("<STYLE"),
    62		htmlSig("<TITLE"),
    63		htmlSig("<B"),
    64		htmlSig("<BODY"),
    65		htmlSig("<BR"),
    66		htmlSig("<P"),
    67		htmlSig("<!--"),
    68	
    69		&maskedSig{mask: []byte("\xFF\xFF\xFF\xFF\xFF"), pat: []byte("<?xml"), skipWS: true, ct: "text/xml; charset=utf-8"},
    70	
    71		&exactSig{[]byte("%PDF-"), "application/pdf"},
    72		&exactSig{[]byte("%!PS-Adobe-"), "application/postscript"},
    73	
    74		// UTF BOMs.
    75		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFE\xFF\x00\x00"), ct: "text/plain; charset=utf-16be"},
    76		&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFF\xFE\x00\x00"), ct: "text/plain; charset=utf-16le"},
    77		&maskedSig{mask: []byte("\xFF\xFF\xFF\x00"), pat: []byte("\xEF\xBB\xBF\x00"), ct: "text/plain; charset=utf-8"},
    78	
    79		&exactSig{[]byte("GIF87a"), "image/gif"},
    80		&exactSig{[]byte("GIF89a"), "image/gif"},
    81		&exactSig{[]byte("\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"), "image/png"},
    82		&exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"},
    83		&exactSig{[]byte("BM"), "image/bmp"},
    84		&maskedSig{
    85			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"),
    86			pat:  []byte("RIFF\x00\x00\x00\x00WEBPVP"),
    87			ct:   "image/webp",
    88		},
    89		&exactSig{[]byte("\x00\x00\x01\x00"), "image/vnd.microsoft.icon"},
    90		&exactSig{[]byte("\x4F\x67\x67\x53\x00"), "application/ogg"},
    91		&maskedSig{
    92			mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
    93			pat:  []byte("RIFF\x00\x00\x00\x00WAVE"),
    94			ct:   "audio/wave",
    95		},
    96		&exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"},
    97		&exactSig{[]byte("\x52\x61\x72\x20\x1A\x07\x00"), "application/x-rar-compressed"},
    98		&exactSig{[]byte("\x50\x4B\x03\x04"), "application/zip"},
    99		&exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"},
   100	
   101		// TODO(dsymonds): Re-enable this when the spec is sorted w.r.t. MP4.
   102		//mp4Sig(0),
   103	
   104		textSig(0), // should be last
   105	}
   106	
   107	type exactSig struct {
   108		sig []byte
   109		ct  string
   110	}
   111	
   112	func (e *exactSig) match(data []byte, firstNonWS int) string {
   113		if bytes.HasPrefix(data, e.sig) {
   114			return e.ct
   115		}
   116		return ""
   117	}
   118	
   119	type maskedSig struct {
   120		mask, pat []byte
   121		skipWS    bool
   122		ct        string
   123	}
   124	
   125	func (m *maskedSig) match(data []byte, firstNonWS int) string {
   126		if m.skipWS {
   127			data = data[firstNonWS:]
   128		}
   129		if len(data) < len(m.mask) {
   130			return ""
   131		}
   132		for i, mask := range m.mask {
   133			db := data[i] & mask
   134			if db != m.pat[i] {
   135				return ""
   136			}
   137		}
   138		return m.ct
   139	}
   140	
   141	type htmlSig []byte
   142	
   143	func (h htmlSig) match(data []byte, firstNonWS int) string {
   144		data = data[firstNonWS:]
   145		if len(data) < len(h)+1 {
   146			return ""
   147		}
   148		for i, b := range h {
   149			db := data[i]
   150			if 'A' <= b && b <= 'Z' {
   151				db &= 0xDF
   152			}
   153			if b != db {
   154				return ""
   155			}
   156		}
   157		// Next byte must be space or right angle bracket.
   158		if db := data[len(h)]; db != ' ' && db != '>' {
   159			return ""
   160		}
   161		return "text/html; charset=utf-8"
   162	}
   163	
   164	type mp4Sig int
   165	
   166	func (mp4Sig) match(data []byte, firstNonWS int) string {
   167		// c.f. section 6.1.
   168		if len(data) < 8 {
   169			return ""
   170		}
   171		boxSize := int(binary.BigEndian.Uint32(data[:4]))
   172		if boxSize%4 != 0 || len(data) < boxSize {
   173			return ""
   174		}
   175		if !bytes.Equal(data[4:8], []byte("ftyp")) {
   176			return ""
   177		}
   178		for st := 8; st < boxSize; st += 4 {
   179			if st == 12 {
   180				// minor version number
   181				continue
   182			}
   183			seg := string(data[st : st+3])
   184			switch seg {
   185			case "mp4", "iso", "M4V", "M4P", "M4B":
   186				return "video/mp4"
   187				/* The remainder are not in the spec.
   188				case "M4A":
   189					return "audio/mp4"
   190				case "3gp":
   191					return "video/3gpp"
   192				case "jp2":
   193					return "image/jp2" // JPEG 2000
   194				*/
   195			}
   196		}
   197		return ""
   198	}
   199	
   200	type textSig int
   201	
   202	func (textSig) match(data []byte, firstNonWS int) string {
   203		// c.f. section 5, step 4.
   204		for _, b := range data[firstNonWS:] {
   205			switch {
   206			case 0x00 <= b && b <= 0x08,
   207				b == 0x0B,
   208				0x0E <= b && b <= 0x1A,
   209				0x1C <= b && b <= 0x1F:
   210				return ""
   211			}
   212		}
   213		return "text/plain; charset=utf-8"
   214	}