📦 EqualifyEverything / dockerized-wappalyzer

📄 fingerprint_body.go · 157 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
package wappalyzer

import (
	"bytes"
	"unsafe"

	"golang.org/x/net/html"
)

// checkBody collects technology fingerprints found in an HTML document.
//
// The raw body is first matched as a whole against the htmlPart fingerprints.
// The document is then tokenized and two kinds of tags are inspected:
//
//   - <script> start tags: when getScriptSource reports a source, it is
//     matched against the scriptPart fingerprints.
//   - <meta> tags (start or self-closing): the name/content pair is matched
//     against the metaPart fingerprints.
//
// Matches are appended in the order they are encountered; no de-duplication
// is performed here. The function returns once the tokenizer reports an
// error token (which includes end of input).
func (s *Wappalyze) checkBody(body []byte) []string {
	var technologies []string

	// Whole-document match. The body is viewed as a string without copying.
	technologies = append(
		technologies,
		s.fingerprints.matchString(unsafeToString(body), htmlPart)...,
	)

	tokenizer := html.NewTokenizer(bytes.NewReader(body))
	for {
		tokenType := tokenizer.Next()
		if tokenType == html.ErrorToken {
			return technologies
		}
		// Only opening and self-closing tags carry attributes of interest.
		if tokenType != html.StartTagToken && tokenType != html.SelfClosingTagToken {
			continue
		}

		token := tokenizer.Token()
		switch {
		case token.Data == "meta":
			// For meta tags only the name and content attributes matter.
			name, content, ok := getMetaNameAndContent(token)
			if ok {
				technologies = append(
					technologies,
					s.fingerprints.matchKeyValueString(name, content, metaPart)...,
				)
			}
		case token.Data == "script" && tokenType == html.StartTagToken:
			// Self-closing script tags are deliberately not handled here.
			source, ok := getScriptSource(token)
			if ok {
				technologies = append(
					technologies,
					s.fingerprints.matchString(source, scriptPart)...,
				)
				continue
			}

			// No source reported: the following token should be the inline
			// contents of the script tag. It is consumed and discarded.
			//
			// TODO: JS requires a running VM, for checking properties. Only
			// possible with headless for now :(
			//
			// Once that is possible, the consumed text token's data can be
			// matched against the jsPart fingerprints.
			_ = tokenizer.Next()
		}
	}
}

// getTitle returns the text of the page's <title> element, or an empty
// string when none is found.
//
// The document is scanned to the end, so if several <title> start tags are
// each immediately followed by a text token, the text of the last one is
// returned.
func (s *Wappalyze) getTitle(body []byte) string {
	tokenizer := html.NewTokenizer(bytes.NewReader(body))

	title := ""
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// Error token also signals end of input; return what we have.
			return title
		case html.StartTagToken:
			if tokenizer.Token().Data != "title" {
				continue
			}
			// The token right after <title> is expected to be its text;
			// anything else (e.g. an immediate closing tag) is skipped.
			if tokenizer.Next() == html.TextToken {
				title = tokenizer.Token().Data
			}
		}
	}
}

// getMetaNameAndContent extracts the values of the "name" and "content"
// attributes from a meta tag token.
//
// The boolean result is false only when the token carries fewer than
// keyValuePairLength attributes. Otherwise it is true, and either returned
// string may be empty if the corresponding attribute is absent. When an
// attribute is repeated, the last occurrence wins.
func getMetaNameAndContent(token html.Token) (string, string, bool) {
	attrs := token.Attr
	if len(attrs) < keyValuePairLength {
		return "", "", false
	}

	var name, content string
	for i := range attrs {
		if key := attrs[i].Key; key == "name" {
			name = attrs[i].Val
		} else if key == "content" {
			content = attrs[i].Val
		}
	}
	return name, content, true
}

// getScriptSource returns the value of the "src" attribute of a script tag
// token.
//
// The boolean result reports whether a "src" attribute is actually present.
// Tags that only carry other attributes (for example type or async on an
// inline script) report false, so callers do not mistake them for external
// scripts with an empty source. When "src" is repeated, the last occurrence
// wins.
func getScriptSource(token html.Token) (string, bool) {
	var (
		source string
		found  bool
	)
	for _, attr := range token.Attr {
		if attr.Key == "src" {
			source = attr.Val
			found = true
		}
	}
	return source, found
}

// unsafeToString returns a string view of data without copying the bytes.
//
// The returned string shares its storage with the slice, so it is only
// valid as long as the slice's contents are not modified afterwards.
//
// NOTE: Only use this when it is certain that the underlying array will not
// be manipulated while the string is in use.
//
// Reference - https://github.com/golang/go/issues/25484
func unsafeToString(data []byte) string {
	// Reinterpret the address of the slice value as the address of a string.
	view := (*string)(unsafe.Pointer(&data))
	return *view
}