80 lines
1.5 KiB
Go
80 lines
1.5 KiB
Go
|
// Package complexity performs some rudimentary entropy calculations based on ASCII bi-grams, which can be
|
||
|
// used to censor potential secret keys or passwords in text, and to estimate password strength.
|
||
|
//
|
||
|
// The matrix it uses only covers printable ASCII text; all other runes are interpreted as being \x7f (decimal 127).
|
||
|
//
|
||
|
// The package isn't well tested, and the provenance of the source data behind the matrix table is not documented here.
|
||
|
|
||
|
package complexity
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"io"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// state is the running accumulator used while scoring a stream of runes.
type state struct {
	// prev is the rune most recently fed to put; it forms the first half
	// of the next bi-gram.
	prev rune
	// count is the number of runes fed to put so far.
	count int64
	// entropy is the running sum of the matrix entries looked up for each
	// bi-gram.
	entropy float64
}
|
||
|
|
||
|
func runeIdx(r rune) int {
|
||
|
if r < firstRune || r > lastRune {
|
||
|
return sz - 1
|
||
|
}
|
||
|
|
||
|
return int(r - firstRune)
|
||
|
}
|
||
|
|
||
|
func (s *state) put(next rune) {
|
||
|
s.entropy += float64(matrix[runeIdx(s.prev)*sz+runeIdx(next)])
|
||
|
s.count++
|
||
|
s.prev = next
|
||
|
}
|
||
|
|
||
|
func runeReader(r io.RuneReader) (float32, int64, error) {
|
||
|
s := state{prev: lastRune}
|
||
|
for {
|
||
|
r, _, err := r.ReadRune()
|
||
|
if err != nil {
|
||
|
if err == io.EOF {
|
||
|
err = nil
|
||
|
}
|
||
|
return float32(s.entropy), s.count, err
|
||
|
}
|
||
|
|
||
|
s.put(r)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func Reader(r io.Reader) (float32, int64, error) {
|
||
|
if r, ok := r.(io.RuneReader); ok {
|
||
|
return runeReader(r)
|
||
|
}
|
||
|
|
||
|
return runeReader(bufio.NewReader(r))
|
||
|
}
|
||
|
|
||
|
func Bytes(text []byte) (float32, int) {
|
||
|
s := state{prev: lastRune}
|
||
|
|
||
|
for i := 0; i != len(text); {
|
||
|
r, sz := utf8.DecodeRune(text[i:])
|
||
|
i += sz
|
||
|
s.put(r)
|
||
|
}
|
||
|
|
||
|
return float32(s.entropy), int(s.count)
|
||
|
}
|
||
|
|
||
|
func String(text string) (float32, int) {
|
||
|
s := state{prev: lastRune}
|
||
|
|
||
|
for _, r := range text {
|
||
|
s.put(r)
|
||
|
}
|
||
|
|
||
|
return float32(s.entropy), int(s.count)
|
||
|
}
|