//go:build ignore package main import ( "bufio" "fmt" "io" "log" "math" "os" "regexp/syntax" ) const ( runeStart rune = 32 runeLast rune = 127 sz = int(runeLast + 1 - runeStart) ) func getCounts(r io.Reader) (matrix [sz * sz]uint64, err error) { b := bufio.NewReader(r) prev := runeLast for { r, _, err := b.ReadRune() if err == io.EOF { return matrix, nil } if err != nil { return matrix, err } if r < runeStart || r > runeLast { r = runeLast } matrix[int(prev-runeStart)*sz+int(r-runeStart)]++ if prev != runeLast && !syntax.IsWordChar(prev) && syntax.IsWordChar(r) { // when transitioning to a non-word to a word, train this as if the previous // rune were lastRune, as this seems like a sensible way to start a piece of random text. matrix[(sz-1)*sz+int(r-runeStart)]++ } prev = r } } func calcEntropy(in [sz * sz]uint64) (out [sz * sz]float64) { var fallbackCount [sz]uint64 var fallbackSum uint64 for i, cnt := range in { fallbackCount[i%sz] += cnt fallbackSum += cnt } for row := range sz { var sum uint64 for _, c := range in[row*sz : row*sz+sz] { sum += c } for column, c := range in[row*sz : row*sz+sz] { p := (float64(2*(fallbackSum+1))*float64(c) + float64(2*fallbackCount[column]+1)) / float64(float64(2*(fallbackSum+1))*float64(sum+1)) if p <= 0 || p >= 1 { log.Printf("c=%d fallbackSum=%d fallbackCount[column]=%d sum=%d\n", c, fallbackSum, fallbackCount[column], sum) log.Fatalf("got p=%f for %c->%c\n", p, runeStart+rune(row), runeStart+rune(column)) } out[row*sz+column] = -math.Log2(p) } } return } func calcMeanEntropy(counts [sz * sz]uint64, entropies [sz * sz]float64) float64 { var sumEntropy float64 var sumCount uint64 for i, c := range counts { sumCount += c sumEntropy += entropies[i] * float64(c) } return sumEntropy / float64(sumCount) } func main() { cnts, err := getCounts(os.Stdin) if err != nil { log.Fatal(err) } entropy := calcEntropy(cnts) fmt.Printf("var matrix = [%d*%d]float32{", sz, sz) for row := range sz { for i, v := range entropy[row*sz : row*sz+sz] { if i == 0 { fmt.Printf("\n ") } else { fmt.Printf(" ") } fmt.Printf("%f,", v) } } fmt.Printf("}\n\nconst meanEntropy = %f\n", calcMeanEntropy(cnts, entropy)) }