以太坊源码解读（17）ethash挖矿原理

POW的本质是基于算力解决一个数学上困难的问题，解决问题关键点是除了暴力枚举，没有任何办法可以找到我们所需要的nonce值，但对于验证输出的结果是非常简单容易的。

经典的比特币POW的算法原理是对block的header加上循环更新的nonce去进行hash运算，运算的target是hash值的前n位为0。这个计算只能通过暴力枚举来进行，验证也很容易，只要使用最终的nonce代入header按照之前的算法验证即可。

以太坊采用的ethash算法与比特币不同，但基本过程类似，都是找到一个nonce值输入到算法中，得到的结果低于一个基于特定困难值的阈值。

RAND(h, n) <= M/d

RAND是一系列复杂的运算
h: header 不包含nonce
n: nonce

M: 2^256
d: 难度值，该难度值基于父区块的时间戳和难度而得到

上面的公式意思是我们用header和nonce经过复杂的计算，如果得到的结果小于或等于M/d，该nonce就是可用的，意味着挖矿成功。

上图是ethash算法在以太坊源码中的实现，我们一个个看。

一、Seal方法

Seal主要的任务是： 1、获得种子seed； 2、基于seed获得Rand对象，rand值将作为初始化nonce进行挖矿； 3、启动mine函数，执行挖矿。

ethash.lock.Lock()
// 挖矿的线程
threads := ethash.threads
if ethash.rand == nil {
    // 获得种子seed
    seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
    if err != nil {
        ethash.lock.Unlock()
        return err
    }
    // 执行成功，生成rand对象，并赋值
    ethash.rand = rand.New(rand.NewSource(seed.Int64()))
}
ethash.lock.Unlock()
if threads == 0 {
    threads = runtime.NumCPU()
}
if threads < 0 {
    threads = 0 // Allows disabling local mining without extra logic around local/remote
}

var (
    pend   sync.WaitGroup
    locals = make(chan *types.Block)
)
// 闭包多线程处理
for i := 0; i < threads; i++ {
    pend.Add(1)
    go func(id int, nonce uint64) {
        defer pend.Done()
        ethash.mine(block, id, nonce, abort, locals) // Seal核心工作
    }(i, uint64(ethash.rand.Int63()))
}

二、mine函数

这个函数的主要任务是： 1、取出block的header； 2、取出没有nonce时的区块hash； 3、设置目标target，即上面公式中的M/td； 4、获得dataset，即数据集 5、开启无限循环，计算每一轮nonce值的POW结果，知道获得正确解。

补充：DAG和epoch

1、上面的dataset就来自内存中的一组数据或者硬盘里的DAG。 2、DAG是有向无环图，以太坊的DAG是基于区块高度生成的。 3、以太坊中每3万个块会生成一代DAG，这一代就成称为一个epoch。 4、挖矿的时候需要从DAG中随机选取dataset，所以挖矿工作只能在现世DAG创建以后才能开始。

var (
    header  = block.Header()
    hash    = ethash.SealHash(header).Bytes()
    target  = new(big.Int).Div(two256, header.Difficulty)
    number  = header.Number.Uint64()
    dataset = ethash.dataset(number, false)
)

var (
    attempts = int64(0)
    nonce    = seed
)

for {
    select {
    case <-abort:
        // Mining terminated, update stats and abort
        logger.Trace("Ethash nonce search aborted", "attempts", nonce-seed)
        ethash.hashrate.Mark(attempts)
        break search

    default:
        ...

        // 计算当前nonce的POW值
        digest, result := hashimotoFull(dataset.dataset, hash, nonce) // 真正执行POW计算的函数
        if new(big.Int).SetBytes(result).Cmp(target) <= 0 {
            // 找到nonce后重构header，更新nonce和hash
            header = types.CopyHeader(header)
            header.Nonce = types.EncodeNonce(nonce)
            header.MixDigest = common.BytesToHash(digest)

            // Seal and return a block (if still needed)
            select {
            case found <- block.WithSeal(header):
                logger.Trace("Ethash nonce found and reported", "attempts", nonce-seed, "nonce", nonce)
            case <-abort:
                logger.Trace("Ethash nonce found but discarded", "attempts", nonce-seed, "nonce", nonce)
            }
            break search
        }
        nonce++
    }
}

三、hashimotoFull和hashimoto

这两个函数是ethash算法的核心，hashimotoFull中定义了lookup函数，用于从dataset中找到数据子集：

func hashimotoFull(dataset []uint32, hash []byte, nonce uint64) ([]byte, []byte) {
	lookup := func(index uint32) []uint32 {
		offset := index * hashWords
		return dataset[offset : offset+hashWords]
	}
	return hashimoto(hash, nonce, uint64(len(dataset))*4, lookup)
}

hashimoto则是真正对nonce的运算：

func hashimoto(hash []byte, nonce uint64, size uint64, lookup func(index uint32) []uint32) ([]byte, []byte) {
	// 计算数据集的理论行数
	rows := uint32(size / mixBytes)

	// 将header+nonce合并成一个40字节的seed
	seed := make([]byte, 40)
	copy(seed, hash)
	binary.LittleEndian.PutUint64(seed[32:], nonce)

        // 经历一遍Keccak512加密，生成64字节的seed
	seed = crypto.Keccak512(seed)
        // 从seed中获取seedHead用于后面数据集的lookup
	seedHead := binary.LittleEndian.Uint32(seed)

	// 开始重复混合seed
	mix := make([]uint32, mixBytes/4) // mixBytes常量= 128，mix的长度为32，元素为uint32，是32位，对应为4字节大小。所以mix总共大小为4*32=128字节大小
	for i := 0; i < len(mix); i++ {
		mix[i] = binary.LittleEndian.Uint32(seed[i%16*4:]) // 共循环32次，前16和后16位的元素值相同
	}
	// 再与随机的dataset做混合
	temp := make([]uint32, len(mix)) // 新建一个temp，结构与mix相同

	for i := 0; i < loopAccesses; i++ {  // loopAccesses是常量 = 64
		parent := fnv(uint32(i)^seedHead, mix[i%len(mix)]) % rows
		for j := uint32(0); j < mixBytes/hashBytes; j++ {
			copy(temp[j*hashWords:], lookup(2*parent+j)) // 通过seed生成parent，再基于parent去源数据中查找数据子集，最后复制到temp中
		}
		fnvHash(mix, temp) // 对mix和temp进行fnvHash处理
	}
	// 压缩mix，先对mix进行一次大混淆，然后去最前面8个有效位置的数据
	for i := 0; i < len(mix); i += 4 {
		mix[i/4] = fnv(fnv(fnv(mix[i], mix[i+1]), mix[i+2]), mix[i+3])
	}
	mix = mix[:len(mix)/4]
        
        // 最后得到digest，并基于seed+digest得到我们想要的值，如果这个值小于target，就结束循环
        // 这里的digest就是我们要的区块最终的hash
	digest := make([]byte, common.HashLength)
	for i, val := range mix {
		binary.LittleEndian.PutUint32(digest[i*4:], val)
	}
	return digest, crypto.Keccak256(append(seed, digest...))
}

FNV hash 算法

func fnv(a, b uint32) uint32 {
    return a*0x01000193 ^ b
}

func fnvHash(mix []uint32, data []uint32) {
    for i := 0; i < len(mix); i++ {
        mix[i] = mix[i]*0x01000193 ^ data[i]
    }
}

0x01000193是FNV hash算法的一个hash质数（Prime number，又叫素数，只能被1和其本身整除），哈希算法会基于一个常数来做散列操作。0x01000193是FNV针对32 bit数据的散列质数。

下一章：以太坊源码解读（18）以太坊交易执行流程

之前分析挖矿模块，miner从TxPool拿来的交易，交给worker对象。后者要调用commitTransaction在本地执行交易，生成receipt，更改世界状态，打包成挖矿的block最后递 ...

上一章：以太坊源码解读（16）miner模块和Worker模块

下一章：以太坊源码解读（18）以太坊交易执行流程