お題

GAとかで使うルーレット選択を書く．いわゆる重み付きランダム選択．重みWがx:y:z=1:2:3の場合，x=1/6, y=2/6, z=3/6の確率で生成する．色んな言語で累積和，２分探索，乱数を扱う良い練習問題なのかな．

python

やり方としては重みの累積和の配列を作っておいて，乱数を１つ生成して，それがどこに入るかを探索する． pythonのnumpyを使うと累積和はcumsumで用意されているのでそれを使うと簡単．また，二部探索もbisectが用意されている．こうゆのはどんどん使う．

import random
import numpy as np
import bisect

def roulette_choice1(w):
    # generate accumulate weight list.
    # It can be also made by using numpy.cumsum()
    tot = []
    acc = 0
    for e in w:
        acc += e
        tot.append(acc)

    r = random.random() * acc
    # generate a random integer and search it in linear manner.
    # It can be also made by using bisect.bisect_right(tot, r)
    for i, e in enumerate(tot):
        if r < e:
            return i

if __name__ == "__main__":
    w = [1, 2, 3, 4, 5]
    test = [0]*len(w)
    for x in range(100000):
        i = roulette_choice1(w)
        test[i] += 1
    print test

C++でpythonの例と同じことをやる場合には，cumsumは無いので適当に作るしかないけど，bisect相当のものとしてupper_bound(またはlower_bound)が使える．upper_boundはソート済みの配列に対して，指定したval以上となる最小の要素のイテレータを返す．探索は２分探索でやってくれる．upper_boundとlower_boundの違いは，”以上”か”より大きい”かの違い．

#include <vector>
#include <random>
#include <algorithm>
#include <iostream>

std::random_device rnd;
std::mt19937 mt(rnd());

template<typename T>
std::vector<T> cumsum(const std::vector<T> v)
{
  std::vector<T> ret;
  double sum = 0;
  for (auto d : v) {
    ret.push_back(sum+d);
    sum += d;
  }
  return ret;
}

template<typename T> 
size_t rouletteSelection(const std::vector<T> weight)
{
  std::vector<T> cumsum_weight = cumsum(weight);
  std::uniform_real_distribution<> rng(0, cumsum_weight.back());

  double r = rng(mt);
  auto it = std::upper_bound(cumsum_weight.begin(), cumsum_weight.end(), r);
  return (it - cumsum_weight.begin());
}


int main()
{
  std::vector<int> v(5);
  std::iota(v.begin(), v.end(), 1);

  std::vector<int> count = {0,0,0,0,0};
  for (int i=0; i<10000; ++i) {
    size_t j = rouletteSelection(v);
    count[j] += 1;
  }
  dumpVector(count); // dump a vector 
  return 0;
}