2017-07-05

VivadoHLSで射影変換を実装してみた-1-

台形補正のアルゴリズムを調べてたら射影変換でやるみたいなのでFPGAに実装してみた。

OpenCV

まずはOpenCVでさくっとテスト

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
int main(void)
{
    //cv::Mat src_img;
    //src_img = cv::imread("test.jpg", 1);
    IplImage *pImg;
    pImg = cvLoadImage("test.jpg");
    //IplImage -> Mat
    cv::Mat mat(pImg);
    cv::Mat src_img;
    src_img = pImg;

    cv::Mat dst_img = src_img.clone();

    if (src_img.empty()){
        return -1;
    }

    const cv::Point2f src_pt[]={
        cv::Point2f(256, 128),
        cv::Point2f(768, 128),
        cv::Point2f(768, 640),
        cv::Point2f(256, 640) };

    const cv::Point2f dst_pt[]={
        cv::Point2f(356, 228),
        cv::Point2f(768, 228),
        cv::Point2f(668, 640),
        cv::Point2f(256, 540)};

        //パラメータ計算
    cv::Mat homography_matrix = cv::getPerspectiveTransform(src_pt,dst_pt);
        //射影変換
    cv::warpPerspective( src_img, dst_img, homography_matrix,src_img.size());

    cv::namedWindow("Image", CV_WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
    cv::imshow("Image", dst_img);
    cv::waitKey(0);
}

f:id:ginnyu-tei:20170705220240j:plain f:id:ginnyu-tei:20170705220131j:plain こんな風に変換前の4点座標と変換後の4点座標を指定することで
座標に合うように画像を変形してくれます。

C++で実装

C++での実装は↓のページを~~丸パクリ~~参考にしました。
mf-atelier.sakura.ne.jp

C++で実装する際、ついでに16ビットシフトして整数演算化しました。
多分小数点そこまでなくても問題ないと思う

void homography(
    int x,
    int y,
    int a,
    int b,
    int c,
    int d,
    int e,
    int f,
    int g,
    int h,
    int *u,
    int *v)
{
    *u = (x*a + y*b + c) / (x*g + y*h + 65536);
    *v = (x*d + y*e + f) / (x*g + y*h + 65536);
}


int main(void)
{

    double a, b, c, d, e, f, g, h;
    int i_a, i_b, i_c, i_d, i_e, i_f, i_g, i_h;
    int u, v;

    int    naOrig[4][2] = { { 356, 228 }, { 768, 228 }, { 668, 640 }, { 256, 540 } };
    int    naTran[4][2] = { { 256, 128 }, { 768, 128 }, { 768, 640 }, { 256, 640 } };

    //cv::Mat src_img;
    //src_img = cv::imread("test.jpg", 1);

    IplImage *pImg;
    pImg = cvLoadImage("test.jpg");
    //IplImage -> Mat
    cv::Mat mat(pImg);
    cv::Mat src_img;
    src_img = pImg;

    cv::Mat dst_img = src_img.clone();

    if (src_img.empty()){
        return -1;
    }

    homography_param
        (
        naOrig,
        naTran,
        &a,
        &b,
        &c,
        &d,
        &e,
        &f,
        &g,
        &h
        );

    std::cout <<
        "a=" << a << std::endl <<
        "b=" << b << std::endl <<
        "c=" << c << std::endl <<
        "d=" << d << std::endl <<
        "e=" << e << std::endl <<
        "f=" << f << std::endl <<
        "g=" << g << std::endl <<
        "h=" << h <<
        std::endl;

    i_a = (int)(a * 65536);
    i_b = (int)(b * 65536);
    i_c = (int)(c * 65536);
    i_d = (int)(d * 65536);
    i_e = (int)(e * 65536);
    i_f = (int)(f * 65536);
    i_g = (int)(g * 65536);
    i_h = (int)(h * 65536);

    std::cout <<
        "i_a=" << i_a << std::endl <<
        "i_b=" << i_b << std::endl <<
        "i_c=" << i_c << std::endl <<
        "i_d=" << i_d << std::endl <<
        "i_e=" << i_e << std::endl <<
        "i_f=" << i_f << std::endl <<
        "i_g=" << i_g << std::endl <<
        "i_h=" << i_h <<
        std::endl;

    cv::Vec3b *src = src_img.ptr<cv::Vec3b>(0);
    cv::Vec3b *dst = dst_img.ptr<cv::Vec3b>(0);

    for (int y = 0; y < 768; y++){
        for (int x = 0; x < 1024; x++){
            homography(x, y, i_a, i_b, i_c, i_d, i_e, i_f, i_g, i_h, &u, &v);
            if ((u >= 0) & (u < 1024) &
                (v >= 0) & (v < 768)){
                dst[y * 1024 + x] = src[v * 1024 + u];
            }
            else {
                dst[y * 1024 + x] = cv::Vec3b(0, 0, 0);
            }
        }
    }
    cv::namedWindow("Image", CV_WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
    cv::imshow("Image", dst_img);
    cv::waitKey(0);
}

パラメータ変換式は参考元のコードをほぼそのまま使用した。 (関数名をhomography_paramに変更)
逆行列計算はOpenCVを使って↓の用に実装

 // 逆行列
    //matinv(8, dATA, dATA_I);
    cv::Mat matA = (cv::Mat_<double>(8, 8));

    for (int j = 0; j < 8; j++){
        for (int i = 0; i < 8; i++){
            matA.at<double>(i, j) = dATA[i][j];
        }
    }

    matA = matA.inv();

    for (int j = 0; j < 8; j++){
        for (int i = 0; i < 8; i++){
            dATA_I[i][j] = matA.at<double>(i, j);
        }
    }

パラメータをどうやって計算するか知りたかったので数式を見たら
8次元連立一次方程式で自分で解くのは諦めました・・・

それと大事な事なんですが、ここでは変換後座標から変換前座標を計算したいので
変換前座標と変換後座標を入れ替えて計算しています。

こうする事で変換後画像内に変換前画像が当てはまらず
ピクセルが抜けてしまう事が防げます。
その為、ピクセルを補完をしなくても良くなります。
(正確には一番近い画素を変換後座標にコピーしているので
ニアレストネイバーを使ってる言える。)

余談射影変換の逆計算方法が初め分からず逆行列の計算を
するのかとげんなりしていましたが以下ページを見て解決しました。

detail.chiebukuro.yahoo.co.jp

長くなったのでVivado HLSのコードは次回

2016-11-01

多層パーセプトロンをPythonで実装してみた-手書き数字認識-

前回(↓)の続き。

se.hatenablog.jp

簡単な分類で動作を確認できたので次は数字認識をやってみる。

多層パーセプトロンをPythonで実装してみた-1- - 日曜技術者のメモ
で書いたクラスは出力層が二個以上に対応していなかったので修正して使った。
ブログは修正済み。

数字の分類では出力層にソフトマックス関数を使って出力を
[-1,1]ではなく確立にするのが普通の様だが今回は出力層をそのままにして
学習データは正しい数字は0.5、その他の数字は0として学習した。

sklearn.datasets

sklearn.datasetsに手書き文字があったのでまずはそれでやってみる。

The Digit Dataset — scikit-learn 0.18 documentation

8x8の数字なのでかなり荒い。

入力層は8pix×8pix=64個
出力層は0から9の10個
学習率0.03
600文字学習を繰り返す。

実装コード

def digit_sepalate():
  #テストデータ作成
  iteration_num = 100000
  sample_num = 600

  digits = load_digits()

  x = digits.data
  y = digits.target
  x_max = x.max()

  x /= x.max()
  x *= 0.5

  iLayer = nn.inputLayer(64,10,10)
  oLayer = nn.outputLayer(64,10,1,0.05)

  inputLayerY = np.zeros(64)
  dut_output = np.zeros(10)

  x1_array = np.arange(0,8,1)
  x2_array = np.arange(0,8,1)
  pos_x1,pos_x2 = np.meshgrid(x1_array,x2_array)
  val_y = np.identity(40)


  plt.ion()

  match_list = []
  cnt_list = []

  shuffle_list = list(range(sample_num))

  plt.pause(0.05)

  for i in range(iteration_num):
    match = 0
    for j in range(sample_num):
      inputLayerY = x[j]

      iLayer.foward(inputLayerY)
      oLayer.foward(iLayer.getY())

      exp_y = np.zeros(10) - 1


      dut_output = oLayer.getY()

      exp_y[y[j]] = 1.0

      oLayer.backprop(exp_y)

      oLayer.updateWeight()

      if(dut_output.argmax() == digits.target[j]):
         match += 1
      
    #次の学習時に順番をシャッフルする
    random.shuffle(shuffle_list)
    
    if (1):
      plt.clf()
      match_list.append(match/sample_num)
      cnt_list.append(i)

      for i in range(10):
        plt.subplot(3,5,6+i)
        plt.xlim(0,7)
        plt.ylim(0,7)
        plt.pcolor(pos_x1,pos_x2,oLayer._weight[i].reshape(8,8))

      plt.subplot(3,1,1)
      plt.plot(cnt_list,match_list)
      plt.pause(0.05)


  while True:
    plt.pause(0.05)

↓は実行結果
f:id:ginnyu-tei:20161101215515j:plain:w400
10個のヒートマップは各出力層の重み値を出力している。
うまくいけば数字に見えるが・・・微妙。
数回の学習でかなりの精度になった。

MNIST

次は有名なMNISTでやってみる。

データは以下にあるがsklearnで関数が容易されているのでそっちを使った。
MNIST handwritten digit database, Yann LeCun, Corinna Cortes and Chris Burges

入力層は28pix×28pix=784個
出力層は0から9の10個
学習率0.03
1000文字学習を繰り返す。

↓は実行結果
f:id:ginnyu-tei:20161101220631j:plain:w400

10回の学習で正解率91%!!
幾ら何でも学習速すぎないかと思っていたが
これは学習した数字で学習率を見てるので速いのは当たり前だった

次は学習する数字と正解率を評価する数字を別々にしてみる。
正解率は100文字中の正解数

1000文字学習
f:id:ginnyu-tei:20161101224349j:plain:w400

5000文字学習
f:id:ginnyu-tei:20161101224406j:plain:w400

10000文字学習
f:id:ginnyu-tei:20161101224421j:plain:w400

50000文字学習
f:id:ginnyu-tei:20161101225229j:plain:w400

さっきよりは学習に時間がかかっているが問題なく分類できてる
重みのヒートマップも0,3,8はそれっぽい。

2016-10-19

多層パーセプトロンをPythonで実装してみた-2-

前回(↓)の続き。

se.hatenablog.jp

多層パーセプトロン各層の実装ができたので実際に動かしてみる。
パーセプトロンの数や層の構成は↓を参考にした。

googlecloudplatform-japan.blogspot.jp

簡単な分類

直線分類

まず↓をやってみる。

f:id:ginnyu-tei:20161019194257g:plain:w400

実装は↓

# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
import random as random
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles

import NNLayer as nn

def linearlySeparable():
  
  #学習数、ループ回数
  iteration_num = 10
  sample_num = 100
  
  #学習結果表示グラフ設定
  x1_array = np.arange(-8,8,0.4)
  x2_array = np.arange(8,-8,-0.4)
  pos_x1,pos_x2 = np.meshgrid(x1_array,x2_array)
  val_y = np.identity(40)

  shuffle_list = list(range(sample_num))
  match_list = []
  cnt_list = []
  inputLayerY = np.zeros(2)

  #NN作成
  iLayer = nn.inputLayer(2,2,1)
  oLayer = nn.outputLayer(2,1,1,0.03)

  #テストデータ作成
  centers = [(3,3),(-3,-3)]
  #centers = [(-3,-3),(3,3)]
  #centers = [(0,-3),(0,3)]
  #centers = [(-3,0),(3,0)]
  x_exp,y_exp = make_blobs(n_samples= sample_num,centers=centers,random_state=42,cluster_std=1.5)

  #yの値を[0,1]から[-1,1]にする
  y_exp = [x if x != 0 else -1 for x in y_exp]
  
  #初期値で一度グラフ描画
  plt.ion()

  for index_x1 in range(len(x1_array)):
    for index_x2 in range(len(x2_array)):

      #X1
      inputLayerY[0] = x1_array[index_x1]
      #X2
      inputLayerY[1] = x2_array[index_x2]

      iLayer.foward(inputLayerY)
      oLayer.foward(iLayer.getY())

      val_y[index_x1][index_x2] = oLayer.getY()
  #グラフ表示
  plt.clf()
  plt.subplot(2,1,1)
  plt.xlim(-8,8)
  plt.ylim(-8,8)
  plt.pcolor(pos_x1,pos_x2,val_y)
  plt.colorbar()
  plt.scatter(x_exp[:,0],x_exp[:,1],c=y_exp,s=80)
  plt.subplot(2,1,2)
  plt.plot(cnt_list,match_list)
  plt.pause(0.05)

  
  for i in range(iteration_num):
    match = 0
    #学習フェーズ
    for j in shuffle_list:
      
      #X1
      inputLayerY[0] = x_exp[j][0]
      #X2
      inputLayerY[1] = x_exp[j][1]

      iLayer.foward(inputLayerY)
      oLayer.foward(iLayer.getY())

      if (((y_exp[j] < 0) & (oLayer.getY() < 0)) |
          ((y_exp[j] > 0) & (oLayer.getY() > 0))):
        match += 1

      oLayer.backprop(y_exp[j])
      oLayer.updateWeight()

    #次の学習時に順番をシャッフルする
    random.shuffle(shuffle_list)
    
    match_list.append(match/sample_num)
    cnt_list.append(i)
    #学習結果(ヒートマップ)
    for index_x1 in range(len(x1_array)):
      for index_x2 in range(len(x2_array)):

        #X1
        inputLayerY[0] = x1_array[index_x1]
        #X2
        inputLayerY[1] = x2_array[index_x2]

        iLayer.foward(inputLayerY)
        oLayer.foward(iLayer.getY())

        val_y[index_x2][index_x1] = oLayer.getY()

    #結果グラフ表示
    plt.clf()
    plt.subplot(2,1,1)
    plt.xlim(-8,8)
    plt.ylim(-8,8)
    plt.pcolor(pos_x1,pos_x2,val_y)
    plt.colorbar()
    plt.scatter(x_exp[:,0],x_exp[:,1],c=y_exp,s=80)
    plt.subplot(2,1,2)
    plt.plot(cnt_list,match_list)
    plt.pause(0.05)

  while True:
    plt.pause(0.05)

if __name__ == '__main__': 
  linearlySeparable()

オンライン学習、学習率は0.03固定で実行
実行結果は↓
f:id:ginnyu-tei:20161019194635j:plain:w400

悪いときでも100データ×10回学習すればほぼ100パーセントになった。

円形分類

次は隠れ層を追加する。
f:id:ginnyu-tei:20161019195229g:plain:w400

実装は直線分類とほぼ同じ。
異なるのはパーセプトロンの構成とループ数とテストデータ作成

テストデータ

  #テストデータ作成
  x_exp,y_exp = make_circles(n_samples = sample_num,factor=0.3,noise=0.05)

  x_exp *= 3

  #yの値を[0,1]から[-1,1]にする
  y_exp = [x if x != 0 else -1 for x in y_exp]

3倍しているのはそのままだと円が小さかったので適当に大きくしている。
パーセプトロン構成

  #NN作成
  iLayer  = nn.inputLayer(2,4,4)
  h0Layer = nn.hiddenLayer(2,4,1,0.03,True)
  oLayer  = nn.outputLayer(4,1,1,0.03)

結果は↓
f:id:ginnyu-tei:20161019195458j:plain:w400

100データ×50回学習すればほぼ100パーセントになった。
ただ、初期値がランダムの為か、たまに↓みたいに収束しない時がある。
f:id:ginnyu-tei:20161019195709j:plain:w400

渦巻き分類

もっと隠れ層を追加する。
f:id:ginnyu-tei:20161019200151g:plain:w400

sklearnに渦巻きのデータセットがなさそうだったので数式を実装した。

テストデータ

  x_exp = np.zeros((sample_num,2))
  y_exp = np.zeros(sample_num)
  for i in range (sample_num):
    theta = random.uniform(-3*math.pi,3*math.pi)
    if (theta > 0):
      x_exp[i][0] = 0.5*math.cos(theta)*theta
      x_exp[i][1] = 0.5*math.sin(theta)*theta
      y_exp[i] = 0.5
    else:
      x_exp[i][0] = 0.5*math.cos(theta)*theta
      x_exp[i][1] = 0.5*math.sin(theta)*-theta
      y_exp[i] = -0.5

テストデータは[-1,1]ではなく[-0.5,0.5]にしている。
これは[-1,1]でなかなか収束しない時に↓を見たら、

バックプロパゲーション - Wikipedia

tanhの時は出力を-0.65848～0.6584にすると良いと
書いてあったのでそうすると収束率がかなり改善した。

パーセプトロン構成

  #NN作成
  iLayer  = nn.inputLayer(2,8,8)
  h0Layer = nn.hiddenLayer(2,8,8,0.03,True)
  h1Layer = nn.hiddenLayer(8,8,5,0.03,True)
  h2Layer = nn.hiddenLayer(8,5,1,0.03,True)
  oLayer  = nn.outputLayer(5,1,1,0.03)

結果は↓
f:id:ginnyu-tei:20161019201319j:plain:w400

データ数が100だと渦巻きがかなりまばらになったので
これはデータ数を200にしている。
200データ×300回くらいでほぼ100パーセントになる。

長くなったので今回はここまで
次回こそはMINSTやるよ！

2016-10-08

多層パーセプトロンをPythonで実装してみた-1-

A Neural Network Playgroundのデモが素晴らしかったので
Pythonを使ってそれっぽく動く物を作ってみた。

開発環境

Visual studio 2015 community
- PTVS
Python3.5.2
- numpy 1.11.1
- scipy 0.18.0
- scikit-learn 0.18
- matplotlib 1.5.3

多層パーセプトロン

ニューラルネットワークはまったくの初心者だったので色々調べると
多層パーセプトロンを使えばできそうなのでそれについて調べた。

順方向の計算は簡単

f:id:ginnyu-tei:20161008103403g:plain:w400

逆誤差伝播法はかなり難しい・・・
出力層はなんとか自分で計算できたが隠れ層は無理。
↓も正解か自信がない(手書き文字認識は問題なく出来たから大丈夫だとは思う)

+出力層
f:id:ginnyu-tei:20161008111856g:plain:w400

+隠れ層
f:id:ginnyu-tei:20161008110211g:plain:w400

学習は1データ(x,yの値と期待値y)毎に更新するオンライン学習
重み更新は学習率固定

ニューロンレイヤークラス

まず基本となるレイヤークラスを作成して
それを継承して出力層クラスと隠れ層クラスを作成した。
playgroundのデモは値の範囲が-1～+1までなので活性化関数はtanh(x)を使った。

※11/1複数の出力層に対応する様修正

# -*- coding: utf-8 -*-

import numpy as np

class neuronLayer(object):
  """レイヤー基本クラス"""
  def __init__(self,input_num,neuron_num,output_num,e,bias=None):
    self._input_num = input_num
    self._neuron_num = neuron_num
    self._ouput_num = output_num
    self._e = e

    if bias is None:
      self._isBias = False
    else:
      self._isBias = True
      #biasを追加する拡張
      self._input_num += 1
      self._bias = np.ones([self._neuron_num,1]).reshape(self._neuron_num,1)

    #初期化
    self._x = np.zeros(self._input_num*self._neuron_num).reshape(self._neuron_num,self._input_num)
    self._s = np.zeros(self._neuron_num)
    self._y = np.zeros(self._ouput_num*self._neuron_num).reshape(self._neuron_num,self._ouput_num)
    self._delWdelE = np.zeros(self._input_num*self._neuron_num).reshape(self._neuron_num,self._input_num)
    self._delSdelE = np.zeros(self._ouput_num*self._neuron_num).reshape(self._neuron_num,self._ouput_num)

    #重みランダム初期化
    #self._weight = np.random.rand(self._neuron_num,self._input_num)*2-1.0
    #http://deeplearning.net/tutorial/mlp.html
    self._weight = np.asarray(
                np.random.uniform(
                    low=-np.sqrt(6. / (self._input_num + self._ouput_num)),
                    high=np.sqrt(6. / (self._input_num + self._ouput_num)),
                    size=(self._neuron_num, self._input_num)
                )
            )

  def activationFunction(self,x):
    return self.tanh(x)

  def dActivationFunction(self,x):
    return self.dtanh(x)

  def sigmoid(self,x):
    return(1.0/(1.0+np.exp(-x)))

  def dsigmoid(self,x):
    return self.sigmoid(x)*(1-self.sigmoid(x))

  def tanh(self,x):
    return np.tanh(x)

  def dtanh(self,x):
    return 1.0-self.tanh(x)*self.tanh(x)
  
  def ReLu(self,x):
    return x * (x > 0)

  def dReLu(self,x):
    return 1.0 * (x > 0)

  def foward(self,x):
    #biasが有効のときはbiasを追加
    if(self._isBias == True):
      self._x = np.hstack((x,self._bias))
    else:
      self._x = x

    # s = Σ(x*w)
    if(self._neuron_num == 1):
      self._s = (self._x * self._weight).sum()
    else:
      self._s = (self._x * self._weight).sum(axis=1)+np.zeros(self._ouput_num).reshape(self._ouput_num,1)
    self._y = self.activationFunction(self._s)

  def updateWeight(self):
    #print(self._weight)
    self._weight -= self._e * self._delWdelE

  def getY(self):
    return self._y
  
  def getWeight(self):
    if(self._isBias == True):
      #前段はbiasの重みが不要なので削除して返す
      return np.delete(self._weight,self._input_num-1,1)
    else:
      return self._weight

  def getDelSdelE(self):
    return self._delSdelE


class inputLayer(object):
  def __init__(self,input_num,neuron_num,output_num):
    self._input_num = input_num
    self._neuron_num = neuron_num
    self._ouput_num = output_num

    self._x = np.zeros(self._ouput_num).reshape(self._ouput_num,1)
    self._y = np.zeros(self._input_num*self._ouput_num).reshape(self._input_num,self._ouput_num)

  def foward(self,x):
      self._y = x + self._x

  def getY(self):
    return self._y


class outputLayer(neuronLayer):
  def backprop(self,y):
    if(self._neuron_num==1):
      self._delSdelE = (self._y -y) * self.dActivationFunction(self._s)
    else:
      self._delSdelE = (self._y -y) * self.dActivationFunction(self._s)
      self._delSdelE = self._delSdelE.T

    self._delWdelE = self._delSdelE * self._x



class hiddenLayer(neuronLayer):
  def backdrop(self,delSdelE,wn):
    if(self._ouput_num==1):
      self._delSdelE = (delSdelE * wn).sum() * self.dActivationFunction(self._s)
      self._delSdelE = self._delSdelE.reshape(self._neuron_num,1)
    else:
      self._delSdelE = (delSdelE * wn).sum(axis=0).reshape(1,self._neuron_num)
      self._delSdelE *= self.dActivationFunction(self._s[0,:])
      self._delSdelE = self._delSdelE.T

    self._delWdelE = self._delSdelE * self._x