/*
 * Perceptron.cl
 *
 * Created on: Jan 22, 2012
 * Author: scannon
 *
 * A fixed-size two-layer perceptron: 16 hidden nodes feeding 48 output
 * nodes.  Every node holds 16 weights (one float16) and a scalar bias, and
 * its activation is sigmoid(dot(w, x) + bias).
 */

#ifndef PERCEPTRON_CL
#define PERCEPTRON_CL

// NOTE(review): ranluxcl_state_t and ranluxcl32() are used below but are not
// declared in this file; presumably the including source pulls in ranluxcl.cl
// before this header -- confirm against the build.
//#include "ranluxcl.cl"

/** A single node: 16 input weights plus a bias added to the weighted sum. */
typedef struct __PerceptronNode__ {
    float16 w;
    float bias;
} PerceptronNode;

/** The network: 16 hidden nodes and 48 output nodes. */
typedef struct __Perceptron__ {
    PerceptronNode hiddenNodes[16];
    PerceptronNode outputNodes[48];
} Perceptron;

/**
 * Logistic function 1 / (1 + e^-x).
 * O(1)
 * horizontal asymptotes at 0 and 1
 * sigmoid(0) = 0.5
 */
float sigmoid(float x) {
    return 1.0f / (1.0f + exp(-x));
}

/**
 * Initializes a node from explicit weights and bias.
 *
 * @param self node to initialize
 * @param w    the 16 input weights
 * @param bias value added to the weighted sum before the sigmoid
 * @return self
 */
PerceptronNode * PerceptronNode_init(PerceptronNode *self, float16 w, float bias) {
    self->w = w;
    self->bias = bias;
    return self;
}

/**
 * Initializes a node with unit weights on its first numInputNodes inputs and
 * zero weights on the remaining ones.  The bias is set to -numInputNodes / 2.
 *
 * @param self          node to initialize
 * @param numInputNodes number of inputs that receive weight 1.0
 * @return self
 */
PerceptronNode * PerceptronNode_init2(PerceptronNode *self, int numInputNodes) {
    self->w.s0 = numInputNodes >= 1  ? 1.0f : 0.0f;
    self->w.s1 = numInputNodes >= 2  ? 1.0f : 0.0f;
    self->w.s2 = numInputNodes >= 3  ? 1.0f : 0.0f;
    self->w.s3 = numInputNodes >= 4  ? 1.0f : 0.0f;
    self->w.s4 = numInputNodes >= 5  ? 1.0f : 0.0f;
    self->w.s5 = numInputNodes >= 6  ? 1.0f : 0.0f;
    self->w.s6 = numInputNodes >= 7  ? 1.0f : 0.0f;
    self->w.s7 = numInputNodes >= 8  ? 1.0f : 0.0f;
    self->w.s8 = numInputNodes >= 9  ? 1.0f : 0.0f;
    self->w.s9 = numInputNodes >= 10 ? 1.0f : 0.0f;
    self->w.sA = numInputNodes >= 11 ? 1.0f : 0.0f;
    self->w.sB = numInputNodes >= 12 ? 1.0f : 0.0f;
    self->w.sC = numInputNodes >= 13 ? 1.0f : 0.0f;
    self->w.sD = numInputNodes >= 14 ? 1.0f : 0.0f;
    self->w.sE = numInputNodes >= 15 ? 1.0f : 0.0f;
    self->w.sF = numInputNodes >= 16 ? 1.0f : 0.0f;

    self->bias = -1.0f * numInputNodes / 2.0f;
    return self;
}

/**
 * Initializes a node with random weights on its first numInputNodes inputs
 * and zero weights on the remaining ones.  Each random weight is
 * r * 20 - 10, where r is one component of a ranluxcl32() draw
 * (presumably r lies in (0, 1), giving weights in (-10, 10) -- confirm
 * against the RANLUXCL documentation).
 *
 * Four float4 draws are always taken for the weights, regardless of
 * numInputNodes, followed by a fifth draw whose first component seeds the
 * bias: bias = (r * 20 - 10) * numInputNodes / 2.
 *
 * @param self          node to initialize
 * @param numInputNodes number of inputs that receive a random weight
 * @param rst           generator state, advanced by five ranluxcl32() calls
 * @return self
 */
PerceptronNode * PerceptronNode_init3(PerceptronNode *self, int numInputNodes, ranluxcl_state_t *rst) {
    float4 rnr1 = ranluxcl32(rst);
    float4 rnr2 = ranluxcl32(rst);
    float4 rnr3 = ranluxcl32(rst);
    float4 rnr4 = ranluxcl32(rst);

    self->w.s0 = numInputNodes >= 1  ? rnr1.s0 * 20.0f - 10.0f : 0.0f;
    self->w.s1 = numInputNodes >= 2  ? rnr1.s1 * 20.0f - 10.0f : 0.0f;
    self->w.s2 = numInputNodes >= 3  ? rnr1.s2 * 20.0f - 10.0f : 0.0f;
    self->w.s3 = numInputNodes >= 4  ? rnr1.s3 * 20.0f - 10.0f : 0.0f;
    self->w.s4 = numInputNodes >= 5  ? rnr2.s0 * 20.0f - 10.0f : 0.0f;
    self->w.s5 = numInputNodes >= 6  ? rnr2.s1 * 20.0f - 10.0f : 0.0f;
    self->w.s6 = numInputNodes >= 7  ? rnr2.s2 * 20.0f - 10.0f : 0.0f;
    self->w.s7 = numInputNodes >= 8  ? rnr2.s3 * 20.0f - 10.0f : 0.0f;
    self->w.s8 = numInputNodes >= 9  ? rnr3.s0 * 20.0f - 10.0f : 0.0f;
    self->w.s9 = numInputNodes >= 10 ? rnr3.s1 * 20.0f - 10.0f : 0.0f;
    self->w.sA = numInputNodes >= 11 ? rnr3.s2 * 20.0f - 10.0f : 0.0f;
    self->w.sB = numInputNodes >= 12 ? rnr3.s3 * 20.0f - 10.0f : 0.0f;
    self->w.sC = numInputNodes >= 13 ? rnr4.s0 * 20.0f - 10.0f : 0.0f;
    self->w.sD = numInputNodes >= 14 ? rnr4.s1 * 20.0f - 10.0f : 0.0f;
    self->w.sE = numInputNodes >= 15 ? rnr4.s2 * 20.0f - 10.0f : 0.0f;
    self->w.sF = numInputNodes >= 16 ? rnr4.s3 * 20.0f - 10.0f : 0.0f;

    self->bias = (ranluxcl32(rst).s0 * 20.0f - 10.0f) * numInputNodes / 2.0f;
    return self;
}

/**
 * Evaluates one node.
 * f(x) = sigmoid(w[0]*x[0] + ... + w[15]*x[15] + bias)
 * f(x) = sigmoid(dot(w, x) + bias)
 * O(numInputs)
 *
 * The products are accumulated one component at a time, in order from s0 to
 * sF, so the floating-point summation order is fixed.
 *
 * @param self node to evaluate
 * @param x    the 16 input values
 * @return the node's activation
 */
float PerceptronNode_calc(PerceptronNode *self, float16 x) {
    float sum = 0.0f;

    sum += self->w.s0 * x.s0;
    sum += self->w.s1 * x.s1;
    sum += self->w.s2 * x.s2;
    sum += self->w.s3 * x.s3;
    sum += self->w.s4 * x.s4;
    sum += self->w.s5 * x.s5;
    sum += self->w.s6 * x.s6;
    sum += self->w.s7 * x.s7;
    sum += self->w.s8 * x.s8;
    sum += self->w.s9 * x.s9;
    sum += self->w.sA * x.sA;
    sum += self->w.sB * x.sB;
    sum += self->w.sC * x.sC;
    sum += self->w.sD * x.sD;
    sum += self->w.sE * x.sE;
    sum += self->w.sF * x.sF;

    return sigmoid(sum + self->bias);
}

// TODO Want coefficient and intercept

/**
 * Initializes a network by copying the given hidden and output nodes.
 *
 * @param self        network to initialize
 * @param hiddenNodes 16 nodes copied into self->hiddenNodes
 * @param outputNodes 48 nodes copied into self->outputNodes
 * @return self
 */
Perceptron * Perceptron_init(Perceptron *self, PerceptronNode hiddenNodes[16], PerceptronNode outputNodes[48]) {
    for (int h = 0; h < 16; h++) {
        self->hiddenNodes[h] = hiddenNodes[h];
    }
    for (int o = 0; o < 48; o++) {
        self->outputNodes[o] = outputNodes[o];
    }
    return self;
}

/**
 * Initializes every node with PerceptronNode_init2: hidden nodes take
 * numInputNodes inputs, output nodes take all 16 hidden values.
 *
 * @param self          network to initialize
 * @param numInputNodes number of network inputs in use
 * @return self
 */
Perceptron * Perceptron_init2(Perceptron *self, int numInputNodes) {
    for (int h = 0; h < 16; h++) {
        PerceptronNode_init2(&self->hiddenNodes[h], numInputNodes);
    }
    for (int o = 0; o < 48; o++) {
        PerceptronNode_init2(&self->outputNodes[o], 16);
    }
    return self;
}

/**
 * Initializes every node with PerceptronNode_init3: hidden nodes take
 * numInputNodes inputs, output nodes take all 16 hidden values.  Hidden
 * nodes are initialized first, then output nodes, each in index order, so
 * the sequence of draws taken from rst is fixed.
 *
 * @param self          network to initialize
 * @param numInputNodes number of network inputs in use
 * @param rst           generator state passed to every node initializer
 * @return self
 */
Perceptron * Perceptron_init3(Perceptron *self, int numInputNodes, ranluxcl_state_t *rst) {
    for (int h = 0; h < 16; h++) {
        PerceptronNode_init3(&self->hiddenNodes[h], numInputNodes, rst);
    }
    for (int o = 0; o < 48; o++) {
        PerceptronNode_init3(&self->outputNodes[o], 16, rst);
    }
    return self;
}

/**
 * Evaluates all 16 hidden nodes on x and packs their activations into a
 * float16 (hidden node i lands in component i).  Shared by Perceptron_calc
 * and Perceptron_calc2 so the hidden layer is written out only once.
 */
float16 Perceptron_calcHiddenLayer(Perceptron *self, float16 x) {
    float16 hiddenNodeVals;

    hiddenNodeVals.s0 = PerceptronNode_calc(&self->hiddenNodes[0], x);
    hiddenNodeVals.s1 = PerceptronNode_calc(&self->hiddenNodes[1], x);
    hiddenNodeVals.s2 = PerceptronNode_calc(&self->hiddenNodes[2], x);
    hiddenNodeVals.s3 = PerceptronNode_calc(&self->hiddenNodes[3], x);
    hiddenNodeVals.s4 = PerceptronNode_calc(&self->hiddenNodes[4], x);
    hiddenNodeVals.s5 = PerceptronNode_calc(&self->hiddenNodes[5], x);
    hiddenNodeVals.s6 = PerceptronNode_calc(&self->hiddenNodes[6], x);
    hiddenNodeVals.s7 = PerceptronNode_calc(&self->hiddenNodes[7], x);
    hiddenNodeVals.s8 = PerceptronNode_calc(&self->hiddenNodes[8], x);
    hiddenNodeVals.s9 = PerceptronNode_calc(&self->hiddenNodes[9], x);
    hiddenNodeVals.sA = PerceptronNode_calc(&self->hiddenNodes[10], x);
    hiddenNodeVals.sB = PerceptronNode_calc(&self->hiddenNodes[11], x);
    hiddenNodeVals.sC = PerceptronNode_calc(&self->hiddenNodes[12], x);
    hiddenNodeVals.sD = PerceptronNode_calc(&self->hiddenNodes[13], x);
    hiddenNodeVals.sE = PerceptronNode_calc(&self->hiddenNodes[14], x);
    hiddenNodeVals.sF = PerceptronNode_calc(&self->hiddenNodes[15], x);

    return hiddenNodeVals;
}

/**
 * Runs the network on x and returns three consecutive outputs.
 * O(numHiddenNodes + numOutputNodes)
 *
 * All 16 hidden nodes are evaluated, then output nodes outNodeIndex,
 * outNodeIndex + 1 and outNodeIndex + 2 are evaluated on the hidden values.
 *
 * @param self         network to evaluate
 * @param x            the 16 network inputs
 * @param outNodeIndex index of the first output node to evaluate; must
 *                     satisfy 0 <= outNodeIndex <= 45 because outputNodes has
 *                     48 entries -- the index is NOT range-checked here
 * @param y            receives the three activations in y[0..2]
 */
void Perceptron_calc(Perceptron *self, float16 x, int outNodeIndex, float y[3]) {
    // Calculate hidden nodes followed by output nodes
    float16 hiddenNodeVals = Perceptron_calcHiddenLayer(self, x);

    y[0] = PerceptronNode_calc(&self->outputNodes[outNodeIndex], hiddenNodeVals);
    y[1] = PerceptronNode_calc(&self->outputNodes[outNodeIndex + 1], hiddenNodeVals);
    y[2] = PerceptronNode_calc(&self->outputNodes[outNodeIndex + 2], hiddenNodeVals);
}

/**
 * Runs the network on x and returns all 48 outputs.
 * O(numHiddenNodes + numOutputNodes)
 *
 * @param self network to evaluate
 * @param x    the 16 network inputs
 * @param y    receives the activation of output node i in y[i], i = 0..47
 */
void Perceptron_calc2(Perceptron *self, float16 x, float y[48]) {
    // Calculate hidden nodes followed by output nodes
    float16 hiddenNodeVals = Perceptron_calcHiddenLayer(self, x);

    for (int o = 0; o < 48; o++) {
        y[o] = PerceptronNode_calc(&self->outputNodes[o], hiddenNodeVals);
    }
}

#endif // PERCEPTRON_CL