# Monte Carlo Multilayer Perceptron Gaussian Mixture Model Distribution

## Explanation of the Monte Carlo Multilayer Perceptron Gaussian Mixture Model Distribution (MCMPGMMDist).

A Bayesian network node conditional probability distribution is represented by a Gaussian mixture models. A Gaussian mixture model is defined for every combination of states the parent nodes of the given node can take on. The parameters for each Gaussian mixture model is determined from a multilayer perceptron. As the mixture models for each set of parent states are not independent of each other, the neural network serves to also define and maintain the relationship of the mixture models of the conditional probability distribution.

Ultimately, the states of the parents of the given node are fed into the neural network. There is an input node for each parent node. The output nodes of the neural network then provide the parameters of the Gaussian mixture model for the given state set of the parent nodes. These parameters are the weight, mean, and standard deviation of each normal distribution in the Gaussian mixture model. This set of parameters define the Gaussian mixture model for the particular state set of the parent nodes of the given node.

The advantage of this methodology is that any continuous probability distribution can be approximated by the combination of the multilayer perceptron and the Gaussian mixture model. Furthermore, the relationship between the distributions for different parent state sets is maintained by the neural network. This approach is able to recognize how parents interact in the child node, and thus, we are able to train this distribution more easily and represent the node's conditional probability distribution more compactly.

Below are comparative estimates of the performance and memory footprint of this approach versus using a frequency table:

 Read Performance Memory Footprint O(1) Polynomial and/or Exponential O(1) 4k + 4 O(1) 4k + 77

There are three drawbacks for this approach. First, the mechanics of the relationship between the conditionals to produce the given node cannot be easily decomposed to component parts when the node is recursively expanded to a MEBN. Second, a Gaussian mixture model might provide more expressive power for the distribution than needed, leading to learning inefficiencies. A four parameter beta distribution might be better. Finally, observations cannot be explicitly added to the distribution as can be done with a frequency table.

## OpenCL Code for MCMPGMMDist

Perceptron.cl
/*
* Perceptron.cl
*
*  Created on: Jan 22, 2012
*      Author: scannon
*/

#ifndef PERCEPTRON_CL
#define PERCEPTRON_CL

//#include "ranluxcl.cl"

typedef struct __PerceptronNode__
{
float16 w;
float bias;
} PerceptronNode;

typedef struct __Perceptron__
{
PerceptronNode hiddenNodes[16];
PerceptronNode outputNodes[48];
} Perceptron;

/**
* O(1)
* horizontal asymptotes at 0 and 1
* sigmoid(0) = 0.5
*/
float sigmoid(float x)
{
return 1.0f / (1.0f + exp(-1.0f * x));
}

PerceptronNode * PerceptronNode_init(PerceptronNode *self, float16 w, float bias)
{
self->w = w;
self->bias = bias;

return self;
}

PerceptronNode * PerceptronNode_init2(PerceptronNode *self, int numInputNodes)
{
self->w.s0 = numInputNodes >= 1 ? 1.0f : 0.0f;
self->w.s1 = numInputNodes >= 2 ? 1.0f : 0.0f;
self->w.s2 = numInputNodes >= 3 ? 1.0f : 0.0f;
self->w.s3 = numInputNodes >= 4 ? 1.0f : 0.0f;
self->w.s4 = numInputNodes >= 5 ? 1.0f : 0.0f;
self->w.s5 = numInputNodes >= 6 ? 1.0f : 0.0f;
self->w.s6 = numInputNodes >= 7 ? 1.0f : 0.0f;
self->w.s7 = numInputNodes >= 8 ? 1.0f : 0.0f;
self->w.s8 = numInputNodes >= 9 ? 1.0f : 0.0f;
self->w.s9 = numInputNodes >= 10 ? 1.0f : 0.0f;
self->w.sA = numInputNodes >= 11 ? 1.0f : 0.0f;
self->w.sB = numInputNodes >= 12 ? 1.0f : 0.0f;
self->w.sC = numInputNodes >= 13 ? 1.0f : 0.0f;
self->w.sD = numInputNodes >= 14 ? 1.0f : 0.0f;
self->w.sE = numInputNodes >= 15 ? 1.0f : 0.0f;
self->w.sF = numInputNodes >= 16 ? 1.0f : 0.0f;
self->bias = -1.0f * numInputNodes/2.0f;

return self;
}

PerceptronNode * PerceptronNode_init3(PerceptronNode *self,
int numInputNodes,
ranluxcl_state_t *rst)
{
float4 rnr1 = ranluxcl32(rst);
float4 rnr2 = ranluxcl32(rst);
float4 rnr3 = ranluxcl32(rst);
float4 rnr4 = ranluxcl32(rst);

self->w.s0 = numInputNodes >= 1 ? rnr1.s0 * 20.0f - 10.0f : 0.0f;
self->w.s1 = numInputNodes >= 2 ? rnr1.s1 * 20.0f - 10.0f : 0.0f;
self->w.s2 = numInputNodes >= 3 ? rnr1.s2 * 20.0f - 10.0f : 0.0f;
self->w.s3 = numInputNodes >= 4 ? rnr1.s3 * 20.0f - 10.0f : 0.0f;
self->w.s4 = numInputNodes >= 5 ? rnr2.s0  * 20.0f - 10.0f: 0.0f;
self->w.s5 = numInputNodes >= 6 ? rnr2.s1 * 20.0f - 10.0f : 0.0f;
self->w.s6 = numInputNodes >= 7 ? rnr2.s2 * 20.0f - 10.0f : 0.0f;
self->w.s7 = numInputNodes >= 8 ? rnr2.s3 * 20.0f - 10.0f : 0.0f;
self->w.s8 = numInputNodes >= 9 ? rnr3.s0 * 20.0f - 10.0f : 0.0f;
self->w.s9 = numInputNodes >= 10 ? rnr3.s1 * 20.0f - 10.0f : 0.0f;
self->w.sA = numInputNodes >= 11 ? rnr3.s2 * 20.0f - 10.0f : 0.0f;
self->w.sB = numInputNodes >= 12 ? rnr3.s3 * 20.0f - 10.0f : 0.0f;
self->w.sC = numInputNodes >= 13 ? rnr4.s0 * 20.0f - 10.0f : 0.0f;
self->w.sD = numInputNodes >= 14 ? rnr4.s1 * 20.0f - 10.0f : 0.0f;
self->w.sE = numInputNodes >= 15 ? rnr4.s2 * 20.0f - 10.0f : 0.0f;
self->w.sF = numInputNodes >= 16 ? rnr4.s3 * 20.0f - 10.0f : 0.0f;
self->bias = (ranluxcl32(rst).s0 * 20.0f - 10.0f) * numInputNodes/2.0f;

return self;
}

/**
* f(x) = sigmoid(sum(w[0]*x[0]...w[n-1]*x[n-1] - 7))
* f(x) = sigmoid(dot(w, x) - 7)
* O(numInputs)
*/
float PerceptronNode_calc(PerceptronNode *self, float16 x)
{
float sum = 0.0f;

sum += self->w.s0*x.s0;
sum += self->w.s1*x.s1;
sum += self->w.s2*x.s2;
sum += self->w.s3*x.s3;
sum += self->w.s4*x.s4;
sum += self->w.s5*x.s5;
sum += self->w.s6*x.s6;
sum += self->w.s7*x.s7;
sum += self->w.s8*x.s8;
sum += self->w.s9*x.s9;
sum += self->w.sA*x.sA;
sum += self->w.sB*x.sB;
sum += self->w.sC*x.sC;
sum += self->w.sD*x.sD;
sum += self->w.sE*x.sE;
sum += self->w.sF*x.sF;

return sigmoid(sum + self->bias);
}

// TODO Want coefficient and intercept
Perceptron * Perceptron_init(Perceptron *self,
PerceptronNode hiddenNodes[16],
PerceptronNode outputNodes[48])
{
int i = 0;

for(i = 0; i < 16; i++)
self->hiddenNodes[i] = hiddenNodes[i];
for(i = 0; i < 48; i++)
self->outputNodes[i] = outputNodes[i];

return self;
}

Perceptron * Perceptron_init2(Perceptron *self, int numInputNodes)
{
int i = 0;

for(i = 0; i < 16; i++)
PerceptronNode_init2(&self->hiddenNodes[i], numInputNodes);
for(i = 0; i < 48; i++)
PerceptronNode_init2(&self->outputNodes[i], 16);

return self;
}

Perceptron * Perceptron_init3(Perceptron *self, int numInputNodes, ranluxcl_state_t *rst)
{
int i = 0;

for(i = 0; i < 16; i++)
PerceptronNode_init3(&self->hiddenNodes[i], numInputNodes, rst);
for(i = 0; i < 48; i++)
PerceptronNode_init3(&self->outputNodes[i], 16, rst);

return self;
}

/**
* O(numHiddenNodes + numOutputNodes)
*/
void Perceptron_calc(Perceptron *self, float16 x, int outNodeIndex, float y[3])
{
float16 hiddenNodeVals;

// Calculate hidden nodes followed by output nodes
hiddenNodeVals.s0 = PerceptronNode_calc(&self->hiddenNodes[0], x);
hiddenNodeVals.s1 = PerceptronNode_calc(&self->hiddenNodes[1], x);
hiddenNodeVals.s2 = PerceptronNode_calc(&self->hiddenNodes[2], x);
hiddenNodeVals.s3 = PerceptronNode_calc(&self->hiddenNodes[3], x);
hiddenNodeVals.s4 = PerceptronNode_calc(&self->hiddenNodes[4], x);
hiddenNodeVals.s5 = PerceptronNode_calc(&self->hiddenNodes[5], x);
hiddenNodeVals.s6 = PerceptronNode_calc(&self->hiddenNodes[6], x);
hiddenNodeVals.s7 = PerceptronNode_calc(&self->hiddenNodes[7], x);
hiddenNodeVals.s8 = PerceptronNode_calc(&self->hiddenNodes[8], x);
hiddenNodeVals.s9 = PerceptronNode_calc(&self->hiddenNodes[9], x);
hiddenNodeVals.sA = PerceptronNode_calc(&self->hiddenNodes[10], x);
hiddenNodeVals.sB = PerceptronNode_calc(&self->hiddenNodes[11], x);
hiddenNodeVals.sC = PerceptronNode_calc(&self->hiddenNodes[12], x);
hiddenNodeVals.sD = PerceptronNode_calc(&self->hiddenNodes[13], x);
hiddenNodeVals.sE = PerceptronNode_calc(&self->hiddenNodes[14], x);
hiddenNodeVals.sF = PerceptronNode_calc(&self->hiddenNodes[15], x);
y[0] = PerceptronNode_calc(&self->outputNodes[outNodeIndex], hiddenNodeVals);
y[1] = PerceptronNode_calc(&self->outputNodes[outNodeIndex + 1],
hiddenNodeVals);
y[2] = PerceptronNode_calc(&self->outputNodes[outNodeIndex + 2],
hiddenNodeVals);
}

/**
* O(numHiddenNodes + numOutputNodes)
*/
void Perceptron_calc2(Perceptron *self, float16 x, float y[48])
{
int i = 0;
float16 hiddenNodeVals;

// Calculate hidden nodes followed by output nodes
hiddenNodeVals.s0 = PerceptronNode_calc(&self->hiddenNodes[0], x);
hiddenNodeVals.s1 = PerceptronNode_calc(&self->hiddenNodes[1], x);
hiddenNodeVals.s2 = PerceptronNode_calc(&self->hiddenNodes[2], x);
hiddenNodeVals.s3 = PerceptronNode_calc(&self->hiddenNodes[3], x);
hiddenNodeVals.s4 = PerceptronNode_calc(&self->hiddenNodes[4], x);
hiddenNodeVals.s5 = PerceptronNode_calc(&self->hiddenNodes[5], x);
hiddenNodeVals.s6 = PerceptronNode_calc(&self->hiddenNodes[6], x);
hiddenNodeVals.s7 = PerceptronNode_calc(&self->hiddenNodes[7], x);
hiddenNodeVals.s8 = PerceptronNode_calc(&self->hiddenNodes[8], x);
hiddenNodeVals.s9 = PerceptronNode_calc(&self->hiddenNodes[9], x);
hiddenNodeVals.sA = PerceptronNode_calc(&self->hiddenNodes[10], x);
hiddenNodeVals.sB = PerceptronNode_calc(&self->hiddenNodes[11], x);
hiddenNodeVals.sC = PerceptronNode_calc(&self->hiddenNodes[12], x);
hiddenNodeVals.sD = PerceptronNode_calc(&self->hiddenNodes[13], x);
hiddenNodeVals.sE = PerceptronNode_calc(&self->hiddenNodes[14], x);
hiddenNodeVals.sF = PerceptronNode_calc(&self->hiddenNodes[15], x);
for(i = 0; i < 48; i++)
y[i] = PerceptronNode_calc(&self->outputNodes[i], hiddenNodeVals);
}

#endif	// PERCEPTRON_CL
GMM.cl
/*
* GMM.cl
*
*  Created on: Jan 7, 2012
*      Author: scannon
*/

#ifndef GMM_CL
#define GMM_CL

//#include "ranluxcl.cl"
//#include "Perceptron.cl"

struct __GMM__;
typedef struct __GMM__
{
// 1 input per conditional variable
// 3 outputs per normal:  mean, standard deviation, and weight
Perceptron *perceptron;
struct __GMM__ * conditionals[16];
int numConditionals;
char sampled;
float value;
} GMM;

GMM * GMM__init(GMM *self,
Perceptron *perceptron,
GMM * conditionals[16],
int numConditionals)
{
int i = 0;

if(numConditionals > 16 || numConditionals < 0)
return 0;

self->perceptron = perceptron;
for(i = 0; i < numConditionals; i++)
self->conditionals[i] = conditionals[i];
self->numConditionals = numConditionals;
self->sampled = 0;

return self;
}

/**
* O(O(Perceptron_calc()) + 16 + 16 + O(ranluxcl32norm())) => O(1)
* M(sizeof(Perceptron) + sizeof(ptr)*16 + sizeof(int) + sizeof(char) + sizeof(float)) => M(4k + 77)
*/
float GMM_sample(GMM *self, ranluxcl_state_t *rst)
{
float normParams[48];
int outNodeIndex = 0;
float4 ranluxRet;
float mean = 0.0f;
float sd = 0.0f;
float weightSum = 0.0f;
float16 cVals;
int i = 0;

if(self->sampled)
return self->value;

// Sample parents first
cVals.s0 = self->numConditionals >= 1 ? GMM_sample(self->conditionals[0], rst) : 0.0f;
cVals.s1 = self->numConditionals >= 2 ? GMM_sample(self->conditionals[1], rst) : 0.0f;
cVals.s2 = self->numConditionals >= 3 ? GMM_sample(self->conditionals[2], rst) : 0.0f;
cVals.s3 = self->numConditionals >= 4 ? GMM_sample(self->conditionals[3], rst) : 0.0f;
cVals.s4 = self->numConditionals >= 5 ? GMM_sample(self->conditionals[4], rst) : 0.0f;
cVals.s5 = self->numConditionals >= 6 ? GMM_sample(self->conditionals[5], rst) : 0.0f;
cVals.s6 = self->numConditionals >= 7 ? GMM_sample(self->conditionals[6], rst) : 0.0f;
cVals.s7 = self->numConditionals >= 8 ? GMM_sample(self->conditionals[7], rst) : 0.0f;
cVals.s8 = self->numConditionals >= 9 ? GMM_sample(self->conditionals[8], rst) : 0.0f;
cVals.s9 = self->numConditionals >= 10 ? GMM_sample(self->conditionals[9], rst) : 0.0f;
cVals.sA = self->numConditionals >= 11 ? GMM_sample(self->conditionals[10], rst) : 0.0f;
cVals.sB = self->numConditionals >= 12 ? GMM_sample(self->conditionals[11], rst) : 0.0f;
cVals.sC = self->numConditionals >= 13 ? GMM_sample(self->conditionals[12], rst) : 0.0f;
cVals.sD = self->numConditionals >= 14 ? GMM_sample(self->conditionals[13], rst) : 0.0f;
cVals.sE = self->numConditionals >= 15 ? GMM_sample(self->conditionals[14], rst) : 0.0f;
cVals.sF = self->numConditionals >= 16 ? GMM_sample(self->conditionals[15], rst) : 0.0f;

// Determine weights, means, and standard deviations from conditionals
Perceptron_calc2(self->perceptron, cVals, normParams);

// Determine sum of weight ratios
weightSum = 0.0f;
for(i = 0; i < 16; i++)
weightSum += normParams[i*3 + 2];

// Randomly choose normal from uniform distribution (0 to 1 exclusive)
ranluxRet = ranluxcl32(rst);
outNodeIndex = 0;
while(ranluxRet.x > 0)
{
ranluxRet.x -= normParams[outNodeIndex*3 + 2]/weightSum;
outNodeIndex++;
}
outNodeIndex--;

// Sample normal distributions
mean = normParams[3*outNodeIndex + 0];
sd = normParams[3*outNodeIndex + 1];
self->value = ranluxcl32norm(rst).x*sd + mean;
self->sampled = 1;

return self->value;
}

#endif	// GMM_CL
test.cl
/*
* test.cl
*
*  Created on: Jan 22, 2012
*      Author: scannon
*/

#ifndef TEST_CL
#define TEST_CL

//#include "ranluxcl.cl"
//#include "Perceptron.cl"

char test_PerceptronNode(ranluxcl_state_t *rst)
{
PerceptronNode pNode1;
float16 w1 = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i1 = (float16)(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
PerceptronNode_init(&pNode1, w1, -7.0f);
if(PerceptronNode_calc(&pNode1, i1) >= 0.001f)
return 1;

PerceptronNode pNode2;
float16 w2 = (float16)(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
float16 i2 = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init(&pNode2, w2, -7.0f);
if(PerceptronNode_calc(&pNode2, i2) >= 0.001f)
return 2;

PerceptronNode pNode3;
float16 w3 = (float16)(1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i3 = (float16)(0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init(&pNode3, w3, -2.0f);
if(PerceptronNode_calc(&pNode3, i3) - 0.731f > 0.001f ||
PerceptronNode_calc(&pNode3, i3) - 0.731f < -0.001f)
return 3;

PerceptronNode pNode4;
float16 i4 = (float16)(0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init2(&pNode4, 4);
if(PerceptronNode_calc(&pNode4, i4) - 0.731f > 0.001f ||
PerceptronNode_calc(&pNode4, i4) - 0.731f < -0.001f)
return 4;

PerceptronNode pNode5;
float16 w5 = (float16)(1.0f, 3.0f, 2.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i5 = (float16)(1.0f, 3.0f, 5.0f, 7.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init(&pNode5, w5, -7.0f);
if(PerceptronNode_calc(&pNode5, i5) - 0.999f > 0.001f ||
PerceptronNode_calc(&pNode5, i5) - 0.999f < -0.001f)
return 5;

PerceptronNode pNode6;
PerceptronNode pNode7;
PerceptronNode_init3(&pNode6, 7, rst);
PerceptronNode_init3(&pNode7, 7, rst);
if(PerceptronNode_calc(&pNode6, i5) > 1 || PerceptronNode_calc(&pNode6, i5) < 0)
return 6;
if(PerceptronNode_calc(&pNode7, i5) > 1 || PerceptronNode_calc(&pNode7, i5) < 0)
return 6;
if(pNode6.w.s0 == pNode7.w.s0 &&
pNode6.w.s1 == pNode7.w.s1 &&
pNode6.w.s2 == pNode7.w.s2 &&
pNode6.w.s3 == pNode7.w.s3 &&
pNode6.w.s4 == pNode7.w.s4 &&
pNode6.w.s5 == pNode7.w.s5 &&
pNode6.w.s6 == pNode7.w.s6)
return 6;

PerceptronNode pNode8;
float16 i8 = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init2(&pNode8, 0);
if(PerceptronNode_calc(&pNode8, i8) - 0.5f > 0.001f ||
PerceptronNode_calc(&pNode8, i8) - 0.5f < -0.001f)
return 7;

return 0;
}

char test_Perceptron(ranluxcl_state_t *rst)
{
int i = 0;

Perceptron p1;
float16 i1 = (float16)(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float y1[48];
Perceptron_init2(&p1, 10);
Perceptron_calc2(&p1, i1, y1);
if(y1[0] - 0.999f > 0.001f ||
y1[0] - 0.999f < -0.001f)
return 11;
if(y1[47] - 0.999f > 0.001f ||
y1[47] - 0.999f < -0.001f)
return 11;

Perceptron p2;
PerceptronNode hiddenNodes[16];
PerceptronNode outputNodes[48];
float16 w2 = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 wh2a = (float16)(11.0f, 11.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 wh2b = (float16)(5.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 w02 = (float16)(11.0f, -15.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
PerceptronNode_init(&hiddenNodes[0], wh2a, -7.0f);
PerceptronNode_init(&hiddenNodes[1], wh2b, -7.0f);
for(i = 2; i < 16; i++)
PerceptronNode_init(&hiddenNodes[i], w2, -7.0f);
PerceptronNode_init(&outputNodes[0], w02, -7.0f);
for(i = 1; i < 48; i++)
PerceptronNode_init(&outputNodes[i], w2, -7.0f);
Perceptron_init(&p2, hiddenNodes, outputNodes);
float y[48];
float16 i2a = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i2b = (float16)(1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i2c = (float16)(0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float16 i2d = (float16)(1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
Perceptron_calc2(&p2, i2a, y);
if(y[0] >= 0.15f)
return 12;
Perceptron_calc2(&p2, i2b, y);
if(y[0] <= 0.85f)
return 13;
Perceptron_calc2(&p2, i2c, y);
if(y[0] <= 0.85f)
return 14;
Perceptron_calc2(&p2, i2d, y);
if(y[0] >= 0.15f)
return 15;

return 0;
}

char test_GMM(ranluxcl_state_t *rst)
{
int numSamples = 1000;
int i = 0;
float mean = 0;
float sumX2 = 0;
float var = 0;
Perceptron p1;
Perceptron_init2(&p1, 0);
GMM node1;
GMM__init(&node1, &p1, 0, 0);
for(i = 0; i < numSamples; i++)
{
float sample = GMM_sample(&node1, rst);
mean += sample;
sumX2 += sample*sample;
node1.sampled = false;
}
mean /= numSamples;
var = sumX2/numSamples - mean*mean;
if(mean - 0.5f > 0.05f || mean - 0.5f < -0.05f)
return 21;
if(var - 0.25f > 0.05f || var - 0.25f < -0.05f)
return 22;

PerceptronNode hiddenNodes[16];
for(i = 0; i < 16; i++)
PerceptronNode_init2(&hiddenNodes[i], 0);
PerceptronNode outputNodes[48];
float16 w1 = (float16)(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
float bias[3] = {0.0f, -4.0f, 0.0f};
for(i = 0; i < 48; i++)
PerceptronNode_init(&outputNodes[i], w1, bias[i%3]);
Perceptron p2;
Perceptron_init(&p1, hiddenNodes, outputNodes);
Perceptron_init(&p2, hiddenNodes, outputNodes);
GMM node2;
GMM__init(&node1, &p1, 0, 0);
GMM__init(&node2, &p2, 0, 0);
Perceptron p3;
PerceptronNode hiddenNodes2[16];
for(i = 0; i < 16; i++)
PerceptronNode_init2(&hiddenNodes2[i], 2);
PerceptronNode outputNodes2[48];
float16 w2 = (float16)(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
float bias2[3] = {-7.0f, -12.0f, 0.0f};
for(i = 0; i < 48; i++)
PerceptronNode_init(&outputNodes2[i], w2, bias2[i%3]);
Perceptron_init(&p3, hiddenNodes2, outputNodes2);
GMM node3;
GMM * parents[16] = {&node1, &node2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
GMM__init(&node3, &p3, parents, 2);
mean = 0;
sumX2 = 0;
for(i = 0; i < numSamples; i++)
{
float sample = GMM_sample(&node3, rst);
mean += sample;
sumX2 += sample*sample;
node3.sampled = false;
}
mean /= numSamples;
var = sumX2/numSamples - mean*mean;
if(mean - 0.73f > 0.05f || mean - 0.73f < -0.05f)
return 23;
if(var - 0.0003f > 0.0005f || var - 0.0003f < -0.0005f)
return 10000*var;//24;

return 0;
}

kernel void Kernel_Ranluxcl_Init(private uint ins,
global ranluxcl_state_t *ranluxcltab)
{
ranluxcl_initialization(ins, ranluxcltab);
}

__kernel void test_PerceptronNode_kernel(global ranluxcl_state_t *ranluxcltab,
__global char *result)
{
int gid = get_global_id(0);
ranluxcl_state_t ranluxclstate;

// Conduct tests
result[gid] = 0;
result[gid] = test_PerceptronNode(&ranluxclstate);

}

__kernel void test_Perceptron_kernel(global ranluxcl_state_t *ranluxcltab,
__global char *result)
{
int gid = get_global_id(0);
ranluxcl_state_t ranluxclstate;

// Conduct tests
result[gid] = 0;
result[gid] = test_Perceptron(&ranluxclstate);

}

__kernel void test_GMM_kernel(global ranluxcl_state_t *ranluxcltab,
__global char *result)
{
int gid = get_global_id(0);
ranluxcl_state_t ranluxclstate;

// Conduct tests
result[gid] = 0;
result[gid] = test_GMM(&ranluxclstate);

}

#endif	// TEST_CL

## Scratchwork Notes

Perceptron_calc:

CPT_calc:

• Polynomial and/or Exponential memory footprint

GMM_sample: (a single node in a Bayesian network)

x[1]

x[1, {'y':2, 'z':3}]