C++ to SSE2 intrinsics

How do I achieve the same result using SSE2 intrinsics in the first 'for' loop?
#include "stdafx.h"
#include <iostream>
#include <iomanip>
using namespace std;


// Runs a two-step add/subtract pass over every column of the fixed 4x4
// matrix `b`, then prints the intermediate matrix `a`, a blank line, and the
// final matrix `c`, one row per line with a space after each element.
//
// argc and argv are not used. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    short b[4][4] = {
        {1, 1, 1, 1},
        {2, 2, 2, 2},
        {3, 3, 3, 3},
        {4, 4, 4, 4}
    };
    short a[4][4];
    short c[4][4];

    // Every statement below combines whole rows element by element, so the
    // four columns (values of j) are computed independently of one another.
    for (int j = 0; j < 4; ++j)
    {
        // Step 1: sums and differences of the outer rows (0, 3) and the
        // inner rows (1, 2).
        a[0][j] = b[0][j] + b[3][j];
        a[1][j] = b[1][j] + b[2][j];
        a[2][j] = b[1][j] - b[2][j];
        a[3][j] = b[0][j] - b[3][j];

        // Step 2: combine the step-1 rows. The doubling is written as * 2
        // rather than << 1 because a[2][j] and a[3][j] can be negative (they
        // are for this input), and left-shifting a negative value is
        // undefined behaviour before C++20.
        c[0][j] = a[0][j] + a[1][j];
        c[1][j] = a[2][j] + (a[3][j] * 2);
        c[2][j] = a[0][j] - a[1][j];
        c[3][j] = a[3][j] - (a[2][j] * 2);
    }

    // Print the intermediate matrix; '\n' avoids the per-row flush that
    // std::endl would force.
    for (int i = 0; i < 4; ++i)
    {
        for (int j = 0; j < 4; ++j)
            std::cout << a[i][j] << " ";
        std::cout << '\n';
    }
    std::cout << '\n';

    // Print the final matrix in the same layout.
    for (int i = 0; i < 4; ++i)
    {
        for (int j = 0; j < 4; ++j)
            std::cout << c[i][j] << " ";
        std::cout << '\n';
    }

    return 0;
}
Topic archived. No new replies allowed.