function weight = hamming_weight(x)
%HAMMING_WEIGHT Count the set bits (population count) of 32-bit values.
%   WEIGHT = HAMMING_WEIGHT(X) returns, for every element of X, the number
%   of 1 bits in its binary representation. X may be a scalar or an array
%   of any shape holding integer values in the range [0, 2^32 - 1].
%   WEIGHT is a uint32 array with the same size as X.
%
%   An error is raised if any element of X is >= 2^32, is negative, or
%   (for floating-point input) is not an integer value, NaN included.
%   Without these checks the uint32 cast below would silently saturate or
%   round such values and a meaningless weight would be returned.

    % Validate element-wise. A plain "max(x) >= 2^32" is not enough: for a
    % matrix, max() yields one value per column and "if" on a vector is
    % true only when ALL entries are true, so a single out-of-range
    % element would slip through.
    if any(x(:) >= 2^32)
        error('max(x) >= 2^32');
    end
    if any(x(:) < 0)
        error('min(x) < 0');
    end
    % NaN ~= fix(NaN) is true, so NaN is rejected here as well.
    if isfloat(x) && any(x(:) ~= fix(x(:)))
        error('x must contain integer values');
    end

    % Bit masks for the parallel (SWAR) bit count.
    mask_pairs   = uint32(1431655765);  % 0x55555555: 01 repeated
    mask_nibbles = uint32(858993459);   % 0x33333333: 0011 repeated
    mask_bytes   = uint32(252645135);   % 0x0F0F0F0F: 00001111 repeated

    x = uint32(x);

    % Each 2-bit field now holds the number of set bits it contained.
    % The subtraction can never go below zero, so uint32 saturation is
    % not triggered.
    x = x - bitand(bitshift(x, -1), mask_pairs);

    % Each 4-bit field holds the count of its two 2-bit fields (max 4).
    x = bitand(x, mask_nibbles) + bitand(bitshift(x, -2), mask_nibbles);

    % Each byte holds the count of its two nibbles (max 8).
    x = bitand(x + bitshift(x, -4), mask_bytes);

    % Fold the four byte counts into the lowest byte. The per-byte sums
    % stay <= 32, so no carry spills between bytes and nothing saturates.
    x = x + bitshift(x, -8);
    x = x + bitshift(x, -16);

    % The low byte holds the total (0..32); the higher bytes hold partial
    % sums and are masked away.
    weight = bitand(x, uint32(127));

end

% % Expression 	Binary 	Decimal 	Comment
% % A 	01 10 11 00 10 11 10 10 		The original number
% % B = A & 01 01 01 01 01 01 01 01 	01 00 01 00 00 01 00 00 	1,0,1,0,0,1,0,0 	every other bit from A
% % C = (A >> 1) & 01 01 01 01 01 01 01 01 	00 01 01 00 01 01 01 01 	0,1,1,0,1,1,1,1 	the remaining bits from A
% % D = B + C 	01 01 10 00 01 10 01 01 	1,1,2,0,1,2,1,1 	list giving # of 1s in each 2-bit piece of A
% % E = D & 0011 0011 0011 0011 	0001 0000 0010 0001 	1,0,2,1 	every other count from D
% % F = (D >> 2) & 0011 0011 0011 0011 	0001 0010 0001 0001 	1,2,1,1 	the remaining counts from D
% % G = E + F 	0010 0010 0011 0010 	2,2,3,2 	list giving # of 1s in each 4-bit piece of A
% % H = G & 00001111 00001111 	00000010 00000010 	2,2 	every other count from G
% % I = (G >> 4) & 00001111 00001111 	00000010 00000011 	2,3 	the remaining counts from G
% % J = H + I 	00000100 00000101 	4,5 	list giving # of 1s in each 8-bit piece of A
% % K = J & 0000000011111111 	0000000000000101 	5 	every other count from J
% % L = (J >> 8) & 0000000011111111 	0000000000000100 	4 	the remaining counts from J
% % M = K + L 	0000000000001001 	9 	the final answer










% % 
% % //types and constants used in the functions below
% %  
% % typedef unsigned __int64 uint64;  //assume this gives 64-bits
% % const uint64 m1  = 0x5555555555555555; //binary: 0101...
% % const uint64 m2  = 0x3333333333333333; //binary: 00110011..
% % const uint64 m4  = 0x0f0f0f0f0f0f0f0f; //binary:  4 zeros,  4 ones ...
% % const uint64 m8  = 0x00ff00ff00ff00ff; //binary:  8 zeros,  8 ones ...
% % const uint64 m16 = 0x0000ffff0000ffff; //binary: 16 zeros, 16 ones ...
% % const uint64 m32 = 0x00000000ffffffff; //binary: 32 zeros, 32 ones
% % const uint64 hff = 0xffffffffffffffff; //binary: all ones
% % const uint64 h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
% %  
% % //This is a naive implementation, shown for comparison,
% % //and to help in understanding the better functions.
% % //It uses 24 arithmetic operations (shift, add, and).
% % int popcount_1(uint64 x) {
% %     x = (x & m1 ) + ((x >>  1) & m1 ); //put count of each  2 bits into those  2 bits 
% %     x = (x & m2 ) + ((x >>  2) & m2 ); //put count of each  4 bits into those  4 bits 
% %     x = (x & m4 ) + ((x >>  4) & m4 ); //put count of each  8 bits into those  8 bits 
% %     x = (x & m8 ) + ((x >>  8) & m8 ); //put count of each 16 bits into those 16 bits 
% %     x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits 
% %     x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits 
% %     return x;
% % }
% %  
% % //This uses fewer arithmetic operations than any other known  
% % //implementation on machines with slow multiplication.
% % //It uses 17 arithmetic operations.
% % int popcount_2(uint64 x) {
% %     x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
% %     x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 
% %     x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits 
% %     x += x >>  8;  //put count of each 16 bits into their lowest 8 bits
% %     x += x >> 16;  //put count of each 32 bits into their lowest 8 bits
% %     x += x >> 32;  //put count of each 64 bits into their lowest 8 bits
% %     return x & 0x7f;
% % }
% %  
% % //This uses fewer arithmetic operations than any other known  
% % //implementation on machines with fast multiplication.
% % //It uses 12 arithmetic operations, one of which is a multiply.
% % int popcount_3(uint64 x) {
% %     x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
% %     x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 
% %     x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits 
% %     return (x * h01)>>56;  //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 