Changeset 849
- Timestamp:
- 01/16/08 14:19:48 (16 years ago)
- Files:
-
- trunk/libffado/SConstruct (modified) (1 diff)
- trunk/libffado/src/libstreaming/amdtp/AmdtpBufferOps.h (added)
- trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp (modified) (10 diffs)
- trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.h (modified) (1 diff)
- trunk/libffado/src/libutil/ByteSwap.h (modified) (5 diffs)
- trunk/libffado/tests/test-bufferops.cpp (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/libffado/SConstruct
r847 r849 330 330 print "Doing an optimized build..." 331 331 332 #>>!!!!!!!HACK!!!!!! 333 env.AppendUnique( CCFLAGS=["-msse2"] ) 334 env.AppendUnique( CFLAGS=["-msse2"] ) 335 #<<!!!!!!!HACK!!!!!! 332 336 333 337 env['REVISION'] = os.popen('svnversion .').read()[:-1] trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp
r847 r849 25 25 #include "AmdtpTransmitStreamProcessor.h" 26 26 #include "AmdtpPort.h" 27 #include "AmdtpBufferOps.h" 27 28 #include "../StreamProcessorManager.h" 28 29 #include "devicemanager.h" … … 38 39 39 40 #include "libutil/ByteSwap.h" 40 41 #define AMDTP_FLOAT_MULTIPLIER 2147483392.042 41 43 42 namespace Streaming … … 409 408 unsigned int nevents, unsigned int offset ) 410 409 { 410 // update the variable parts of the cache 411 411 updatePortCache(); 412 413 // encode audio data 414 // the data is stored in the original format (float, int). later on 415 // the complete buffer is converted to the correct type and labeled at once 416 muxAudioPorts((quadlet_t *)data, offset, nevents); 417 418 // label everything as MBLA audio since those are by far the most 419 // occurring. If we treat all as audio we can use efficient block 420 // processing. Afterwards we can correct wrong labels of other 421 // types. 412 422 switch(m_StreamProcessorManager.getAudioDataType()) { 413 423 case StreamProcessorManager::eADT_Int24: 414 encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);424 convertFromInt24AndLabelAsMBLA(((quadlet_t *)data), nevents * m_dimension); 415 425 break; 416 426 case StreamProcessorManager::eADT_Float: 417 encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);427 convertFromFloatAndLabelAsMBLA(((quadlet_t *)data), nevents * m_dimension); 418 428 break; 419 429 } 430 431 // do midi ports 420 432 encodeMidiPorts((quadlet_t *)data, offset, nevents); 433 // do endian conversion 421 434 byteSwapToBus(((quadlet_t *)data), nevents * m_dimension); 422 435 return true; … … 431 444 432 445 encodeAudioPortsSilence((quadlet_t *)data, offset, nevents); 446 convertFromInt24AndLabelAsMBLA(((quadlet_t *)data), nevents * m_dimension); 433 447 encodeMidiPortsSilence((quadlet_t *)data, offset, nevents); 434 448 byteSwapToBus(((quadlet_t *)data), nevents * m_dimension); … … 457 471 for (j = 0;j < nevents; j += 1) 458 472 { 459 *target_event = 0 x40000000;473 *target_event = 0; 460 474 target_event += m_dimension; 461 475 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality … … 464 478 } 465 479 480 #ifdef __SSE2__ 481 //#if 0 482 #include <emmintrin.h> 483 #warning SSE2 build 484 466 485 /** 467 * @brief encodes all audio ports in the cache to events (float data)486 * @brief mux all audio ports to events 468 487 * @param data 469 488 * @param offset … … 471 490 */ 472 491 void 473 AmdtpTransmitStreamProcessor:: encodeAudioPortsFloat(quadlet_t *data,474 475 492 AmdtpTransmitStreamProcessor::muxAudioPorts(quadlet_t *data, 493 unsigned int offset, 494 unsigned int nevents) 476 495 { 477 496 unsigned int j; … … 479 498 unsigned int i; 480 499 481 for (i = 0; i < m_nb_audio_ports; i++) { 500 quadlet_t * client_buffers[4]; 501 quadlet_t tmp_values[4] __attribute__ ((aligned (16))); 502 503 // prepare the scratch buffer 504 assert(m_scratch_buffer_size_bytes > nevents * 4); 505 memset(m_scratch_buffer, 0, nevents * 4); 506 507 // this assumes that audio ports are sorted by position, 508 // and that there are no gaps 509 for (i = 0; i < m_nb_audio_ports-4; i += 4) { 510 struct _MBLA_port_cache *p; 511 512 // get the port buffers 513 for (j=0; j<4; j++) { 514 p = &(m_audio_ports.at(i+j)); 515 if(p->buffer && p->enabled) { 516 client_buffers[j] = (quadlet_t *) p->buffer; 517 client_buffers[j] += offset; 518 } else { 519 // if a port is disabled or has no valid 520 // buffer, use the scratch buffer (all zero's) 521 client_buffers[j] = (quadlet_t *) m_scratch_buffer; 522 } 523 } 524 525 // the base event for this position 526 target_event = (quadlet_t *)(data + i); 527 528 // process the events 529 for (j=0;j < nevents; j += 1) 530 { 531 // read the values 532 tmp_values[0] = *(client_buffers[0]); 533 tmp_values[1] = *(client_buffers[1]); 534 tmp_values[2] = *(client_buffers[2]); 535 tmp_values[3] = *(client_buffers[3]); 536 537 // convert to packed int 538 __m128i v_vals = *((__m128i*)tmp_values); 539 __m128i *target = (__m128i*)target_event; 540 541 // store the packed int 542 // (target misalignment is assumed since we don't know the m_dimension) 543 _mm_storeu_si128 (target, v_vals); 544 545 // increment the buffer pointers 546 client_buffers[0]++; 547 client_buffers[1]++; 548 client_buffers[2]++; 549 client_buffers[3]++; 550 551 // go to next target event position 552 target_event += m_dimension; 553 } 554 } 555 556 // do remaining ports 557 for (; i < m_nb_audio_ports; i++) { 482 558 struct _MBLA_port_cache &p = m_audio_ports.at(i); 483 559 target_event = (quadlet_t *)(data + i); 484 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality485 560 assert(nevents + offset <= p.buffer_size ); 486 561 487 562 if(p.buffer && p.enabled) { 488 float *buffer = (float *)(p.buffer);563 quadlet_t *buffer = (quadlet_t *)(p.buffer); 489 564 buffer += offset; 490 __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality491 565 492 566 for (j = 0;j < nevents; j += 1) 493 567 { 494 // don't care for overflow 495 float v = (*buffer) * AMDTP_FLOAT_MULTIPLIER; 496 unsigned int tmp = ((int) v); 497 *target_event = ( tmp >> 8 ) | 0x40000000; 568 *target_event = *buffer; 498 569 buffer++; 499 __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality500 570 target_event += m_dimension; 501 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality502 571 } 503 572 } else { 504 573 for (j = 0;j < nevents; j += 1) 505 574 { 506 *target_event = 0x 40000000;575 *target_event = 0x0; 507 576 target_event += m_dimension; 508 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 509 } 510 } 511 } 512 } 577 } 578 } 579 } 580 } 581 582 #else 513 583 514 584 /** 515 * @brief encodes all audio ports in the cache to events (int24 data)585 * @brief mux all audio ports to events 516 586 * @param data 517 587 * @param offset … … 519 589 */ 520 590 void 521 AmdtpTransmitStreamProcessor:: encodeAudioPortsInt24(quadlet_t *data,522 523 591 AmdtpTransmitStreamProcessor::muxAudioPorts(quadlet_t *data, 592 unsigned int offset, 593 unsigned int nevents) 524 594 { 525 595 unsigned int j; … … 530 600 struct _MBLA_port_cache &p = m_audio_ports.at(i); 531 601 target_event = (quadlet_t *)(data + i); 532 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality533 602 assert(nevents + offset <= p.buffer_size ); 534 603 535 604 if(p.buffer && p.enabled) { 536 uint32_t *buffer = (uint32_t *)(p.buffer);605 quadlet_t *buffer = (quadlet_t *)(p.buffer); 537 606 buffer += offset; 538 __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality 539 540 for (j = 0; j < nevents; j += 1) 607 608 for (j = 0;j < nevents; j += 1) 541 609 { 542 *target_event = ((*buffer) & 0x00FFFFFF) | 0x40000000;610 *target_event = *buffer; 543 611 buffer++; 544 __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality545 546 612 target_event += m_dimension; 547 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality548 613 } 549 614 } else { 550 615 for (j = 0;j < nevents; j += 1) 551 616 { 552 *target_event = 0x 40000000;617 *target_event = 0x0; 553 618 target_event += m_dimension; 554 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality555 556 557 558 } 619 } 620 } 621 } 622 } 623 #endif 559 624 560 625 /** trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.h
r833 r849 120 120 void encodeMidiPortsSilence(quadlet_t *data, unsigned int offset, unsigned int nevents); 121 121 void encodeMidiPorts(quadlet_t *data, unsigned int offset, unsigned int nevents); 122 void muxAudioPorts(quadlet_t *data, unsigned int offset, unsigned int nevents); 122 123 123 124 unsigned int getFDF(); trunk/libffado/src/libutil/ByteSwap.h
r847 r849 26 26 27 27 #include <netinet/in.h> 28 #include <endian.h> 28 29 #include <assert.h> 29 30 … … 33 34 #include <stdio.h> 34 35 36 #if __BYTE_ORDER == __BIG_ENDIAN 37 38 // no-op for big endian machines 39 static inline void 40 byteSwapToBus(quadlet_t *data, unsigned int nb_elements) 41 { 42 return; 43 } 44 45 static inline void 46 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) 47 { 48 return; 49 } 50 51 #else 52 35 53 #ifdef __SSE2__ 36 54 #include <emmintrin.h> 37 55 #warning SSE2 build 38 56 39 static inline void 57 //static inline void 58 void 40 59 byteSwapToBus(quadlet_t *data, unsigned int nb_elements) 41 60 { … … 85 104 } 86 105 87 static inline void 106 //static inline void 107 void 88 108 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) 89 109 { … … 109 129 __m128i v; 110 130 while(nb_elements >= 4) { 111 // prefetch the data for the next round112 __builtin_prefetch(data+128, 0, 0);113 114 131 // load the data into the vector unit 115 132 v = _mm_load_si128((__m128i*)data); … … 155 172 } 156 173 157 #endif 174 #endif // sse2 158 175 159 #endif 176 #endif // byte order 177 178 #endif // h trunk/libffado/tests/test-bufferops.cpp
r847 r849 27 27 28 28 #include "libutil/ByteSwap.h" 29 #include "libstreaming/amdtp/AmdtpBufferOps.h" 30 29 31 #include "libutil/SystemTimeSource.h" 30 32 #include <inttypes.h> 31 33 32 #include <emmintrin.h> 33 /* 34 void test() { 35 vSInt16 *in, *out; //must be 16 byte aligned 36 37 for( x = 0; x < array_bytes / sizeof( vSInt16); x++ ) 38 { 39 vSInt16 v = in[x]; //load 16 bytes 40 v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it 41 out[x] = v; //store it out 42 } 43 }*/ 44 45 #define NB_QUADLETS (4096 * 4096) 34 // 32M of test data 35 #define NB_QUADLETS (1024 * 1024 * 32) 46 36 #define NB_TESTS 10 47 int 48 main(int argc, char **argv) { 37 38 bool 39 testByteSwap(int nb_quadlets, int nb_tests) { 49 40 quadlet_t *buffer_1; 50 41 quadlet_t *buffer_ref; … … 57 48 setDebugLevel(DEBUG_LEVEL_NORMAL); 58 49 59 buffer_1 = new quadlet_t[ NB_QUADLETS];60 buffer_ref = new quadlet_t[ NB_QUADLETS];61 62 debugOutput(DEBUG_LEVEL_NORMAL,"Generating test data...\n");63 for (i=0; i< NB_QUADLETS; i++) {50 buffer_1 = new quadlet_t[nb_quadlets]; 51 buffer_ref = new quadlet_t[nb_quadlets]; 52 53 printMessage( "Generating test data...\n"); 54 for (i=0; i<nb_quadlets; i++) { 64 55 byte_t tmp = i & 0xFF; 65 56 buffer_1[i] = tmp << 24; … … 74 65 // do reference conversion 75 66 76 for (i=0; i< NB_QUADLETS; i++) {67 for (i=0; i<nb_quadlets; i++) { 77 68 buffer_ref[i] = htonl(buffer_1[i]); 78 69 } 79 70 80 debugOutput(DEBUG_LEVEL_NORMAL,"Performing byte-swap...\n");71 printMessage( "Performing byte-swap...\n"); 81 72 82 73 int test=0; 83 for (test=0; test< NB_TESTS; test++) {84 for (i=0; i< NB_QUADLETS; i++) {74 for (test=0; test<nb_tests; test++) { 75 for (i=0; i<nb_quadlets; i++) { 85 76 byte_t tmp = i & 0xFF; 86 77 buffer_1[i] = tmp << 24; … … 94 85 95 86 start = time.getCurrentTimeAsUsecs(); 96 byteSwapToBus(buffer_1, NB_QUADLETS);87 byteSwapToBus(buffer_1, nb_quadlets); 97 88 elapsed = time.getCurrentTimeAsUsecs() - start; 98 debugOutput(DEBUG_LEVEL_NORMAL," took %lluusec...\n", elapsed);89 printMessage( " took %lluusec...\n", elapsed); 99 90 100 91 } 101 92 102 93 // check 103 debugOutput(DEBUG_LEVEL_NORMAL,"Checking results...\n");94 printMessage( "Checking results...\n"); 104 95 bool all_ok=true; 105 for (i=0; i< NB_QUADLETS; i++) {96 for (i=0; i<nb_quadlets; i++) { 106 97 if (buffer_1[i] != buffer_ref[i]) { 107 debugOutput(DEBUG_LEVEL_NORMAL," bad result: %08X should be %08X\n",98 printMessage( " bad result: %08X should be %08X\n", 108 99 buffer_1[i], buffer_ref[i]); 109 100 all_ok=false; … … 116 107 delete[] buffer_1; 117 108 delete[] buffer_ref; 109 return all_ok; 110 } 111 112 bool 113 testInt24Label(int nb_quadlets, int nb_tests) { 114 quadlet_t *buffer_1; 115 quadlet_t *buffer_ref; 116 int i=0; 117 118 Util::SystemTimeSource time; 119 ffado_microsecs_t start; 120 ffado_microsecs_t elapsed; 121 122 setDebugLevel(DEBUG_LEVEL_MESSAGE); 123 124 buffer_1 = new quadlet_t[nb_quadlets]; 125 buffer_ref = new quadlet_t[nb_quadlets]; 126 127 printMessage( "Generating test data...\n"); 128 for (i=0; i<nb_quadlets; i++) { 129 byte_t tmp = i & 0xFF; 130 buffer_1[i] = tmp << 16; 131 tmp = (i + 1) & 0xFF; 132 buffer_1[i] |= tmp << 8; 133 tmp = (i + 2) & 0xFF; 134 buffer_1[i] |= tmp; 135 } 136 137 // do reference conversion 138 for (i=0; i<nb_quadlets; i++) { 139 buffer_ref[i] = buffer_1[i] | 0x40000000; 140 } 141 142 printMessage( "Performing AMDTP labeling...\n"); 143 144 int test=0; 145 for (test=0; test<nb_tests; test++) { 146 for (i=0; i<nb_quadlets; i++) { 147 byte_t tmp = i & 0xFF; 148 buffer_1[i] = tmp << 16; 149 tmp = (i + 1) & 0xFF; 150 buffer_1[i] |= tmp << 8; 151 tmp = (i + 2) & 0xFF; 152 buffer_1[i] |= tmp; 153 } 154 155 start = time.getCurrentTimeAsUsecs(); 156 convertFromInt24AndLabelAsMBLA(buffer_1, nb_quadlets); 157 elapsed = time.getCurrentTimeAsUsecs() - start; 158 printMessage( " took %lluusec...\n", elapsed); 159 } 160 161 // check 162 printMessage( "Checking results...\n"); 163 bool all_ok=true; 164 for (i=0; i<nb_quadlets; i++) { 165 if (buffer_1[i] != buffer_ref[i]) { 166 printMessage( " bad result: %08X should be %08X\n", 167 buffer_1[i], buffer_ref[i]); 168 all_ok=false; 169 } else { 170 //debugOutput(DEBUG_LEVEL_VERBOSE, "good result: %08X should be %08X\n", 171 // buffer_1[i], buffer_ref[i]); 172 } 173 } 174 175 delete[] buffer_1; 176 delete[] buffer_ref; 177 return all_ok; 178 } 179 180 bool 181 testFloatLabel(int nb_quadlets, int nb_tests) { 182 quadlet_t *buffer_1; 183 quadlet_t *buffer_ref; 184 quadlet_t *buffer_in; 185 float *buffer_float; 186 int i=0; 187 188 Util::SystemTimeSource time; 189 ffado_microsecs_t start; 190 ffado_microsecs_t elapsed; 191 192 setDebugLevel(DEBUG_LEVEL_MESSAGE); 193 194 buffer_1 = new quadlet_t[nb_quadlets]; 195 buffer_in = new quadlet_t[nb_quadlets]; 196 buffer_ref = new quadlet_t[nb_quadlets]; 197 buffer_float = new float[nb_quadlets]; 198 199 printMessage( "Generating test data...\n"); 200 for (i=0; i<nb_quadlets; i++) { 201 byte_t tmp = i & 0xFF; 202 buffer_in[i] = tmp << 16; 203 tmp = (i + 1) & 0xFF; 204 buffer_in[i] |= tmp << 8; 205 tmp = (i + 2) & 0xFF; 206 buffer_in[i] |= tmp; 207 208 // convert to float and normalize 209 buffer_float[i] = (float)(buffer_in[i]); 210 buffer_float[i] /= (float)(0x007FFFFF); // range: 0..2 211 buffer_float[i] -= 1.0; // range: 1..-1 212 213 // copy to input buffer 214 float *t = &(buffer_float[i]); 215 quadlet_t *v = (quadlet_t *)t; 216 buffer_1[i] = *v; 217 } 218 219 // do reference conversion 220 for (i=0; i<nb_quadlets; i++) { 221 float v = (buffer_float[i]) * AMDTP_FLOAT_MULTIPLIER; 222 unsigned int tmp = ((int) v); 223 tmp = ( tmp >> 8 ) | 0x40000000; 224 buffer_ref[i] = tmp; 225 } 226 227 printMessage( "Performing AMDTP labeling...\n"); 228 229 int test=0; 230 for (test=0; test<nb_tests; test++) { 231 for (i=0; i<nb_quadlets; i++) { 232 // copy float to input buffer 233 float *t = &(buffer_float[i]); 234 quadlet_t *v = (quadlet_t *)t; 235 buffer_1[i] = *v; 236 } 237 238 start = time.getCurrentTimeAsUsecs(); 239 convertFromFloatAndLabelAsMBLA(buffer_1, nb_quadlets); 240 elapsed = time.getCurrentTimeAsUsecs() - start; 241 printMessage( " took %lluusec...\n", elapsed); 242 } 243 244 // check 245 printMessage( "Checking results...\n"); 246 bool all_ok=true; 247 for (i=0; i<nb_quadlets; i++) { 248 if (buffer_1[i] != buffer_ref[i]) { 249 printMessage( " bad result: %08X should be %08X\n", 250 buffer_1[i], buffer_ref[i]); 251 all_ok=false; 252 } else { 253 //debugOutput(DEBUG_LEVEL_VERBOSE, "good result: %08X should be %08X\n", 254 // buffer_1[i], buffer_ref[i]); 255 } 256 } 257 258 delete[] buffer_1; 259 delete[] buffer_ref; 260 delete[] buffer_in; 261 delete[] buffer_float; 262 return all_ok; 263 } 264 265 int 266 main(int argc, char **argv) { 267 268 testByteSwap(NB_QUADLETS, NB_TESTS); 269 testInt24Label(NB_QUADLETS, NB_TESTS); 270 testFloatLabel(NB_QUADLETS, NB_TESTS); 118 271 119 272 return 0;