Changeset 887
- Timestamp:
- 02/24/08 07:40:16 (13 years ago)
- Files:
-
- trunk/libffado/config.h.in (modified) (2 diffs)
- trunk/libffado/SConstruct (modified) (1 diff)
- trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/libffado/config.h.in
r884 r887 26 26 /* Define to the api version */ 27 27 #define FFADO_API_VERSION $FFADO_API_VERSION 28 29 /* Define to 1 if SSE assembly is available. */30 // broken31 //#define USE_SSE $USE_SSE32 #define USE_SSE 033 //#define USE_SSE2 $USE_SSE234 #define USE_SSE2 035 28 36 29 #define CACHEDIR "~/.ffado" … … 115 108 #define AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY 6 116 109 110 // ensure that the AMDTP SP clips all float values to [-1.0..1.0] 111 #define AMDTP_CLIP_FLOATS 1 112 113 117 114 #define MOTU_TRANSMIT_TRANSFER_DELAY (11776U) 118 115 trunk/libffado/SConstruct
r886 r887 24 24 25 25 FFADO_API_VERSION="7" 26 FFADO_VERSION="1.999.15" 26 FFADO_VERSION="1.999.16" 27 27 28 28 import os trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp
r866 r887 529 529 const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER); 530 530 531 #if AMDTP_CLIP_FLOATS 532 const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0); 533 const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0); 534 #endif 535 531 536 // this assumes that audio ports are sorted by position, 532 537 // and that there are no gaps … … 563 568 __m128i *target = (__m128i*)target_event; 564 569 __m128i v_int; 570 571 // clip 572 #if AMDTP_CLIP_FLOATS 573 // implement sample<min?min:sample 574 // and sample>max?max:sample 575 // we use separate masks since that allows the 576 // compiler/cpu to do more out-of-order optimization 577 578 // is any of the pieces less than the minimum? 579 // or larger than the maximum? 580 __m128 mask1 = _mm_cmplt_ps(v_float, v_min); 581 __m128 mask2 = _mm_cmpgt_ps(v_float, v_max); 582 // clip the values that need to be clipped 583 // pass the values that don't 584 v_float = _mm_or_ps(_mm_andnot_ps(mask1, v_float), _mm_and_ps(mask1, v_min)); 585 v_float = _mm_or_ps(_mm_andnot_ps(mask2, v_float), _mm_and_ps(mask2, v_max)); 586 #endif 565 587 566 588 // multiply … … 616 638 tmp_values[3] = *buffer; 617 639 buffer++; 618 640 619 641 // now do the SSE based conversion/labeling 620 642 __m128 v_float = *((__m128*)tmp_values); 621 643 __m128i v_int; 622 644 645 #if AMDTP_CLIP_FLOATS 646 // implement sample<min?min:sample 647 // and sample>max?max:sample 648 // we use separate masks since that allows the 649 // compiler/cpu to do more out-of-order optimization 650 651 // is any of the pieces less than the minimum? 652 // or larger than the maximum? 
653 __m128 mask1 = _mm_cmplt_ps(v_float, v_min); 654 __m128 mask2 = _mm_cmpgt_ps(v_float, v_max); 655 // clip the values that need to be clipped 656 // pass the values that don't 657 v_float = _mm_or_ps(_mm_andnot_ps(mask1, v_float), _mm_and_ps(mask1, v_min)); 658 v_float = _mm_or_ps(_mm_andnot_ps(mask2, v_float), _mm_and_ps(mask2, v_max)); 659 #endif 660 623 661 // multiply 624 662 v_float = _mm_mul_ps(v_float, mult); … … 653 691 for(;j < nevents; j += 1) { 654 692 float *in = (float *)buffer; 693 #if AMDTP_CLIP_FLOATS 694 if(*in > 1.0) *in=1.0; 695 if(*in < -1.0) *in=-1.0; 696 #endif 655 697 float v = (*in) * AMDTP_FLOAT_MULTIPLIER; 656 698 unsigned int tmp = ((int) v); … … 898 940 { 899 941 float *in = (float *)buffer; 942 #if AMDTP_CLIP_FLOATS 943 if(*in > 1.0) *in=1.0; 944 if(*in < -1.0) *in=-1.0; 945 #endif 900 946 float v = (*in) * AMDTP_FLOAT_MULTIPLIER; 901 947 unsigned int tmp = ((int) v);