root/trunk/libffado/src/libstreaming/amdtp/AmdtpBufferOps.h

Revision 849, 4.6 kB (checked in by ppalmers, 16 years ago)

temp commit of some SSE/memory optimizations

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_AMDTPBUFFEROPS__
25 #define __FFADO_AMDTPBUFFEROPS__
26
27 #include <assert.h>
28 // to check for SSE etc...
29 #include "config.h"
30
31 #include <stdio.h>
32
33 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
34
35 #ifdef __SSE2__
36 //#if 0
37 #include <emmintrin.h>
38 #warning SSE2 build
39
40 //static inline void
41 void
42 convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements)
43 {
44     // Work input until data reaches 16 byte alignment
45     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
46         float *in = (float *)data;
47         float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
48         unsigned int tmp = ((int) v);
49         tmp = ( tmp >> 8 ) | 0x40000000;
50         *data = (quadlet_t)tmp;
51         data++;
52         nb_elements--;
53     }
54     assert((((unsigned long)data) & 0xF) == 0);
55
56     // now do the SSE based conversion/labeling
57     __m128i v_int;
58     __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
59     __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
60     __m128 v_float;
61     while(nb_elements >= 4) {
62         float *in = (float *)data;
63         // load the data into the vector unit
64         v_float = _mm_load_ps(in);
65         // multiply
66         v_float = _mm_mul_ps(v_float, mult);
67         // convert to signed integer
68         v_int = _mm_cvttps_epi32( v_float );
69         // shift right 8 bits
70         v_int = _mm_srli_epi32( v_int, 8 );
71         // label it
72         v_int = _mm_or_si128( v_int, label );
73         // store result
74         _mm_store_si128 ((__m128i*)data, v_int);
75
76         data += 4;
77         nb_elements -= 4;
78     }
79
80     // and do the remaining ones
81     while (nb_elements > 0) {
82         float *in = (float *)data;
83         float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
84         unsigned int tmp = ((int) v);
85         tmp = ( tmp >> 8 ) | 0x40000000;
86         *data = (quadlet_t)tmp;
87         data++;
88         nb_elements--;
89     }
90 }
91
92 //static inline void
93 void
94 convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements)
95 {
96     // Work input until data reaches 16 byte alignment
97     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
98         uint32_t in = (uint32_t)(*data);
99         *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000);
100         data++;
101         nb_elements--;
102     }
103     assert((((unsigned long)data) & 0xF) == 0);
104
105     // now do the SSE based labeling
106     __m128i v;
107     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
108     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
109     while(nb_elements >= 4) {
110         // load the data into the vector unit
111         v = _mm_load_si128((__m128i*)data);
112         // mask
113         v = _mm_and_si128( v, mask );
114         // label
115         v = _mm_or_si128( v, label );
116         // store result
117         _mm_store_si128 ((__m128i*)data, v);
118
119         data += 4;
120         nb_elements -= 4;
121     }
122
123     // and do the remaining ones
124     while (nb_elements > 0) {
125         uint32_t in = (uint32_t)(*data);
126         *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000);
127         data++;
128         nb_elements--;
129     }
130 }
131
132 #else
133
134 //static inline void
135 void
136 convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements)
137 {
138     unsigned int i=0;
139     for(; i<nb_elements; i++) {
140         // don't care for overflow
141         float *in = (float *)data;
142         float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
143         unsigned int tmp = ((int) v);
144         tmp = ( tmp >> 8 ) | 0x40000000;
145         *data = (quadlet_t)tmp;
146         data++;
147     }
148 }
149
150 //static inline void
151 void
152 convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements)
153 {
154     unsigned int i=0;
155     for(; i<nb_elements; i++) {
156         uint32_t in = (uint32_t)(*data);
157         *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000);
158         data++;
159     }
160 }
161
162 #endif // sse2
163
164 #endif /* __FFADO_AMDTPBUFFEROPS__ */
Note: See TracBrowser for help on using the browser.