root/trunk/libffado/src/libutil/ByteSwap.h

Revision 1152, 5.8 kB (checked in by holin, 13 years ago)

fix optimized compile

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_BYTESWAP__
25 #define __FFADO_BYTESWAP__
26
27 #include <byteswap.h>
28 #include <inttypes.h>
29 #include <endian.h>
30 #include <assert.h>
31
32 // to check for SSE etc...
33 #include "config.h"
34
35 #include <stdio.h>
36
// Reverse the byte order of a 32-bit constant. Usable where a function call
// is not allowed (e.g. initializers and case labels).
//
// The masks are unsigned so that every intermediate value is unsigned: with
// plain int masks, a low byte >= 0x80 shifted left by 24 would overflow a
// signed int (undefined behaviour, and a negative result that sign-extends
// when widened to 64 bits).
//
// Note: x is evaluated four times; only pass side-effect-free expressions.
#define BYTESWAP32_CONST(x) ((((x) & 0x000000FFU) << 24) |   \
                             (((x) & 0x0000FF00U) << 8) |    \
                             (((x) & 0x00FF0000U) >> 8) |    \
                             (((x) & 0xFF000000U) >> 24))
41
// Unconditionally reverse the byte order of a 64-bit value.
// Thin wrapper around bswap_64() from <byteswap.h>.
static inline uint64_t
ByteSwap64(uint64_t d)
{
    const uint64_t reversed = bswap_64(d);
    return reversed;
}
47
// Unconditionally reverse the byte order of a 32-bit value.
// Thin wrapper around bswap_32() from <byteswap.h>.
static inline uint32_t
ByteSwap32(uint32_t d)
{
    const uint32_t reversed = bswap_32(d);
    return reversed;
}
53
// Unconditionally reverse the byte order of a 16-bit value.
// Thin wrapper around bswap_16() from <byteswap.h>.
static inline uint16_t
ByteSwap16(uint16_t d)
{
    const uint16_t reversed = bswap_16(d);
    return reversed;
}
59
60
61 #if __BYTE_ORDER == __BIG_ENDIAN
62
63 // no-op for big endian machines
64
// Constant-expression form of the host-to-bus conversion for 32-bit values.
// In the big-endian build it expands to its argument unchanged.
#define CONDSWAPTOBUS32_CONST(value) (value)
66
// Host-to-bus conversion of a 64-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint64_t
CondSwapToBus64(uint64_t d)
{
    const uint64_t unchanged = d;
    return unchanged;
}
72
// Host-to-bus conversion of a 32-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint32_t
CondSwapToBus32(uint32_t d)
{
    const uint32_t unchanged = d;
    return unchanged;
}
78
// Host-to-bus conversion of a 16-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint16_t
CondSwapToBus16(uint16_t d)
{
    const uint16_t unchanged = d;
    return unchanged;
}
84
// Bus-to-host conversion of a 64-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint64_t
CondSwapFromBus64(uint64_t d)
{
    const uint64_t unchanged = d;
    return unchanged;
}
90
// Bus-to-host conversion of a 32-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint32_t
CondSwapFromBus32(uint32_t d)
{
    const uint32_t unchanged = d;
    return unchanged;
}
96
// Bus-to-host conversion of a 16-bit value.
// Big-endian build: the value is returned unchanged.
static inline uint16_t
CondSwapFromBus16(uint16_t d)
{
    const uint16_t unchanged = d;
    return unchanged;
}
102
103 static inline void
104 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
105 {
106     return;
107 }
108
109 static inline void
110 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
111 {
112     return;
113 }
114
115 #else
116
// Constant-expression form of the host-to-bus conversion for 32-bit values.
// In the non-big-endian build it byte-swaps its argument via
// BYTESWAP32_CONST. Declared function-like so that it has the same shape as
// the big-endian definition of this macro.
#define CONDSWAPTOBUS32_CONST(x) BYTESWAP32_CONST(x)
118
// Host-to-bus conversion of a 64-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap64().
static inline uint64_t
CondSwapToBus64(uint64_t d)
{
    const uint64_t swapped = ByteSwap64(d);
    return swapped;
}
124
// Host-to-bus conversion of a 32-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap32().
static inline uint32_t
CondSwapToBus32(uint32_t d)
{
    const uint32_t swapped = ByteSwap32(d);
    return swapped;
}
130
// Host-to-bus conversion of a 16-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap16().
static inline uint16_t
CondSwapToBus16(uint16_t d)
{
    const uint16_t swapped = ByteSwap16(d);
    return swapped;
}
136
// Bus-to-host conversion of a 64-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap64().
static inline uint64_t
CondSwapFromBus64(uint64_t d)
{
    const uint64_t swapped = ByteSwap64(d);
    return swapped;
}
142
// Bus-to-host conversion of a 32-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap32().
static inline uint32_t
CondSwapFromBus32(uint32_t d)
{
    const uint32_t swapped = ByteSwap32(d);
    return swapped;
}
148
// Bus-to-host conversion of a 16-bit value.
// Non-big-endian build: the bytes are reversed with ByteSwap16().
static inline uint16_t
CondSwapFromBus16(uint16_t d)
{
    const uint16_t swapped = ByteSwap16(d);
    return swapped;
}
154
155 #ifdef __SSE2__
156 #include <emmintrin.h>
157 #warning SSE2 build
158
159 static
160 //inline void
161 void
162 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
163 {
164     // Work input until data reaches 16 byte alignment
165     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
166         *data = ByteSwap32(*data);
167         data++;
168         nb_elements--;
169     }
170     assert((((unsigned long)data) & 0xF) == 0);
171
172     // now do the SSE based conversion
173     // we have to go from [A B C D] to [D C B A]
174     // where A, B, C, D are bytes
175     //
176     // the algorithm is:
177     // 1) [A B C D] => [B A D C]
178     // 2) [B A D C] => [D C B A]
179     //
180     // i.e. first do a 2x(2x8bit) swap
181     // then a 2x16bit swap
182    
183     __m128i v;
184     while(nb_elements >= 4) {
185         // prefetch the data for the next round
186          __builtin_prefetch(data+128, 0, 0);
187
188         // load the data into the vector unit
189         v = _mm_load_si128((__m128i*)data);
190         // do first swap
191         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
192         // do second swap
193         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
194         // store result
195         _mm_store_si128 ((__m128i*)data, v);
196        
197         data += 4;
198         nb_elements -= 4;
199     }
200
201     // and do the remaining ones
202     while (nb_elements > 0) {
203         *data = ByteSwap32(*data);
204         data++;
205         nb_elements--;
206     }
207 }
208
209 static
210 //inline void
211 void
212 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
213 {
214     // Work input until data reaches 16 byte alignment
215     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
216         *data = ByteSwap32(*data);
217         data++;
218         nb_elements--;
219     }
220     assert((((unsigned long)data) & 0xF) == 0);
221
222     // now do the SSE based conversion
223     // we have to go from [A B C D] to [D C B A]
224     // where A, B, C, D are bytes
225     //
226     // the algorithm is:
227     // 1) [A B C D] => [B A D C]
228     // 2) [B A D C] => [D C B A]
229     //
230     // i.e. first do a 2x(2x8bit) swap
231     // then a 2x16bit swap
232    
233     __m128i v;
234     while(nb_elements >= 4) {
235         // load the data into the vector unit
236         v = _mm_load_si128((__m128i*)data);
237         // do first swap
238         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
239         // do second swap
240         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
241         // store result
242         _mm_store_si128 ((__m128i*)data, v);
243        
244         data += 4;
245         nb_elements -= 4;
246     }
247
248     // and do the remaining ones
249     while (nb_elements > 0) {
250         *data = ByteSwap32(*data);
251         data++;
252         nb_elements--;
253     }
254 }
255
256 #else
257
258 static inline void
259 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
260 {
261     unsigned int i=0;
262     for(; i<nb_elements; i++) {
263         *data = ByteSwap32(*data);
264         data++;
265     }
266 }
267
268 static inline void
269 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
270 {
271     unsigned int i=0;
272     for(; i<nb_elements; i++) {
273         *data = ByteSwap32(*data);
274         data++;
275     }
276 }
277
278 #endif // sse2
279
280 #endif // byte order
281
282 #endif // h
Note: See TracBrowser for help on using the browser.