root/branches/libffado-2.0/src/libutil/ByteSwap.h

Revision 1632, 5.8 kB (checked in by ppalmers, 15 years ago)

fix byte-swapping bug

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_BYTESWAP__
25 #define __FFADO_BYTESWAP__
26
27 #include <byteswap.h>
28 #include <inttypes.h>
29 #include <endian.h>
30 #include <assert.h>
31
32 // to check for SSE etc...
33 #include "config.h"
34
35 #include <stdio.h>
36
/*
 * Reverse the byte order of a 32-bit value in a form usable in constant
 * expressions (static initializers, case labels, ...).
 *
 * The masks are unsigned on purpose: with plain int masks an int argument
 * whose low byte is >= 0x80 would be shifted into the sign bit, which is
 * undefined behaviour for signed types. The unsigned masks force each
 * sub-expression to unsigned arithmetic before it is shifted.
 *
 * Note: x is evaluated four times; only pass side-effect free expressions.
 */
#define BYTESWAP32_CONST(x) ((((x) & 0x000000FFU) << 24) |   \
                             (((x) & 0x0000FF00U) << 8) |    \
                             (((x) & 0x00FF0000U) >> 8) |    \
                             (((x) & 0xFF000000U) >> 24))
41
/**
 * Unconditionally reverse the byte order of a 64-bit value.
 *
 * @param d value to convert
 * @return d with its eight bytes in reversed order
 */
static inline uint64_t
ByteSwap64(uint64_t d)
{
    const uint64_t reversed = bswap_64(d);
    return reversed;
}
47
/**
 * Unconditionally reverse the byte order of a 32-bit value.
 *
 * @param d value to convert
 * @return d with its four bytes in reversed order
 */
static inline uint32_t
ByteSwap32(uint32_t d)
{
    const uint32_t reversed = bswap_32(d);
    return reversed;
}
53
/**
 * Unconditionally reverse the byte order of a 16-bit value.
 *
 * @param d value to convert
 * @return d with its two bytes exchanged
 */
static inline uint16_t
ByteSwap16(uint16_t d)
{
    const uint16_t reversed = bswap_16(d);
    return reversed;
}
59
60
61 #if __BYTE_ORDER == __BIG_ENDIAN
62
63 // no-op for big endian machines
64
/* Constant-expression host-to-bus conversion: identity in this (big-endian) branch. */
#define CONDSWAPTOBUS32_CONST(value) (value)
66
/**
 * Convert a 64-bit value from host to bus byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in host byte order
 * @return d, unmodified
 */
static inline uint64_t
CondSwapToBus64(uint64_t d)
{
    const uint64_t bus_order = d;
    return bus_order;
}
72
/**
 * Convert a 32-bit value from host to bus byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in host byte order
 * @return d, unmodified
 */
static inline uint32_t
CondSwapToBus32(uint32_t d)
{
    const uint32_t bus_order = d;
    return bus_order;
}
78
/**
 * Convert a 16-bit value from host to bus byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in host byte order
 * @return d, unmodified
 */
static inline uint16_t
CondSwapToBus16(uint16_t d)
{
    const uint16_t bus_order = d;
    return bus_order;
}
84
/**
 * Convert a 64-bit value from bus to host byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in bus byte order
 * @return d, unmodified
 */
static inline uint64_t
CondSwapFromBus64(uint64_t d)
{
    const uint64_t host_order = d;
    return host_order;
}
90
/**
 * Convert a 32-bit value from bus to host byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in bus byte order
 * @return d, unmodified
 */
static inline uint32_t
CondSwapFromBus32(uint32_t d)
{
    const uint32_t host_order = d;
    return host_order;
}
96
/**
 * Convert a 16-bit value from bus to host byte order.
 * This is the big-endian host variant: no conversion is needed.
 *
 * @param d value in bus byte order
 * @return d, unmodified
 */
static inline uint16_t
CondSwapFromBus16(uint16_t d)
{
    const uint16_t host_order = d;
    return host_order;
}
102
103 static inline void
104 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
105 {
106     return;
107 }
108
109 static inline void
110 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
111 {
112     return;
113 }
114
115 #else
116
/*
 * Constant-expression host-to-bus conversion for hosts that are not
 * big-endian: the bytes have to be reversed.
 * Defined function-like, matching the big-endian definition, so the macro
 * name behaves the same way in both configurations.
 */
#define CONDSWAPTOBUS32_CONST(x) BYTESWAP32_CONST(x)
118
/**
 * Convert a 64-bit value from host to bus byte order.
 * This is the variant for hosts that are not big-endian: bytes are reversed.
 *
 * @param d value in host byte order
 * @return d with its bytes reversed
 */
static inline uint64_t
CondSwapToBus64(uint64_t d)
{
    const uint64_t bus_order = ByteSwap64(d);
    return bus_order;
}
124
/**
 * Convert a 32-bit value from host to bus byte order.
 * This is the variant for hosts that are not big-endian: bytes are reversed.
 *
 * @param d value in host byte order
 * @return d with its bytes reversed
 */
static inline uint32_t
CondSwapToBus32(uint32_t d)
{
    const uint32_t bus_order = ByteSwap32(d);
    return bus_order;
}
130
/**
 * Convert a 16-bit value from host to bus byte order.
 * This is the variant for hosts that are not big-endian: bytes are exchanged.
 *
 * @param d value in host byte order
 * @return d with its two bytes exchanged
 */
static inline uint16_t
CondSwapToBus16(uint16_t d)
{
    const uint16_t bus_order = ByteSwap16(d);
    return bus_order;
}
136
/**
 * Convert a 64-bit value from bus to host byte order.
 * This is the variant for hosts that are not big-endian: bytes are reversed.
 *
 * @param d value in bus byte order
 * @return d with its bytes reversed
 */
static inline uint64_t
CondSwapFromBus64(uint64_t d)
{
    const uint64_t host_order = ByteSwap64(d);
    return host_order;
}
142
/**
 * Convert a 32-bit value from bus to host byte order.
 * This is the variant for hosts that are not big-endian: bytes are reversed.
 *
 * @param d value in bus byte order
 * @return d with its bytes reversed
 */
static inline uint32_t
CondSwapFromBus32(uint32_t d)
{
    const uint32_t host_order = ByteSwap32(d);
    return host_order;
}
148
/**
 * Convert a 16-bit value from bus to host byte order.
 * This is the variant for hosts that are not big-endian: bytes are exchanged.
 *
 * @param d value in bus byte order
 * @return d with its two bytes exchanged
 */
static inline uint16_t
CondSwapFromBus16(uint16_t d)
{
    const uint16_t host_order = ByteSwap16(d);
    return host_order;
}
154
155 #ifdef __SSE2__
156 #include <emmintrin.h>
157
158 static inline void
159 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
160 {
161     // Work input until data reaches 16 byte alignment
162     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
163         *data = ByteSwap32(*data);
164         data++;
165         nb_elements--;
166     }
167
168     if(nb_elements == 0) {
169         return;
170     }
171
172     assert((((unsigned long)data) & 0xF) == 0);
173
174     // now do the SSE based conversion
175     // we have to go from [A B C D] to [D C B A]
176     // where A, B, C, D are bytes
177     //
178     // the algorithm is:
179     // 1) [A B C D] => [B A D C]
180     // 2) [B A D C] => [D C B A]
181     //
182     // i.e. first do a 2x(2x8bit) swap
183     // then a 2x16bit swap
184    
185     __m128i v;
186     while(nb_elements >= 4) {
187         // prefetch the data for the next round
188          __builtin_prefetch(data+128, 0, 0);
189
190         // load the data into the vector unit
191         v = _mm_load_si128((__m128i*)data);
192         // do first swap
193         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
194         // do second swap
195         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
196         // store result
197         _mm_store_si128 ((__m128i*)data, v);
198        
199         data += 4;
200         nb_elements -= 4;
201     }
202
203     // and do the remaining ones
204     while (nb_elements > 0) {
205         *data = ByteSwap32(*data);
206         data++;
207         nb_elements--;
208     }
209 }
210
211 static inline void
212 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
213 {
214     // Work input until data reaches 16 byte alignment
215     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
216         *data = ByteSwap32(*data);
217         data++;
218         nb_elements--;
219     }
220
221     if(nb_elements == 0) {
222         return;
223     }
224
225     assert((((unsigned long)data) & 0xF) == 0);
226
227     // now do the SSE based conversion
228     // we have to go from [A B C D] to [D C B A]
229     // where A, B, C, D are bytes
230     //
231     // the algorithm is:
232     // 1) [A B C D] => [B A D C]
233     // 2) [B A D C] => [D C B A]
234     //
235     // i.e. first do a 2x(2x8bit) swap
236     // then a 2x16bit swap
237    
238     __m128i v;
239     while(nb_elements >= 4) {
240         // load the data into the vector unit
241         v = _mm_load_si128((__m128i*)data);
242         // do first swap
243         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
244         // do second swap
245         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
246         // store result
247         _mm_store_si128 ((__m128i*)data, v);
248        
249         data += 4;
250         nb_elements -= 4;
251     }
252
253     // and do the remaining ones
254     while (nb_elements > 0) {
255         *data = ByteSwap32(*data);
256         data++;
257         nb_elements--;
258     }
259 }
260
261 #else
262
263 static inline void
264 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
265 {
266     unsigned int i=0;
267     for(; i<nb_elements; i++) {
268         *data = ByteSwap32(*data);
269         data++;
270     }
271 }
272
273 static inline void
274 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
275 {
276     unsigned int i=0;
277     for(; i<nb_elements; i++) {
278         *data = ByteSwap32(*data);
279         data++;
280     }
281 }
282
283 #endif // sse2
284
285 #endif // byte order
286
287 #endif // h
Note: See TracBrowser for help on using the browser.