root/branches/libffado-2.0/src/libutil/ByteSwap.h

Revision 1378, 5.7 kB (checked in by ppalmers, 12 years ago)

fix compiler warnings when doing SSE2 builds

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_BYTESWAP__
25 #define __FFADO_BYTESWAP__
26
27 #include <byteswap.h>
28 #include <inttypes.h>
29 #include <endian.h>
30 #include <assert.h>
31
32 // to check for SSE etc...
33 #include "config.h"
34
35 #include <stdio.h>
36
// Byte-reverse a 32-bit value using only mask/shift/or, so the result can be
// used where a constant expression is required.
//
// The argument is expanded four times, so it must be free of side effects.
//
// The masks are unsigned on purpose: with plain int masks, an int argument
// whose low byte is >= 0x80 would be shifted into the sign bit of int by the
// "<< 24" term, which is undefined behaviour. Unsigned masks force every term
// into unsigned arithmetic; the value and type of the result are unchanged.
#define BYTESWAP32_CONST(x) ((((x) & 0x000000FFU) << 24) |   \
                             (((x) & 0x0000FF00U) << 8)  |   \
                             (((x) & 0x00FF0000U) >> 8)  |   \
                             (((x) & 0xFF000000U) >> 24))
41
// Unconditionally reverse the byte order of a 64-bit value.
// Thin wrapper around bswap_64() from <byteswap.h>.
static inline uint64_t ByteSwap64(uint64_t d)
{
    const uint64_t reversed = bswap_64(d);
    return reversed;
}
47
// Unconditionally reverse the byte order of a 32-bit value.
// Thin wrapper around bswap_32() from <byteswap.h>.
static inline uint32_t ByteSwap32(uint32_t d)
{
    const uint32_t reversed = bswap_32(d);
    return reversed;
}
53
// Unconditionally reverse the byte order of a 16-bit value.
// Thin wrapper around bswap_16() from <byteswap.h>.
static inline uint16_t ByteSwap16(uint16_t d)
{
    const uint16_t reversed = bswap_16(d);
    return reversed;
}
59
60
61 #if __BYTE_ORDER == __BIG_ENDIAN
62
// Big-endian host: every conditional conversion in this branch is the
// identity, so the constant form simply yields its argument.

#define CONDSWAPTOBUS32_CONST(x) (x)
66
// Host-to-bus conversion of a 64-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    return d;
}
72
// Host-to-bus conversion of a 32-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    return d;
}
78
// Host-to-bus conversion of a 16-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    return d;
}
84
// Bus-to-host conversion of a 64-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    return d;
}
90
// Bus-to-host conversion of a 32-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    return d;
}
96
// Bus-to-host conversion of a 16-bit value.
// Big-endian branch: the value is returned unchanged.
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    return d;
}
102
103 static inline void
104 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
105 {
106     return;
107 }
108
109 static inline void
110 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
111 {
112     return;
113 }
114
115 #else
116
// Non-big-endian host: constants need a real byte reversal, so the
// conditional form is an alias for the unconditional constant swap.
#define CONDSWAPTOBUS32_CONST BYTESWAP32_CONST
118
// Host-to-bus conversion of a 64-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap64().
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    const uint64_t bus_value = ByteSwap64(d);
    return bus_value;
}
124
// Host-to-bus conversion of a 32-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap32().
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    const uint32_t bus_value = ByteSwap32(d);
    return bus_value;
}
130
// Host-to-bus conversion of a 16-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap16().
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    const uint16_t bus_value = ByteSwap16(d);
    return bus_value;
}
136
// Bus-to-host conversion of a 64-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap64().
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    const uint64_t host_value = ByteSwap64(d);
    return host_value;
}
142
// Bus-to-host conversion of a 32-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap32().
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    const uint32_t host_value = ByteSwap32(d);
    return host_value;
}
148
// Bus-to-host conversion of a 16-bit value.
// Non-big-endian branch: the bytes are reversed via ByteSwap16().
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    const uint16_t host_value = ByteSwap16(d);
    return host_value;
}
154
155 #ifdef __SSE2__
156 #include <emmintrin.h>
157
158 static inline void
159 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
160 {
161     // Work input until data reaches 16 byte alignment
162     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
163         *data = ByteSwap32(*data);
164         data++;
165         nb_elements--;
166     }
167     assert((((unsigned long)data) & 0xF) == 0);
168
169     // now do the SSE based conversion
170     // we have to go from [A B C D] to [D C B A]
171     // where A, B, C, D are bytes
172     //
173     // the algorithm is:
174     // 1) [A B C D] => [B A D C]
175     // 2) [B A D C] => [D C B A]
176     //
177     // i.e. first do a 2x(2x8bit) swap
178     // then a 2x16bit swap
179    
180     __m128i v;
181     while(nb_elements >= 4) {
182         // prefetch the data for the next round
183          __builtin_prefetch(data+128, 0, 0);
184
185         // load the data into the vector unit
186         v = _mm_load_si128((__m128i*)data);
187         // do first swap
188         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
189         // do second swap
190         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
191         // store result
192         _mm_store_si128 ((__m128i*)data, v);
193        
194         data += 4;
195         nb_elements -= 4;
196     }
197
198     // and do the remaining ones
199     while (nb_elements > 0) {
200         *data = ByteSwap32(*data);
201         data++;
202         nb_elements--;
203     }
204 }
205
206 static inline void
207 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
208 {
209     // Work input until data reaches 16 byte alignment
210     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
211         *data = ByteSwap32(*data);
212         data++;
213         nb_elements--;
214     }
215     assert((((unsigned long)data) & 0xF) == 0);
216
217     // now do the SSE based conversion
218     // we have to go from [A B C D] to [D C B A]
219     // where A, B, C, D are bytes
220     //
221     // the algorithm is:
222     // 1) [A B C D] => [B A D C]
223     // 2) [B A D C] => [D C B A]
224     //
225     // i.e. first do a 2x(2x8bit) swap
226     // then a 2x16bit swap
227    
228     __m128i v;
229     while(nb_elements >= 4) {
230         // load the data into the vector unit
231         v = _mm_load_si128((__m128i*)data);
232         // do first swap
233         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
234         // do second swap
235         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
236         // store result
237         _mm_store_si128 ((__m128i*)data, v);
238        
239         data += 4;
240         nb_elements -= 4;
241     }
242
243     // and do the remaining ones
244     while (nb_elements > 0) {
245         *data = ByteSwap32(*data);
246         data++;
247         nb_elements--;
248     }
249 }
250
251 #else
252
253 static inline void
254 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
255 {
256     unsigned int i=0;
257     for(; i<nb_elements; i++) {
258         *data = ByteSwap32(*data);
259         data++;
260     }
261 }
262
263 static inline void
264 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
265 {
266     unsigned int i=0;
267     for(; i<nb_elements; i++) {
268         *data = ByteSwap32(*data);
269         data++;
270     }
271 }
272
273 #endif // sse2
274
275 #endif // byte order
276
277 #endif // h
Note: See TracBrowser for help on using the browser.