root/trunk/libffado/src/libutil/ByteSwap.h

Revision 1550, 5.8 kB (checked in by ppalmers, 15 years ago)

- Implement basic HSS1394 support for the Stanton SCS devices
- Start of the implementation of a generic TCAT DICE EAP control
- Reworked part of the busreset / ARM handler code

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_BYTESWAP__
25 #define __FFADO_BYTESWAP__
26
27 #include <byteswap.h>
28 #include <inttypes.h>
29 #include <endian.h>
30 #include <assert.h>
31
32 // to check for SSE etc...
33
34
35 #include <stdio.h>
36
// Reverse the byte order of a 32-bit value using only shifts and masks,
// so the result can be used where a constant expression is required.
//
// The argument is converted to uint32_t before shifting: with a plain
// (signed) int argument, moving a low byte with its top bit set into
// bits 24..31 would overflow a signed int, which is undefined behaviour.
// Note: the argument is evaluated four times.
#define BYTESWAP32_CONST(x) ((((uint32_t)(x) & 0x000000FFu) << 24) | \
                             (((uint32_t)(x) & 0x0000FF00u) << 8)  | \
                             (((uint32_t)(x) & 0x00FF0000u) >> 8)  | \
                             (((uint32_t)(x) & 0xFF000000u) >> 24))
41
// Unconditionally reverse the byte order of a 64-bit value
// (thin wrapper around bswap_64 from <byteswap.h>).
static inline uint64_t ByteSwap64(uint64_t d)
{
    const uint64_t reversed = bswap_64(d);
    return reversed;
}
47
// Unconditionally reverse the byte order of a 32-bit value
// (thin wrapper around bswap_32 from <byteswap.h>).
static inline uint32_t ByteSwap32(uint32_t d)
{
    const uint32_t reversed = bswap_32(d);
    return reversed;
}
53
// Unconditionally reverse the byte order of a 16-bit value
// (thin wrapper around bswap_16 from <byteswap.h>).
static inline uint16_t ByteSwap16(uint16_t d)
{
    const uint16_t reversed = bswap_16(d);
    return reversed;
}
59
60 static inline void
61 byteSwapBlock(quadlet_t *data, unsigned int nb_elements)
62 {
63     unsigned int i=0;
64     for(; i<nb_elements; i++) {
65         *data = ByteSwap32(*data);
66         data++;
67     }
68 }
69
70 #if __BYTE_ORDER == __BIG_ENDIAN
71
72 // no-op for big endian machines
73
// Host-to-bus conversion of a 32-bit constant. This definition is the one
// selected for big-endian hosts, where the value is passed through as is.
#define CONDSWAPTOBUS32_CONST(value) (value)
75
// Host-to-bus conversion of a 64-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    const uint64_t unchanged = d;
    return unchanged;
}
81
// Host-to-bus conversion of a 32-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    const uint32_t unchanged = d;
    return unchanged;
}
87
// Host-to-bus conversion of a 16-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    const uint16_t unchanged = d;
    return unchanged;
}
93
// Bus-to-host conversion of a 64-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    const uint64_t unchanged = d;
    return unchanged;
}
99
// Bus-to-host conversion of a 32-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    const uint32_t unchanged = d;
    return unchanged;
}
105
// Bus-to-host conversion of a 16-bit value.
// Big-endian host variant: the value is returned unchanged.
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    const uint16_t unchanged = d;
    return unchanged;
}
111
112 static inline void
113 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
114 {
115     return;
116 }
117
118 static inline void
119 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
120 {
121     return;
122 }
123
124 #else
125
// Host-to-bus conversion of a 32-bit constant. This definition is the one
// selected when the host is not big-endian, so the bytes are reversed with
// BYTESWAP32_CONST. Defined as a function-like macro so that it has the
// same form as the big-endian definition and misuse (e.g. using the bare
// name) fails to compile on every platform, not only on big-endian hosts.
#define CONDSWAPTOBUS32_CONST(x) BYTESWAP32_CONST(x)
127
// Host-to-bus conversion of a 64-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap64().
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    const uint64_t swapped = ByteSwap64(d);
    return swapped;
}
133
// Host-to-bus conversion of a 32-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap32().
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    const uint32_t swapped = ByteSwap32(d);
    return swapped;
}
139
// Host-to-bus conversion of a 16-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap16().
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    const uint16_t swapped = ByteSwap16(d);
    return swapped;
}
145
// Bus-to-host conversion of a 64-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap64().
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    const uint64_t swapped = ByteSwap64(d);
    return swapped;
}
151
// Bus-to-host conversion of a 32-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap32().
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    const uint32_t swapped = ByteSwap32(d);
    return swapped;
}
157
// Bus-to-host conversion of a 16-bit value.
// Variant for hosts that are not big-endian: the bytes are reversed
// with ByteSwap16().
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    const uint16_t swapped = ByteSwap16(d);
    return swapped;
}
163
164 #ifdef __SSE2__
165 #include <emmintrin.h>
166
167 static inline void
168 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
169 {
170     // Work input until data reaches 16 byte alignment
171     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
172         *data = ByteSwap32(*data);
173         data++;
174         nb_elements--;
175     }
176     assert((((unsigned long)data) & 0xF) == 0);
177
178     // now do the SSE based conversion
179     // we have to go from [A B C D] to [D C B A]
180     // where A, B, C, D are bytes
181     //
182     // the algorithm is:
183     // 1) [A B C D] => [B A D C]
184     // 2) [B A D C] => [D C B A]
185     //
186     // i.e. first do a 2x(2x8bit) swap
187     // then a 2x16bit swap
188    
189     __m128i v;
190     while(nb_elements >= 4) {
191         // prefetch the data for the next round
192          __builtin_prefetch(data+128, 0, 0);
193
194         // load the data into the vector unit
195         v = _mm_load_si128((__m128i*)data);
196         // do first swap
197         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
198         // do second swap
199         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
200         // store result
201         _mm_store_si128 ((__m128i*)data, v);
202        
203         data += 4;
204         nb_elements -= 4;
205     }
206
207     // and do the remaining ones
208     while (nb_elements > 0) {
209         *data = ByteSwap32(*data);
210         data++;
211         nb_elements--;
212     }
213 }
214
215 static inline void
216 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
217 {
218     // Work input until data reaches 16 byte alignment
219     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
220         *data = ByteSwap32(*data);
221         data++;
222         nb_elements--;
223     }
224     assert((((unsigned long)data) & 0xF) == 0);
225
226     // now do the SSE based conversion
227     // we have to go from [A B C D] to [D C B A]
228     // where A, B, C, D are bytes
229     //
230     // the algorithm is:
231     // 1) [A B C D] => [B A D C]
232     // 2) [B A D C] => [D C B A]
233     //
234     // i.e. first do a 2x(2x8bit) swap
235     // then a 2x16bit swap
236    
237     __m128i v;
238     while(nb_elements >= 4) {
239         // load the data into the vector unit
240         v = _mm_load_si128((__m128i*)data);
241         // do first swap
242         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
243         // do second swap
244         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
245         // store result
246         _mm_store_si128 ((__m128i*)data, v);
247        
248         data += 4;
249         nb_elements -= 4;
250     }
251
252     // and do the remaining ones
253     while (nb_elements > 0) {
254         *data = ByteSwap32(*data);
255         data++;
256         nb_elements--;
257     }
258 }
259
260 #else
261
262 static inline void
263 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
264 {
265     byteSwapBlock(data, nb_elements);
266 }
267
268 static inline void
269 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
270 {
271     byteSwapBlock(data, nb_elements);
272 }
273
274 #endif // sse2
275
276 #endif // byte order
277
278 #endif // h
Note: See TracBrowser for help on using the browser.