1 |
/* |
---|
2 |
* Copyright (C) 2005-2008 by Pieter Palmers |
---|
3 |
* |
---|
4 |
* This file is part of FFADO |
---|
5 |
* FFADO = Free Firewire (pro-)audio drivers for linux |
---|
6 |
* |
---|
7 |
* FFADO is based upon FreeBoB. |
---|
8 |
* |
---|
9 |
* This program is free software: you can redistribute it and/or modify |
---|
10 |
* it under the terms of the GNU General Public License as published by |
---|
11 |
* the Free Software Foundation, either version 2 of the License, or |
---|
12 |
* (at your option) version 3 of the License. |
---|
13 |
* |
---|
14 |
* This program is distributed in the hope that it will be useful, |
---|
15 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
17 |
* GNU General Public License for more details. |
---|
18 |
* |
---|
19 |
* You should have received a copy of the GNU General Public License |
---|
20 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
21 |
* |
---|
22 |
*/ |
---|
23 |
|
---|
24 |
#ifndef __FFADO_BYTESWAP__ |
---|
25 |
#define __FFADO_BYTESWAP__ |
---|
26 |
|
---|
27 |
#include <byteswap.h> |
---|
28 |
#include <inttypes.h> |
---|
29 |
#include <endian.h> |
---|
30 |
#include <assert.h> |
---|
31 |
|
---|
32 |
// to check for SSE etc... |
---|
33 |
#include "config.h" |
---|
34 |
|
---|
35 |
#include <stdio.h> |
---|
36 |
|
---|
37 |
/*
 * Byte swap of a 32-bit constant: [A B C D] => [D C B A].
 *
 * All masks are unsigned literals so that the masked value is unsigned
 * before it is shifted. With plain int masks, an int argument that has
 * bit 7 set (e.g. 0x80) would be left-shifted into the sign bit, which is
 * undefined behaviour for signed operands.
 *
 * The argument is evaluated four times; do not pass an expression with
 * side effects.
 */
#define BYTESWAP32_CONST(x) ((((x) & 0x000000FFu) << 24) | \
                             (((x) & 0x0000FF00u) <<  8) | \
                             (((x) & 0x00FF0000u) >>  8) | \
                             (((x) & 0xFF000000u) >> 24))
---|
41 |
|
---|
42 |
/**
 * Unconditionally reverse the byte order of a 64-bit value.
 *
 * @param d value to swap
 * @return the result of bswap_64() applied to d
 */
static inline uint64_t ByteSwap64(uint64_t d)
{
    const uint64_t reversed = bswap_64(d);
    return reversed;
}
---|
47 |
|
---|
48 |
/**
 * Unconditionally reverse the byte order of a 32-bit value.
 *
 * @param d value to swap
 * @return the result of bswap_32() applied to d
 */
static inline uint32_t ByteSwap32(uint32_t d)
{
    const uint32_t reversed = bswap_32(d);
    return reversed;
}
---|
53 |
|
---|
54 |
/**
 * Unconditionally exchange the two bytes of a 16-bit value.
 *
 * @param d value to swap
 * @return the result of bswap_16() applied to d
 */
static inline uint16_t ByteSwap16(uint16_t d)
{
    const uint16_t reversed = bswap_16(d);
    return reversed;
}
---|
59 |
|
---|
60 |
|
---|
61 |
#if __BYTE_ORDER == __BIG_ENDIAN |
---|
62 |
|
---|
63 |
// no-op for big endian machines |
---|
64 |
|
---|
65 |
// Big-endian branch: a 32-bit constant is passed through unchanged.
#define CONDSWAPTOBUS32_CONST(value) (value)
---|
66 |
|
---|
67 |
/**
 * Big-endian branch of the host-to-bus conversion for 64-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
72 |
|
---|
73 |
/**
 * Big-endian branch of the host-to-bus conversion for 32-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
78 |
|
---|
79 |
/**
 * Big-endian branch of the host-to-bus conversion for 16-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
84 |
|
---|
85 |
/**
 * Big-endian branch of the bus-to-host conversion for 64-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
90 |
|
---|
91 |
/**
 * Big-endian branch of the bus-to-host conversion for 32-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
96 |
|
---|
97 |
/**
 * Big-endian branch of the bus-to-host conversion for 16-bit values.
 *
 * @param d value to convert
 * @return d, unchanged (no swap is performed in this branch)
 */
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    // nothing to swap in the big-endian branch
    return (d);
}
---|
102 |
|
---|
103 |
static inline void |
---|
104 |
byteSwapToBus(quadlet_t *data, unsigned int nb_elements) |
---|
105 |
{ |
---|
106 |
return; |
---|
107 |
} |
---|
108 |
|
---|
109 |
static inline void |
---|
110 |
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) |
---|
111 |
{ |
---|
112 |
return; |
---|
113 |
} |
---|
114 |
|
---|
115 |
#else |
---|
116 |
|
---|
117 |
// Non-big-endian branch: a 32-bit constant is byte swapped with
// BYTESWAP32_CONST. Defined as a function-like macro so that it has the same
// shape as the big-endian definition, CONDSWAPTOBUS32_CONST(x).
#define CONDSWAPTOBUS32_CONST(x) BYTESWAP32_CONST(x)
---|
118 |
|
---|
119 |
/**
 * Non-big-endian branch of the host-to-bus conversion for 64-bit values.
 *
 * @param d value to convert
 * @return ByteSwap64(d)
 */
static inline uint64_t CondSwapToBus64(uint64_t d)
{
    const uint64_t bus_value = ByteSwap64(d);
    return bus_value;
}
---|
124 |
|
---|
125 |
/**
 * Non-big-endian branch of the host-to-bus conversion for 32-bit values.
 *
 * @param d value to convert
 * @return ByteSwap32(d)
 */
static inline uint32_t CondSwapToBus32(uint32_t d)
{
    const uint32_t bus_value = ByteSwap32(d);
    return bus_value;
}
---|
130 |
|
---|
131 |
/**
 * Non-big-endian branch of the host-to-bus conversion for 16-bit values.
 *
 * @param d value to convert
 * @return ByteSwap16(d)
 */
static inline uint16_t CondSwapToBus16(uint16_t d)
{
    const uint16_t bus_value = ByteSwap16(d);
    return bus_value;
}
---|
136 |
|
---|
137 |
/**
 * Non-big-endian branch of the bus-to-host conversion for 64-bit values.
 *
 * @param d value to convert
 * @return ByteSwap64(d)
 */
static inline uint64_t CondSwapFromBus64(uint64_t d)
{
    const uint64_t host_value = ByteSwap64(d);
    return host_value;
}
---|
142 |
|
---|
143 |
/**
 * Non-big-endian branch of the bus-to-host conversion for 32-bit values.
 *
 * @param d value to convert
 * @return ByteSwap32(d)
 */
static inline uint32_t CondSwapFromBus32(uint32_t d)
{
    const uint32_t host_value = ByteSwap32(d);
    return host_value;
}
---|
148 |
|
---|
149 |
/**
 * Non-big-endian branch of the bus-to-host conversion for 16-bit values.
 *
 * @param d value to convert
 * @return ByteSwap16(d)
 */
static inline uint16_t CondSwapFromBus16(uint16_t d)
{
    const uint16_t host_value = ByteSwap16(d);
    return host_value;
}
---|
154 |
|
---|
155 |
#ifdef __SSE2__ |
---|
156 |
#include <emmintrin.h> |
---|
157 |
|
---|
158 |
static inline void |
---|
159 |
byteSwapToBus(quadlet_t *data, unsigned int nb_elements) |
---|
160 |
{ |
---|
161 |
// Work input until data reaches 16 byte alignment |
---|
162 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
163 |
*data = ByteSwap32(*data); |
---|
164 |
data++; |
---|
165 |
nb_elements--; |
---|
166 |
} |
---|
167 |
|
---|
168 |
if(nb_elements == 0) { |
---|
169 |
return; |
---|
170 |
} |
---|
171 |
|
---|
172 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
173 |
|
---|
174 |
// now do the SSE based conversion |
---|
175 |
// we have to go from [A B C D] to [D C B A] |
---|
176 |
// where A, B, C, D are bytes |
---|
177 |
// |
---|
178 |
// the algorithm is: |
---|
179 |
// 1) [A B C D] => [B A D C] |
---|
180 |
// 2) [B A D C] => [D C B A] |
---|
181 |
// |
---|
182 |
// i.e. first do a 2x(2x8bit) swap |
---|
183 |
// then a 2x16bit swap |
---|
184 |
|
---|
185 |
__m128i v; |
---|
186 |
while(nb_elements >= 4) { |
---|
187 |
// prefetch the data for the next round |
---|
188 |
__builtin_prefetch(data+128, 0, 0); |
---|
189 |
|
---|
190 |
// load the data into the vector unit |
---|
191 |
v = _mm_load_si128((__m128i*)data); |
---|
192 |
// do first swap |
---|
193 |
v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it |
---|
194 |
// do second swap |
---|
195 |
v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it |
---|
196 |
// store result |
---|
197 |
_mm_store_si128 ((__m128i*)data, v); |
---|
198 |
|
---|
199 |
data += 4; |
---|
200 |
nb_elements -= 4; |
---|
201 |
} |
---|
202 |
|
---|
203 |
// and do the remaining ones |
---|
204 |
while (nb_elements > 0) { |
---|
205 |
*data = ByteSwap32(*data); |
---|
206 |
data++; |
---|
207 |
nb_elements--; |
---|
208 |
} |
---|
209 |
} |
---|
210 |
|
---|
211 |
static inline void |
---|
212 |
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) |
---|
213 |
{ |
---|
214 |
// Work input until data reaches 16 byte alignment |
---|
215 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
216 |
*data = ByteSwap32(*data); |
---|
217 |
data++; |
---|
218 |
nb_elements--; |
---|
219 |
} |
---|
220 |
|
---|
221 |
if(nb_elements == 0) { |
---|
222 |
return; |
---|
223 |
} |
---|
224 |
|
---|
225 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
226 |
|
---|
227 |
// now do the SSE based conversion |
---|
228 |
// we have to go from [A B C D] to [D C B A] |
---|
229 |
// where A, B, C, D are bytes |
---|
230 |
// |
---|
231 |
// the algorithm is: |
---|
232 |
// 1) [A B C D] => [B A D C] |
---|
233 |
// 2) [B A D C] => [D C B A] |
---|
234 |
// |
---|
235 |
// i.e. first do a 2x(2x8bit) swap |
---|
236 |
// then a 2x16bit swap |
---|
237 |
|
---|
238 |
__m128i v; |
---|
239 |
while(nb_elements >= 4) { |
---|
240 |
// load the data into the vector unit |
---|
241 |
v = _mm_load_si128((__m128i*)data); |
---|
242 |
// do first swap |
---|
243 |
v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it |
---|
244 |
// do second swap |
---|
245 |
v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it |
---|
246 |
// store result |
---|
247 |
_mm_store_si128 ((__m128i*)data, v); |
---|
248 |
|
---|
249 |
data += 4; |
---|
250 |
nb_elements -= 4; |
---|
251 |
} |
---|
252 |
|
---|
253 |
// and do the remaining ones |
---|
254 |
while (nb_elements > 0) { |
---|
255 |
*data = ByteSwap32(*data); |
---|
256 |
data++; |
---|
257 |
nb_elements--; |
---|
258 |
} |
---|
259 |
} |
---|
260 |
|
---|
261 |
#else |
---|
262 |
|
---|
263 |
static inline void |
---|
264 |
byteSwapToBus(quadlet_t *data, unsigned int nb_elements) |
---|
265 |
{ |
---|
266 |
unsigned int i=0; |
---|
267 |
for(; i<nb_elements; i++) { |
---|
268 |
*data = ByteSwap32(*data); |
---|
269 |
data++; |
---|
270 |
} |
---|
271 |
} |
---|
272 |
|
---|
273 |
static inline void |
---|
274 |
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) |
---|
275 |
{ |
---|
276 |
unsigned int i=0; |
---|
277 |
for(; i<nb_elements; i++) { |
---|
278 |
*data = ByteSwap32(*data); |
---|
279 |
data++; |
---|
280 |
} |
---|
281 |
} |
---|
282 |
|
---|
283 |
#endif // sse2 |
---|
284 |
|
---|
285 |
#endif // byte order |
---|
286 |
|
---|
287 |
#endif // h |
---|