1 |
/* |
---|
2 |
* Copyright (C) 2005-2008 by Pieter Palmers |
---|
3 |
* |
---|
4 |
* This file is part of FFADO |
---|
5 |
* FFADO = Free Firewire (pro-)audio drivers for linux |
---|
6 |
* |
---|
7 |
* FFADO is based upon FreeBoB. |
---|
8 |
* |
---|
9 |
* This program is free software: you can redistribute it and/or modify |
---|
10 |
* it under the terms of the GNU General Public License as published by |
---|
11 |
* the Free Software Foundation, either version 2 of the License, or |
---|
12 |
* (at your option) version 3 of the License. |
---|
13 |
* |
---|
14 |
* This program is distributed in the hope that it will be useful, |
---|
15 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
17 |
* GNU General Public License for more details. |
---|
18 |
* |
---|
19 |
* You should have received a copy of the GNU General Public License |
---|
20 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
21 |
* |
---|
22 |
*/ |
---|
23 |
|
---|
24 |
#ifndef __FFADO_BYTESWAP__ |
---|
25 |
#define __FFADO_BYTESWAP__ |
---|
26 |
|
---|
27 |
#include <netinet/in.h> |
---|
28 |
#include <endian.h> |
---|
29 |
#include <assert.h> |
---|
30 |
|
---|
31 |
// to check for SSE etc... |
---|
32 |
#include "config.h" |
---|
33 |
|
---|
34 |
#include <stdio.h> |
---|
35 |
|
---|
36 |
#if __BYTE_ORDER == __BIG_ENDIAN |
---|
37 |
|
---|
38 |
// Big-endian variant (chosen when __BYTE_ORDER == __BIG_ENDIAN):
// performs no byte reordering, the buffer is left exactly as it was.
//
// data        - quadlet buffer; never read or written here
// nb_elements - number of quadlets in the buffer; ignored
static inline void
byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
{
    // The casts only mark both parameters as intentionally unused.
    (void)data;
    (void)nb_elements;
}
---|
44 |
|
---|
45 |
// Big-endian variant (chosen when __BYTE_ORDER == __BIG_ENDIAN):
// performs no byte reordering, the buffer is left exactly as it was.
//
// data        - quadlet buffer; never read or written here
// nb_elements - number of quadlets in the buffer; ignored
static inline void
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
{
    // The casts only mark both parameters as intentionally unused.
    (void)data;
    (void)nb_elements;
}
---|
50 |
|
---|
51 |
#else |
---|
52 |
|
---|
53 |
#ifdef __SSE2__ |
---|
54 |
#include <emmintrin.h> |
---|
55 |
#warning SSE2 build |
---|
56 |
|
---|
57 |
// SSE2 variant: byte-reverses nb_elements quadlets at data, in place.
//
// The buffer is handled in three stages:
//   1) leading quadlets go through htonl() one at a time until data sits
//      on a 16 byte boundary (or the buffer is used up),
//   2) the aligned middle part is reversed four quadlets per iteration
//      in the vector unit,
//   3) the fewer-than-four quadlets that remain go through htonl() again.
//
// Defined static inline, like the non-SSE2 variants in this header, so that
// including the header from several source files does not produce duplicate
// definitions at link time.
//
// data        - buffer to convert; modified in place
// nb_elements - number of quadlets in the buffer (0 is allowed)
static inline void
byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
{
    // Work input until data reaches 16 byte alignment
    while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
        *data = htonl(*data);
        data++;
        nb_elements--;
    }
    // A short buffer can be used up before the boundary is reached, so
    // alignment is only required while there is still work left.
    assert(nb_elements == 0 || (((unsigned long)data) & 0xF) == 0);

    // now do the SSE based conversion
    // we have to go from [A B C D] to [D C B A]
    // where A, B, C, D are bytes
    //
    // the algorithm is:
    // 1) [A B C D] => [B A D C]
    // 2) [B A D C] => [D C B A]
    //
    // i.e. first do a 2x(2x8bit) swap
    //      then a 2x16bit swap

    __m128i v;
    while (nb_elements >= 4) {
        // prefetch the data for a later round; this is only a hint
        __builtin_prefetch(data + 128, 0, 0);

        // load the data into the vector unit (aligned load)
        v = _mm_load_si128((__m128i *)data);
        // first swap: exchange the two bytes of every 16 bit lane
        v = _mm_or_si128(_mm_slli_epi16(v, 8), _mm_srli_epi16(v, 8));
        // second swap: exchange the two halves of every 32 bit lane
        v = _mm_or_si128(_mm_slli_epi32(v, 16), _mm_srli_epi32(v, 16));
        // store result (aligned store)
        _mm_store_si128((__m128i *)data, v);

        data += 4;
        nb_elements -= 4;
    }

    // and do the remaining ones
    while (nb_elements > 0) {
        *data = htonl(*data);
        data++;
        nb_elements--;
    }
}
---|
105 |
|
---|
106 |
// SSE2 variant: byte-reverses nb_elements quadlets at data, in place.
//
// The buffer is handled in three stages:
//   1) leading quadlets go through ntohl() one at a time until data sits
//      on a 16 byte boundary (or the buffer is used up),
//   2) the aligned middle part is reversed four quadlets per iteration
//      in the vector unit,
//   3) the fewer-than-four quadlets that remain go through ntohl() again.
//
// Defined static inline, like the non-SSE2 variants in this header, so that
// including the header from several source files does not produce duplicate
// definitions at link time.
//
// data        - buffer to convert; modified in place
// nb_elements - number of quadlets in the buffer (0 is allowed)
static inline void
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
{
    // Work input until data reaches 16 byte alignment
    while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
        *data = ntohl(*data);
        data++;
        nb_elements--;
    }
    // A short buffer can be used up before the boundary is reached, so
    // alignment is only required while there is still work left.
    assert(nb_elements == 0 || (((unsigned long)data) & 0xF) == 0);

    // now do the SSE based conversion
    // we have to go from [A B C D] to [D C B A]
    // where A, B, C, D are bytes
    //
    // the algorithm is:
    // 1) [A B C D] => [B A D C]
    // 2) [B A D C] => [D C B A]
    //
    // i.e. first do a 2x(2x8bit) swap
    //      then a 2x16bit swap

    __m128i v;
    while (nb_elements >= 4) {
        // load the data into the vector unit (aligned load)
        v = _mm_load_si128((__m128i *)data);
        // first swap: exchange the two bytes of every 16 bit lane
        v = _mm_or_si128(_mm_slli_epi16(v, 8), _mm_srli_epi16(v, 8));
        // second swap: exchange the two halves of every 32 bit lane
        v = _mm_or_si128(_mm_slli_epi32(v, 16), _mm_srli_epi32(v, 16));
        // store result (aligned store)
        _mm_store_si128((__m128i *)data, v);

        data += 4;
        nb_elements -= 4;
    }

    // and do the remaining ones
    while (nb_elements > 0) {
        *data = ntohl(*data);
        data++;
        nb_elements--;
    }
}
---|
151 |
|
---|
152 |
#else |
---|
153 |
|
---|
154 |
// Scalar variant (used when __SSE2__ is not defined): replaces each of the
// first nb_elements quadlets at data with htonl() of its value, in place.
//
// data        - buffer to convert; modified in place
// nb_elements - number of quadlets to convert (0 leaves the buffer alone)
static inline void
byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
{
    quadlet_t *cursor = data;
    unsigned int remaining = nb_elements;

    // walk the buffer front to back, one quadlet per iteration
    while (remaining != 0) {
        *cursor = htonl(*cursor);
        ++cursor;
        --remaining;
    }
}
---|
163 |
|
---|
164 |
// Scalar variant (used when __SSE2__ is not defined): replaces each of the
// first nb_elements quadlets at data with ntohl() of its value, in place.
//
// data        - buffer to convert; modified in place
// nb_elements - number of quadlets to convert (0 leaves the buffer alone)
static inline void
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
{
    unsigned int idx;

    // index-based walk over the buffer, one quadlet per iteration
    for (idx = 0; idx != nb_elements; ++idx) {
        data[idx] = ntohl(data[idx]);
    }
}
---|
173 |
|
---|
174 |
#endif // sse2 |
---|
175 |
|
---|
176 |
#endif // byte order |
---|
177 |
|
---|
178 |
#endif // h |
---|