/*
 * Copyright (C) 2005-2008 by Pieter Palmers
 *
 * This file is part of FFADO
 * FFADO = Free Firewire (pro-)audio drivers for linux
 *
 * FFADO is based upon FreeBoB.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
23 |
|
---|
24 |
#ifndef __FFADO_BYTESWAP__ |
---|
25 |
#define __FFADO_BYTESWAP__ |
---|
26 |
|
---|
27 |
#include <netinet/in.h> |
---|
28 |
#include <assert.h> |
---|
29 |
|
---|
30 |
// to check for SSE etc... |
---|
31 |
#include "config.h" |
---|
32 |
|
---|
33 |
#include <stdio.h> |
---|
34 |
|
---|
35 |
#ifdef __SSE2__ |
---|
36 |
#include <emmintrin.h> |
---|
37 |
#warning SSE2 build |
---|
38 |
|
---|
39 |
static inline void |
---|
40 |
byteSwapToBus(quadlet_t *data, unsigned int nb_elements) |
---|
41 |
{ |
---|
42 |
// Work input until data reaches 16 byte alignment |
---|
43 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
44 |
*data = htonl(*data); |
---|
45 |
data++; |
---|
46 |
nb_elements--; |
---|
47 |
} |
---|
48 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
49 |
|
---|
50 |
// now do the SSE based conversion |
---|
51 |
// we have to go from [A B C D] to [D C B A] |
---|
52 |
// where A, B, C, D are bytes |
---|
53 |
// |
---|
54 |
// the algorithm is: |
---|
55 |
// 1) [A B C D] => [B A D C] |
---|
56 |
// 2) [B A D C] => [D C B A] |
---|
57 |
// |
---|
58 |
// i.e. first do a 2x(2x8bit) swap |
---|
59 |
// then a 2x16bit swap |
---|
60 |
|
---|
61 |
__m128i v; |
---|
62 |
while(nb_elements >= 4) { |
---|
63 |
// prefetch the data for the next round |
---|
64 |
__builtin_prefetch(data+128, 0, 0); |
---|
65 |
|
---|
66 |
// load the data into the vector unit |
---|
67 |
v = _mm_load_si128((__m128i*)data); |
---|
68 |
// do first swap |
---|
69 |
v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it |
---|
70 |
// do second swap |
---|
71 |
v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it |
---|
72 |
// store result |
---|
73 |
_mm_store_si128 ((__m128i*)data, v); |
---|
74 |
|
---|
75 |
data += 4; |
---|
76 |
nb_elements -= 4; |
---|
77 |
} |
---|
78 |
|
---|
79 |
// and do the remaining ones |
---|
80 |
while (nb_elements > 0) { |
---|
81 |
*data = htonl(*data); |
---|
82 |
data++; |
---|
83 |
nb_elements--; |
---|
84 |
} |
---|
85 |
} |
---|
86 |
|
---|
87 |
static inline void |
---|
88 |
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) |
---|
89 |
{ |
---|
90 |
// Work input until data reaches 16 byte alignment |
---|
91 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
92 |
*data = htonl(*data); |
---|
93 |
data++; |
---|
94 |
nb_elements--; |
---|
95 |
} |
---|
96 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
97 |
|
---|
98 |
// now do the SSE based conversion |
---|
99 |
// we have to go from [A B C D] to [D C B A] |
---|
100 |
// where A, B, C, D are bytes |
---|
101 |
// |
---|
102 |
// the algorithm is: |
---|
103 |
// 1) [A B C D] => [B A D C] |
---|
104 |
// 2) [B A D C] => [D C B A] |
---|
105 |
// |
---|
106 |
// i.e. first do a 2x(2x8bit) swap |
---|
107 |
// then a 2x16bit swap |
---|
108 |
|
---|
109 |
__m128i v; |
---|
110 |
while(nb_elements >= 4) { |
---|
111 |
// prefetch the data for the next round |
---|
112 |
__builtin_prefetch(data+128, 0, 0); |
---|
113 |
|
---|
114 |
// load the data into the vector unit |
---|
115 |
v = _mm_load_si128((__m128i*)data); |
---|
116 |
// do first swap |
---|
117 |
v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it |
---|
118 |
// do second swap |
---|
119 |
v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it |
---|
120 |
// store result |
---|
121 |
_mm_store_si128 ((__m128i*)data, v); |
---|
122 |
|
---|
123 |
data += 4; |
---|
124 |
nb_elements -= 4; |
---|
125 |
} |
---|
126 |
|
---|
127 |
// and do the remaining ones |
---|
128 |
while (nb_elements > 0) { |
---|
129 |
*data = htonl(*data); |
---|
130 |
data++; |
---|
131 |
nb_elements--; |
---|
132 |
} |
---|
133 |
} |
---|
134 |
|
---|
135 |
#else |
---|
136 |
|
---|
137 |
static inline void |
---|
138 |
byteSwapToBus(quadlet_t *data, unsigned int nb_elements) |
---|
139 |
{ |
---|
140 |
unsigned int i=0; |
---|
141 |
for(; i<nb_elements; i++) { |
---|
142 |
*data = htonl(*data); |
---|
143 |
data++; |
---|
144 |
} |
---|
145 |
} |
---|
146 |
|
---|
147 |
static inline void |
---|
148 |
byteSwapFromBus(quadlet_t *data, unsigned int nb_elements) |
---|
149 |
{ |
---|
150 |
unsigned int i=0; |
---|
151 |
for(; i<nb_elements; i++) { |
---|
152 |
*data = ntohl(*data); |
---|
153 |
data++; |
---|
154 |
} |
---|
155 |
} |
---|
156 |
|
---|
157 |
#endif |
---|
158 |
|
---|
159 |
#endif |
---|