root/trunk/libffado/src/libutil/ByteSwap.h

Revision 864, 4.3 kB (checked in by ppalmers, 16 years ago)

update license to GPLv2 or GPLv3 instead of GPLv2 or any later version. Update copyrights to reflect the new year

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #ifndef __FFADO_BYTESWAP__
25 #define __FFADO_BYTESWAP__
26
27 #include <netinet/in.h>
28 #include <endian.h>
29 #include <assert.h>
30
31 // to check for SSE etc...
32 #include "config.h"
33
34 #include <stdio.h>
35
36 #if __BYTE_ORDER == __BIG_ENDIAN
37
38 // no-op for big endian machines
39 static inline void
40 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
41 {
42     return;
43 }
44
45 static inline void
46 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
47 {
48     return;
49 }
50
51 #else
52
53 #ifdef __SSE2__
54 #include <emmintrin.h>
55 #warning SSE2 build
56
57 //static inline void
58 void
59 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
60 {
61     // Work input until data reaches 16 byte alignment
62     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
63         *data = htonl(*data);
64         data++;
65         nb_elements--;
66     }
67     assert((((unsigned long)data) & 0xF) == 0);
68
69     // now do the SSE based conversion
70     // we have to go from [A B C D] to [D C B A]
71     // where A, B, C, D are bytes
72     //
73     // the algorithm is:
74     // 1) [A B C D] => [B A D C]
75     // 2) [B A D C] => [D C B A]
76     //
77     // i.e. first do a 2x(2x8bit) swap
78     // then a 2x16bit swap
79    
80     __m128i v;
81     while(nb_elements >= 4) {
82         // prefetch the data for the next round
83          __builtin_prefetch(data+128, 0, 0);
84
85         // load the data into the vector unit
86         v = _mm_load_si128((__m128i*)data);
87         // do first swap
88         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
89         // do second swap
90         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
91         // store result
92         _mm_store_si128 ((__m128i*)data, v);
93        
94         data += 4;
95         nb_elements -= 4;
96     }
97
98     // and do the remaining ones
99     while (nb_elements > 0) {
100         *data = htonl(*data);
101         data++;
102         nb_elements--;
103     }
104 }
105
106 //static inline void
107 void
108 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
109 {
110     // Work input until data reaches 16 byte alignment
111     while ((((unsigned long)data) & 0xF) && nb_elements > 0) {
112         *data = htonl(*data);
113         data++;
114         nb_elements--;
115     }
116     assert((((unsigned long)data) & 0xF) == 0);
117
118     // now do the SSE based conversion
119     // we have to go from [A B C D] to [D C B A]
120     // where A, B, C, D are bytes
121     //
122     // the algorithm is:
123     // 1) [A B C D] => [B A D C]
124     // 2) [B A D C] => [D C B A]
125     //
126     // i.e. first do a 2x(2x8bit) swap
127     // then a 2x16bit swap
128    
129     __m128i v;
130     while(nb_elements >= 4) {
131         // load the data into the vector unit
132         v = _mm_load_si128((__m128i*)data);
133         // do first swap
134         v = _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) ); //swap it
135         // do second swap
136         v = _mm_or_si128( _mm_slli_epi32( v, 16 ), _mm_srli_epi32( v, 16 ) ); //swap it
137         // store result
138         _mm_store_si128 ((__m128i*)data, v);
139        
140         data += 4;
141         nb_elements -= 4;
142     }
143
144     // and do the remaining ones
145     while (nb_elements > 0) {
146         *data = htonl(*data);
147         data++;
148         nb_elements--;
149     }
150 }
151
152 #else
153
154 static inline void
155 byteSwapToBus(quadlet_t *data, unsigned int nb_elements)
156 {
157     unsigned int i=0;
158     for(; i<nb_elements; i++) {
159         *data = htonl(*data);
160         data++;
161     }
162 }
163
164 static inline void
165 byteSwapFromBus(quadlet_t *data, unsigned int nb_elements)
166 {
167     unsigned int i=0;
168     for(; i<nb_elements; i++) {
169         *data = ntohl(*data);
170         data++;
171     }
172 }
173
174 #endif // sse2
175
176 #endif // byte order
177
178 #endif // h
Note: See TracBrowser for help on using the browser.