1 |
/* |
---|
2 |
* Copyright (C) 2005-2008 by Pieter Palmers |
---|
3 |
* |
---|
4 |
* This file is part of FFADO |
---|
5 |
* FFADO = Free Firewire (pro-)audio drivers for linux |
---|
6 |
* |
---|
7 |
* FFADO is based upon FreeBoB. |
---|
8 |
* |
---|
9 |
* This program is free software: you can redistribute it and/or modify |
---|
10 |
* it under the terms of the GNU General Public License as published by |
---|
11 |
* the Free Software Foundation, either version 2 of the License, or |
---|
12 |
* (at your option) version 3 of the License. |
---|
13 |
* |
---|
14 |
* This program is distributed in the hope that it will be useful, |
---|
15 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
17 |
* GNU General Public License for more details. |
---|
18 |
* |
---|
19 |
* You should have received a copy of the GNU General Public License |
---|
20 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
21 |
* |
---|
22 |
*/ |
---|
23 |
|
---|
24 |
#ifndef __FFADO_AMDTPBUFFEROPS__ |
---|
25 |
#define __FFADO_AMDTPBUFFEROPS__ |
---|
26 |
|
---|
27 |
#include <assert.h> |
---|
28 |
// to check for SSE etc... |
---|
29 |
|
---|
30 |
|
---|
31 |
#include <stdio.h> |
---|
32 |
|
---|
33 |
#define AMDTP_FLOAT_MULTIPLIER 2147483392.0 |
---|
34 |
|
---|
35 |
#ifdef __SSE2__ |
---|
36 |
//#if 0 |
---|
37 |
#include <emmintrin.h> |
---|
38 |
#warning SSE2 build |
---|
39 |
|
---|
40 |
//static inline void |
---|
41 |
void |
---|
42 |
convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |
---|
43 |
{ |
---|
44 |
// Work input until data reaches 16 byte alignment |
---|
45 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
46 |
float *in = (float *)data; |
---|
47 |
float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |
---|
48 |
unsigned int tmp = ((int) v); |
---|
49 |
tmp = ( tmp >> 8 ) | 0x40000000; |
---|
50 |
*data = (quadlet_t)tmp; |
---|
51 |
data++; |
---|
52 |
nb_elements--; |
---|
53 |
} |
---|
54 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
55 |
|
---|
56 |
// now do the SSE based conversion/labeling |
---|
57 |
__m128i v_int; |
---|
58 |
__m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000); |
---|
59 |
__m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER); |
---|
60 |
__m128 v_float; |
---|
61 |
while(nb_elements >= 4) { |
---|
62 |
float *in = (float *)data; |
---|
63 |
// load the data into the vector unit |
---|
64 |
v_float = _mm_load_ps(in); |
---|
65 |
// multiply |
---|
66 |
v_float = _mm_mul_ps(v_float, mult); |
---|
67 |
// convert to signed integer |
---|
68 |
v_int = _mm_cvttps_epi32( v_float ); |
---|
69 |
// shift right 8 bits |
---|
70 |
v_int = _mm_srli_epi32( v_int, 8 ); |
---|
71 |
// label it |
---|
72 |
v_int = _mm_or_si128( v_int, label ); |
---|
73 |
// store result |
---|
74 |
_mm_store_si128 ((__m128i*)data, v_int); |
---|
75 |
|
---|
76 |
data += 4; |
---|
77 |
nb_elements -= 4; |
---|
78 |
} |
---|
79 |
|
---|
80 |
// and do the remaining ones |
---|
81 |
while (nb_elements > 0) { |
---|
82 |
float *in = (float *)data; |
---|
83 |
float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |
---|
84 |
unsigned int tmp = ((int) v); |
---|
85 |
tmp = ( tmp >> 8 ) | 0x40000000; |
---|
86 |
*data = (quadlet_t)tmp; |
---|
87 |
data++; |
---|
88 |
nb_elements--; |
---|
89 |
} |
---|
90 |
} |
---|
91 |
|
---|
92 |
//static inline void |
---|
93 |
void |
---|
94 |
convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |
---|
95 |
{ |
---|
96 |
// Work input until data reaches 16 byte alignment |
---|
97 |
while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |
---|
98 |
uint32_t in = (uint32_t)(*data); |
---|
99 |
*data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |
---|
100 |
data++; |
---|
101 |
nb_elements--; |
---|
102 |
} |
---|
103 |
assert((((unsigned long)data) & 0xF) == 0); |
---|
104 |
|
---|
105 |
// now do the SSE based labeling |
---|
106 |
__m128i v; |
---|
107 |
const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF); |
---|
108 |
const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000); |
---|
109 |
while(nb_elements >= 4) { |
---|
110 |
// load the data into the vector unit |
---|
111 |
v = _mm_load_si128((__m128i*)data); |
---|
112 |
// mask |
---|
113 |
v = _mm_and_si128( v, mask ); |
---|
114 |
// label |
---|
115 |
v = _mm_or_si128( v, label ); |
---|
116 |
// store result |
---|
117 |
_mm_store_si128 ((__m128i*)data, v); |
---|
118 |
|
---|
119 |
data += 4; |
---|
120 |
nb_elements -= 4; |
---|
121 |
} |
---|
122 |
|
---|
123 |
// and do the remaining ones |
---|
124 |
while (nb_elements > 0) { |
---|
125 |
uint32_t in = (uint32_t)(*data); |
---|
126 |
*data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |
---|
127 |
data++; |
---|
128 |
nb_elements--; |
---|
129 |
} |
---|
130 |
} |
---|
131 |
|
---|
132 |
#else |
---|
133 |
|
---|
134 |
//static inline void |
---|
135 |
void |
---|
136 |
convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |
---|
137 |
{ |
---|
138 |
unsigned int i=0; |
---|
139 |
for(; i<nb_elements; i++) { |
---|
140 |
// don't care for overflow |
---|
141 |
float *in = (float *)data; |
---|
142 |
float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |
---|
143 |
unsigned int tmp = ((int) v); |
---|
144 |
tmp = ( tmp >> 8 ) | 0x40000000; |
---|
145 |
*data = (quadlet_t)tmp; |
---|
146 |
data++; |
---|
147 |
} |
---|
148 |
} |
---|
149 |
|
---|
150 |
//static inline void |
---|
151 |
void |
---|
152 |
convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |
---|
153 |
{ |
---|
154 |
unsigned int i=0; |
---|
155 |
for(; i<nb_elements; i++) { |
---|
156 |
uint32_t in = (uint32_t)(*data); |
---|
157 |
*data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |
---|
158 |
data++; |
---|
159 |
} |
---|
160 |
} |
---|
161 |
|
---|
162 |
#endif // sse2 |
---|
163 |
|
---|
164 |
#endif /* __FFADO_AMDTPBUFFEROPS__ */ |
---|