# root/trunk/libffado/src/libstreaming/amdtp/AmdtpBufferOps.h

Revision 849, 4.6 kB (checked in by ppalmers, 16 years ago) |
---|

Line | |
---|---|

1 | /* |

2 | * Copyright (C) 2005-2008 by Pieter Palmers |

3 | * |

4 | * This file is part of FFADO |

5 | * FFADO = Free Firewire (pro-)audio drivers for linux |

6 | * |

7 | * FFADO is based upon FreeBoB. |

8 | * |

9 | * This program is free software: you can redistribute it and/or modify |

10 | * it under the terms of the GNU General Public License as published by |

11 | * the Free Software Foundation, either version 3 of the License, or |

12 | * (at your option) any later version. |

13 | * |

14 | * This program is distributed in the hope that it will be useful, |

15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |

16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |

17 | * GNU General Public License for more details. |

18 | * |

19 | * You should have received a copy of the GNU General Public License |

20 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |

21 | * |

22 | */ |

23 | |

24 | #ifndef __FFADO_AMDTPBUFFEROPS__ |

25 | #define __FFADO_AMDTPBUFFEROPS__ |

26 | |

27 | #include <assert.h> |

28 | // to check for SSE etc... |

29 | #include "config.h" |

30 | |

31 | #include <stdio.h> |

32 | |

33 | #define AMDTP_FLOAT_MULTIPLIER 2147483392.0 |

34 | |

35 | #ifdef __SSE2__ |

36 | //#if 0 |

37 | #include <emmintrin.h> |

38 | #warning SSE2 build |

39 | |

40 | //static inline void |

41 | void |

42 | convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |

43 | { |

44 | // Work input until data reaches 16 byte alignment |

45 | while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |

46 | float *in = (float *)data; |

47 | float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |

48 | unsigned int tmp = ((int) v); |

49 | tmp = ( tmp >> 8 ) | 0x40000000; |

50 | *data = (quadlet_t)tmp; |

51 | data++; |

52 | nb_elements--; |

53 | } |

54 | assert((((unsigned long)data) & 0xF) == 0); |

55 | |

56 | // now do the SSE based conversion/labeling |

57 | __m128i v_int; |

58 | __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000); |

59 | __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER); |

60 | __m128 v_float; |

61 | while(nb_elements >= 4) { |

62 | float *in = (float *)data; |

63 | // load the data into the vector unit |

64 | v_float = _mm_load_ps(in); |

65 | // multiply |

66 | v_float = _mm_mul_ps(v_float, mult); |

67 | // convert to signed integer |

68 | v_int = _mm_cvttps_epi32( v_float ); |

69 | // shift right 8 bits |

70 | v_int = _mm_srli_epi32( v_int, 8 ); |

71 | // label it |

72 | v_int = _mm_or_si128( v_int, label ); |

73 | // store result |

74 | _mm_store_si128 ((__m128i*)data, v_int); |

75 | |

76 | data += 4; |

77 | nb_elements -= 4; |

78 | } |

79 | |

80 | // and do the remaining ones |

81 | while (nb_elements > 0) { |

82 | float *in = (float *)data; |

83 | float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |

84 | unsigned int tmp = ((int) v); |

85 | tmp = ( tmp >> 8 ) | 0x40000000; |

86 | *data = (quadlet_t)tmp; |

87 | data++; |

88 | nb_elements--; |

89 | } |

90 | } |

91 | |

92 | //static inline void |

93 | void |

94 | convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |

95 | { |

96 | // Work input until data reaches 16 byte alignment |

97 | while ((((unsigned long)data) & 0xF) && nb_elements > 0) { |

98 | uint32_t in = (uint32_t)(*data); |

99 | *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |

100 | data++; |

101 | nb_elements--; |

102 | } |

103 | assert((((unsigned long)data) & 0xF) == 0); |

104 | |

105 | // now do the SSE based labeling |

106 | __m128i v; |

107 | const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF); |

108 | const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000); |

109 | while(nb_elements >= 4) { |

110 | // load the data into the vector unit |

111 | v = _mm_load_si128((__m128i*)data); |

112 | // mask |

113 | v = _mm_and_si128( v, mask ); |

114 | // label |

115 | v = _mm_or_si128( v, label ); |

116 | // store result |

117 | _mm_store_si128 ((__m128i*)data, v); |

118 | |

119 | data += 4; |

120 | nb_elements -= 4; |

121 | } |

122 | |

123 | // and do the remaining ones |

124 | while (nb_elements > 0) { |

125 | uint32_t in = (uint32_t)(*data); |

126 | *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |

127 | data++; |

128 | nb_elements--; |

129 | } |

130 | } |

131 | |

132 | #else |

133 | |

134 | //static inline void |

135 | void |

136 | convertFromFloatAndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |

137 | { |

138 | unsigned int i=0; |

139 | for(; i<nb_elements; i++) { |

140 | // don't care for overflow |

141 | float *in = (float *)data; |

142 | float v = (*in) * AMDTP_FLOAT_MULTIPLIER; |

143 | unsigned int tmp = ((int) v); |

144 | tmp = ( tmp >> 8 ) | 0x40000000; |

145 | *data = (quadlet_t)tmp; |

146 | data++; |

147 | } |

148 | } |

149 | |

150 | //static inline void |

151 | void |

152 | convertFromInt24AndLabelAsMBLA(quadlet_t *data, unsigned int nb_elements) |

153 | { |

154 | unsigned int i=0; |

155 | for(; i<nb_elements; i++) { |

156 | uint32_t in = (uint32_t)(*data); |

157 | *data = (quadlet_t)((in & 0x00FFFFFF) | 0x40000000); |

158 | data++; |

159 | } |

160 | } |

161 | |

162 | #endif // sse2 |

163 | |

164 | #endif /* __FFADO_AMDTPBUFFEROPS__ */ |

**Note:** See TracBrowser for help on using the browser.