root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1336, 39.6 kB (checked in by ppalmers, 14 years ago)

Bring trunk up to date with branches/libffado-2.0:

"""
svn merge -r 1254:1299 svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0
svn merge -r 1301:1320 svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0
svn merge -r 1322:1323 svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0
svn merge -r 1329:HEAD svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0
"""

Add getSupportedSamplingFrequencies() to DICE, RME and Metric Halo AvDevices.

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25
26 #include "AmdtpTransmitStreamProcessor.h"
27 #include "AmdtpPort.h"
28 #include "../StreamProcessorManager.h"
29 #include "devicemanager.h"
30
31 #include "libutil/Time.h"
32 #include "libutil/float_cast.h"
33
34 #include "libieee1394/ieee1394service.h"
35 #include "libieee1394/IsoHandlerManager.h"
36 #include "libieee1394/cycletimer.h"
37
38 #include "libutil/ByteSwap.h"
39 #include <assert.h>
40 #include <cstring>
41
42 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
43
44 namespace Streaming
45 {
46
47 /* transmit */
48 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
49         : StreamProcessor(parent, ePT_Transmit)
50         , m_dimension( dimension )
51         , m_dbc( 0 )
52 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
53         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
54 #endif
55         , m_nb_audio_ports( 0 )
56         , m_nb_midi_ports( 0 )
57 {}
58
59 enum StreamProcessor::eChildReturnValue
60 AmdtpTransmitStreamProcessor::generatePacketHeader (
61     unsigned char *data, unsigned int *length,
62     unsigned char *tag, unsigned char *sy,
63     uint32_t pkt_ctr )
64 {
65     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
66     struct iec61883_packet *packet = (struct iec61883_packet *)data;
67     /* Our node ID can change after a bus reset, so it is best to fetch
68     * our node ID for each packet. */
69     packet->sid = m_local_node_id;
70
71     packet->dbs = m_dimension;
72     packet->fn = 0;
73     packet->qpc = 0;
74     packet->sph = 0;
75     packet->reserved = 0;
76     packet->dbc = m_dbc;
77     packet->eoh1 = 2;
78     packet->fmt = IEC61883_FMT_AMDTP;
79
80     *tag = IEC61883_TAG_WITH_CIP;
81     *sy = 0;
82
83     signed int fc;
84     uint64_t presentation_time;
85     unsigned int presentation_cycle;
86     int cycles_until_presentation;
87
88     uint64_t transmit_at_time;
89     unsigned int transmit_at_cycle;
90     int cycles_until_transmit;
91
92     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
93                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
94     // check whether the packet buffer has packets for us to send.
95     // the base timestamp is the one of the next sample in the buffer
96     ffado_timestamp_t ts_head_tmp;
97     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
98
99     // the timestamp gives us the time at which we want the sample block
100     // to be output by the device
101     presentation_time = ( uint64_t ) ts_head_tmp;
102
103     // now we calculate the time when we have to transmit the sample block
104     transmit_at_time = substractTicks( presentation_time, AMDTP_TRANSMIT_TRANSFER_DELAY );
105
106     // calculate the cycle this block should be presented in
107     // (this is just a virtual calculation since at that time it should
108     //  already be in the device's buffer)
109     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
110
111     // calculate the cycle this block should be transmitted in
112     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
113
114     // we can check whether this cycle is within the 'window' we have
115     // to send this packet.
116     // first calculate the number of cycles left before presentation time
117     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
118
119     // we can check whether this cycle is within the 'window' we have
120     // to send this packet.
121     // first calculate the number of cycles left before presentation time
122     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
123
124     // two different options:
125     // 1) there are not enough frames for one packet
126     //      => determine wether this is a problem, since we might still
127     //         have some time to send it
128     // 2) there are enough packets
129     //      => determine whether we have to send them in this packet
130     if ( fc < ( signed int ) m_syt_interval )
131     {
132         // not enough frames in the buffer,
133
134         // we can still postpone the queueing of the packets
135         // if we are far enough ahead of the presentation time
136         if ( cycles_until_presentation <= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
137         {
138             debugOutput( DEBUG_LEVEL_NORMAL,
139                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
140                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
141                          transmit_at_cycle, cycles_until_transmit );
142             // we are too late
143             return eCRV_XRun;
144         }
145         else
146         {
147             #if DEBUG_EXTREME
148             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
149
150             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
151                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
152                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
153                                transmit_at_cycle, cycles_until_transmit, now_cycle );
154             #endif
155
156             // there is still time left to send the packet
157             // we want the system to give this packet another go at a later time instant
158             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
159
160             // we could wait here for a certain time before trying again. However, this
161             // is not going to work since we then block the iterator thread, hence also
162             // the receiving code, meaning that we are not processing received packets,
163             // and hence there is no progression in the number of frames available.
164
165             // for example:
166             // SleepRelativeUsec(125); // one cycle
167             // goto try_block_of_frames;
168
169             // or more advanced, calculate how many cycles we are ahead of 'now' and
170             // base the sleep on that.
171
172             // note that this requires that there is one thread for each IsoHandler,
173             // otherwise we're in the deadlock described above.
174         }
175     }
176     else
177     {
178         // there are enough frames, so check the time they are intended for
179         // all frames have a certain 'time window' in which they can be sent
180         // this corresponds to the range of the timestamp mechanism:
181         // we can send a packet 15 cycles in advance of the 'presentation time'
182         // in theory we can send the packet up till one cycle before the presentation time,
183         // however this is not very smart.
184
185         // There are 3 options:
186         // 1) the frame block is too early
187         //      => send an empty packet
188         // 2) the frame block is within the window
189         //      => send it
190         // 3) the frame block is too late
191         //      => discard (and raise xrun?)
192         //         get next block of frames and repeat
193
194         if(cycles_until_transmit < 0)
195         {
196             // we are too late
197             debugOutput(DEBUG_LEVEL_VERBOSE,
198                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
199                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
200                         transmit_at_cycle, cycles_until_transmit,
201                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
202             //debugShowBackLogLines(200);
203             // however, if we can send this sufficiently before the presentation
204             // time, it could be harmless.
205             // NOTE: dangerous since the device has no way of reporting that it didn't get
206             //       this packet on time.
207             if(cycles_until_presentation >= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION)
208             {
209                 // we are not that late and can still try to transmit the packet
210                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
211                 m_last_timestamp = presentation_time;
212                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
213             }
214             else   // definitely too late
215             {
216                 return eCRV_XRun;
217             }
218         }
219         else if(cycles_until_transmit <= AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY)
220         {
221             // it's time send the packet
222             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
223             m_last_timestamp = presentation_time;
224
225             // for timestamp tracing
226             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
227                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
228                                presentation_time,
229                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
230                                presentation_cycle, transmit_at_cycle);
231
232             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
233         }
234         else
235         {
236             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
237                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
238                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
239                                transmit_at_cycle, cycles_until_transmit,
240                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
241                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
242 #ifdef DEBUG
243             if ( cycles_until_transmit > AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY + 1 )
244             {
245                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
246                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
247                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
248                                    transmit_at_cycle, cycles_until_transmit,
249                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
250                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
251             }
252 #endif
253             // we are too early, send only an empty packet
254             return eCRV_EmptyPacket;
255         }
256     }
257     return eCRV_Invalid;
258 }
259
260 enum StreamProcessor::eChildReturnValue
261 AmdtpTransmitStreamProcessor::generatePacketData (
262     unsigned char *data, unsigned int *length )
263 {
264     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
265     {
266         debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
267                            "XMIT DATA: TSP= %011llu (%04u)\n",
268                            m_last_timestamp,
269                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
270         return eCRV_OK;
271     }
272     else return eCRV_XRun;
273 }
274
275 enum StreamProcessor::eChildReturnValue
276 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
277     unsigned char *data, unsigned int *length,
278     unsigned char *tag, unsigned char *sy,
279     uint32_t pkt_ctr )
280 {
281     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
282     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
283                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
284                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
285                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
286
287     packet->sid = m_local_node_id;
288
289     packet->dbs = m_dimension;
290     packet->fn = 0;
291     packet->qpc = 0;
292     packet->sph = 0;
293     packet->reserved = 0;
294     packet->dbc = m_dbc;
295     packet->eoh1 = 2;
296     packet->fmt = IEC61883_FMT_AMDTP;
297
298     *tag = IEC61883_TAG_WITH_CIP;
299     *sy = 0;
300
301     m_dbc += fillNoDataPacketHeader(packet, length);
302     return eCRV_Packet;
303 }
304
305 enum StreamProcessor::eChildReturnValue
306 AmdtpTransmitStreamProcessor::generateSilentPacketData (
307     unsigned char *data, unsigned int *length )
308 {
309     return eCRV_OK; // no need to do anything
310 }
311
312 enum StreamProcessor::eChildReturnValue
313 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
314     unsigned char *data, unsigned int *length,
315     unsigned char *tag, unsigned char *sy,
316     uint32_t pkt_ctr )
317 {
318     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
319     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
320                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
321                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
322                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
323     packet->sid = m_local_node_id;
324
325     packet->dbs = m_dimension;
326     packet->fn = 0;
327     packet->qpc = 0;
328     packet->sph = 0;
329     packet->reserved = 0;
330     packet->dbc = m_dbc;
331     packet->eoh1 = 2;
332     packet->fmt = IEC61883_FMT_AMDTP;
333
334     *tag = IEC61883_TAG_WITH_CIP;
335     *sy = 0;
336
337     m_dbc += fillNoDataPacketHeader(packet, length);
338     return eCRV_OK;
339 }
340
341 enum StreamProcessor::eChildReturnValue
342 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
343     unsigned char *data, unsigned int *length )
344 {
345     return eCRV_OK; // no need to do anything
346 }
347
348 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
349     struct iec61883_packet *packet, unsigned int* length,
350     uint32_t ts )
351 {
352
353     packet->fdf = m_fdf;
354
355     // convert the timestamp to SYT format
356     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
357     packet->syt = CondSwapToBus16 ( timestamp_SYT );
358
359     // FIXME: use a precomputed value here
360     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
361
362     return m_syt_interval;
363 }
364
365 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
366     struct iec61883_packet *packet, unsigned int* length )
367 {
368     // no-data packets have syt=0xFFFF
369     // and (can) have the usual amount of events as dummy data
370     // DBC is not increased
371     packet->fdf = IEC61883_FDF_NODATA;
372     packet->syt = 0xffff;
373
374 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
375     if ( m_send_nodata_payload )
376     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
377         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
378         return m_syt_interval;
379     } else { // no-data packets without payload
380         *length = 2*sizeof ( quadlet_t );
381         return 0;
382     }
383 #else
384     // no-data packets without payload
385     *length = 2*sizeof ( quadlet_t );
386     return 0;
387 #endif
388 }
389
390 unsigned int
391 AmdtpTransmitStreamProcessor::getSytInterval() {
392     switch (m_StreamProcessorManager.getNominalRate()) {
393         case 32000:
394         case 44100:
395         case 48000:
396             return 8;
397         case 88200:
398         case 96000:
399             return 16;
400         case 176400:
401         case 192000:
402             return 32;
403         default:
404             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
405             return 0;
406     }
407 }
408 unsigned int
409 AmdtpTransmitStreamProcessor::getFDF() {
410     switch (m_StreamProcessorManager.getNominalRate()) {
411         case 32000: return IEC61883_FDF_SFC_32KHZ;
412         case 44100: return IEC61883_FDF_SFC_44K1HZ;
413         case 48000: return IEC61883_FDF_SFC_48KHZ;
414         case 88200: return IEC61883_FDF_SFC_88K2HZ;
415         case 96000: return IEC61883_FDF_SFC_96KHZ;
416         case 176400: return IEC61883_FDF_SFC_176K4HZ;
417         case 192000: return IEC61883_FDF_SFC_192KHZ;
418         default:
419             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
420             return 0;
421     }
422 }
423
424 bool AmdtpTransmitStreamProcessor::prepareChild()
425 {
426     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
427     m_syt_interval = getSytInterval();
428     m_fdf = getFDF();
429
430     iec61883_cip_init (
431         &m_cip_status,
432         IEC61883_FMT_AMDTP,
433         m_fdf,
434         m_StreamProcessorManager.getNominalRate(),
435         m_dimension,
436         m_syt_interval );
437
438     if (!initPortCache()) {
439         debugError("Could not init port cache\n");
440         return false;
441     }
442
443     return true;
444 }
445
446 /*
447 * compose the event streams for the packets from the port buffers
448 */
449 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
450         unsigned int nevents, unsigned int offset )
451 {
452     // update the variable parts of the cache
453     updatePortCache();
454
455     // encode audio data
456     switch(m_StreamProcessorManager.getAudioDataType()) {
457         case StreamProcessorManager::eADT_Int24:
458             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
459             break;
460         case StreamProcessorManager::eADT_Float:
461             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
462             break;
463     }
464
465     // do midi ports
466     encodeMidiPorts((quadlet_t *)data, offset, nevents);
467     return true;
468 }
469
470 bool
471 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
472     char *data, unsigned int nevents, unsigned int offset)
473 {
474     // no need to update the port cache when transmitting silence since
475     // no dynamic values are used to do so.
476     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
477     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
478     return true;
479 }
480
481 /**
482  * @brief encodes all audio ports in the cache to events (silent data)
483  * @param data
484  * @param offset
485  * @param nevents
486  */
487 void
488 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
489                                                       unsigned int offset,
490                                                       unsigned int nevents)
491 {
492     unsigned int j;
493     quadlet_t *target_event;
494     int i;
495
496     for (i = 0; i < m_nb_audio_ports; i++) {
497         target_event = (quadlet_t *)(data + i);
498
499         for (j = 0;j < nevents; j += 1)
500         {
501             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
502             target_event += m_dimension;
503         }
504     }
505 }
506
507 #ifdef __SSE2__
508 //#if 0
509 #include <emmintrin.h>
510 #warning SSE2 build
511
512 /**
513  * @brief mux all audio ports to events
514  * @param data
515  * @param offset
516  * @param nevents
517  */
518 void
519 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
520                                                     unsigned int offset,
521                                                     unsigned int nevents)
522 {
523     unsigned int j;
524     quadlet_t *target_event;
525     int i;
526
527     float * client_buffers[4];
528     float tmp_values[4] __attribute__ ((aligned (16)));
529     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
530
531     // prepare the scratch buffer
532     assert(m_scratch_buffer_size_bytes > nevents * 4);
533     memset(m_scratch_buffer, 0, nevents * 4);
534
535     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
536     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
537
538 #if AMDTP_CLIP_FLOATS
539     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
540     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
541 #endif
542
543     // this assumes that audio ports are sorted by position,
544     // and that there are no gaps
545     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
546         struct _MBLA_port_cache *p;
547
548         // get the port buffers
549         for (j=0; j<4; j++) {
550             p = &(m_audio_ports.at(i+j));
551             if(p->buffer && p->enabled) {
552                 client_buffers[j] = (float *) p->buffer;
553                 client_buffers[j] += offset;
554             } else {
555                 // if a port is disabled or has no valid
556                 // buffer, use the scratch buffer (all zero's)
557                 client_buffers[j] = (float *) m_scratch_buffer;
558             }
559         }
560
561         // the base event for this position
562         target_event = (quadlet_t *)(data + i);
563
564         // process the events
565         for (j=0;j < nevents; j += 1)
566         {
567             // read the values
568             tmp_values[0] = *(client_buffers[0]);
569             tmp_values[1] = *(client_buffers[1]);
570             tmp_values[2] = *(client_buffers[2]);
571             tmp_values[3] = *(client_buffers[3]);
572
573             // now do the SSE based conversion/labeling
574             __m128 v_float = *((__m128*)tmp_values);
575             __m128i *target = (__m128i*)target_event;
576             __m128i v_int;
577
578             // clip
579 #if AMDTP_CLIP_FLOATS
580             // do SSE clipping
581             v_float = _mm_max_ps(v_float, v_min);
582             v_float = _mm_min_ps(v_float, v_max);
583 #endif
584
585             // multiply
586             v_float = _mm_mul_ps(v_float, mult);
587             // convert to signed integer
588             v_int = _mm_cvttps_epi32( v_float );
589             // shift right 8 bits
590             v_int = _mm_srli_epi32( v_int, 8 );
591             // label it
592             v_int = _mm_or_si128( v_int, label );
593
594             // do endian conversion (SSE is always little endian)
595             // do first swap
596             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
597             // do second swap
598             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
599
600             // store the packed int
601             // (target misalignment is assumed since we don't know the m_dimension)
602             _mm_storeu_si128 (target, v_int);
603
604             // increment the buffer pointers
605             client_buffers[0]++;
606             client_buffers[1]++;
607             client_buffers[2]++;
608             client_buffers[3]++;
609
610             // go to next target event position
611             target_event += m_dimension;
612         }
613     }
614
615     // do remaining ports
616     // NOTE: these can be time-SSE'd
617     for (; i < (int)m_nb_audio_ports; i++) {
618         struct _MBLA_port_cache &p = m_audio_ports.at(i);
619         target_event = (quadlet_t *)(data + i);
620         assert(nevents + offset <= p.buffer_size );
621
622         if(p.buffer && p.enabled) {
623             float *buffer = (float *)(p.buffer);
624             buffer += offset;
625    
626             for (j = 0;j < nevents; j += 4)
627             {
628                 // read the values
629                 tmp_values[0] = *buffer;
630                 buffer++;
631                 tmp_values[1] = *buffer;
632                 buffer++;
633                 tmp_values[2] = *buffer;
634                 buffer++;
635                 tmp_values[3] = *buffer;
636                 buffer++;
637
638                 // now do the SSE based conversion/labeling
639                 __m128 v_float = *((__m128*)tmp_values);
640                 __m128i v_int;
641
642 #if AMDTP_CLIP_FLOATS
643                 // do SSE clipping
644                 v_float = _mm_max_ps(v_float, v_min);
645                 v_float = _mm_min_ps(v_float, v_max);
646 #endif
647
648                 // multiply
649                 v_float = _mm_mul_ps(v_float, mult);
650                 // convert to signed integer
651                 v_int = _mm_cvttps_epi32( v_float );
652                 // shift right 8 bits
653                 v_int = _mm_srli_epi32( v_int, 8 );
654                 // label it
655                 v_int = _mm_or_si128( v_int, label );
656    
657                 // do endian conversion (SSE is always little endian)
658                 // do first swap
659                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
660                 // do second swap
661                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
662
663                 // store the packed int
664                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
665
666                 // increment the buffer pointers
667                 *target_event = tmp_values_int[0];
668                 target_event += m_dimension;
669                 *target_event = tmp_values_int[1];
670                 target_event += m_dimension;
671                 *target_event = tmp_values_int[2];
672                 target_event += m_dimension;
673                 *target_event = tmp_values_int[3];
674                 target_event += m_dimension;
675             }
676
677             // do the remainder of the events
678             for(;j < nevents; j += 1) {
679                 float *in = (float *)buffer;
680 #if AMDTP_CLIP_FLOATS
681                 if(*in > 1.0) *in=1.0;
682                 if(*in < -1.0) *in=-1.0;
683 #endif
684                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
685                 unsigned int tmp = ((int) v);
686                 tmp = ( tmp >> 8 ) | 0x40000000;
687                 *target_event = CondSwapToBus32((quadlet_t)tmp);
688                 buffer++;
689                 target_event += m_dimension;
690             }
691
692         } else {
693             for (j = 0;j < nevents; j += 1)
694             {
695                 // hardcoded byte swapped
696                 *target_event = 0x00000040;
697                 target_event += m_dimension;
698             }
699         }
700     }
701 }
702
703
704 /**
705  * @brief mux all audio ports to events
706  * @param data
707  * @param offset
708  * @param nevents
709  */
710 void
711 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
712                                                     unsigned int offset,
713                                                     unsigned int nevents)
714 {
715     unsigned int j;
716     quadlet_t *target_event;
717     int i;
718
719     uint32_t *client_buffers[4];
720     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
721
722     // prepare the scratch buffer
723     assert(m_scratch_buffer_size_bytes > nevents * 4);
724     memset(m_scratch_buffer, 0, nevents * 4);
725
726     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
727     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
728
729     // this assumes that audio ports are sorted by position,
730     // and that there are no gaps
731     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
732         struct _MBLA_port_cache *p;
733
734         // get the port buffers
735         for (j=0; j<4; j++) {
736             p = &(m_audio_ports.at(i+j));
737             if(p->buffer && p->enabled) {
738                 client_buffers[j] = (uint32_t *) p->buffer;
739                 client_buffers[j] += offset;
740             } else {
741                 // if a port is disabled or has no valid
742                 // buffer, use the scratch buffer (all zero's)
743                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
744             }
745         }
746
747         // the base event for this position
748         target_event = (quadlet_t *)(data + i);
749
750         // process the events
751         for (j=0;j < nevents; j += 1)
752         {
753             // read the values
754             tmp_values[0] = *(client_buffers[0]);
755             tmp_values[1] = *(client_buffers[1]);
756             tmp_values[2] = *(client_buffers[2]);
757             tmp_values[3] = *(client_buffers[3]);
758
759             // now do the SSE based conversion/labeling
760             __m128i *target = (__m128i*)target_event;
761             __m128i v_int = *((__m128i*)tmp_values);;
762
763             // mask
764             v_int = _mm_and_si128( v_int, mask );
765             // label it
766             v_int = _mm_or_si128( v_int, label );
767
768             // do endian conversion (SSE is always little endian)
769             // do first swap
770             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
771             // do second swap
772             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
773
774             // store the packed int
775             // (target misalignment is assumed since we don't know the m_dimension)
776             _mm_storeu_si128 (target, v_int);
777
778             // increment the buffer pointers
779             client_buffers[0]++;
780             client_buffers[1]++;
781             client_buffers[2]++;
782             client_buffers[3]++;
783
784             // go to next target event position
785             target_event += m_dimension;
786         }
787     }
788
789     // do remaining ports
790     // NOTE: these can be time-SSE'd
791     for (; i < ((int)m_nb_audio_ports); i++) {
792         struct _MBLA_port_cache &p = m_audio_ports.at(i);
793         target_event = (quadlet_t *)(data + i);
794         assert(nevents + offset <= p.buffer_size );
795
796         if(p.buffer && p.enabled) {
797             uint32_t *buffer = (uint32_t *)(p.buffer);
798             buffer += offset;
799    
800             for (j = 0;j < nevents; j += 4)
801             {
802                 // read the values
803                 tmp_values[0] = *buffer;
804                 buffer++;
805                 tmp_values[1] = *buffer;
806                 buffer++;
807                 tmp_values[2] = *buffer;
808                 buffer++;
809                 tmp_values[3] = *buffer;
810                 buffer++;
811
812                 // now do the SSE based conversion/labeling
813                 __m128i v_int = *((__m128i*)tmp_values);;
814
815                 // mask
816                 v_int = _mm_and_si128( v_int, mask );
817                 // label it
818                 v_int = _mm_or_si128( v_int, label );
819
820                 // do endian conversion (SSE is always little endian)
821                 // do first swap
822                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
823                 // do second swap
824                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
825
826                 // store the packed int
827                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
828
829                 // increment the buffer pointers
830                 *target_event = tmp_values[0];
831                 target_event += m_dimension;
832                 *target_event = tmp_values[1];
833                 target_event += m_dimension;
834                 *target_event = tmp_values[2];
835                 target_event += m_dimension;
836                 *target_event = tmp_values[3];
837                 target_event += m_dimension;
838             }
839
840             // do the remainder of the events
841             for(;j < nevents; j += 1) {
842                 uint32_t in = (uint32_t)(*buffer);
843                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
844                 buffer++;
845                 target_event += m_dimension;
846             }
847
848         } else {
849             for (j = 0;j < nevents; j += 1)
850             {
851                 // hardcoded byte swapped
852                 *target_event = 0x00000040;
853                 target_event += m_dimension;
854             }
855         }
856     }
857 }
858
859 #else
860
861 /**
862  * @brief mux all audio ports to events
863  * @param data
864  * @param offset
865  * @param nevents
866  */
867 void
868 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
869                                                     unsigned int offset,
870                                                     unsigned int nevents)
871 {
872     unsigned int j;
873     quadlet_t *target_event;
874     int i;
875
876     for (i = 0; i < m_nb_audio_ports; i++) {
877         struct _MBLA_port_cache &p = m_audio_ports.at(i);
878         target_event = (quadlet_t *)(data + i);
879         assert(nevents + offset <= p.buffer_size );
880
881         if(p.buffer && p.enabled) {
882             quadlet_t *buffer = (quadlet_t *)(p.buffer);
883             buffer += offset;
884    
885             for (j = 0;j < nevents; j += 1)
886             {
887                 uint32_t in = (uint32_t)(*buffer);
888                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
889                 buffer++;
890                 target_event += m_dimension;
891             }
892         } else {
893             for (j = 0;j < nevents; j += 1)
894             {
895                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
896                 target_event += m_dimension;
897             }
898         }
899     }
900 }
901
902 /**
903  * @brief mux all audio ports to events
904  * @param data
905  * @param offset
906  * @param nevents
907  */
908 void
909 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
910                                                     unsigned int offset,
911                                                     unsigned int nevents)
912 {
913     unsigned int j;
914     quadlet_t *target_event;
915     int i;
916
917     for (i = 0; i < m_nb_audio_ports; i++) {
918         struct _MBLA_port_cache &p = m_audio_ports.at(i);
919         target_event = (quadlet_t *)(data + i);
920         assert(nevents + offset <= p.buffer_size );
921
922         if(p.buffer && p.enabled) {
923             quadlet_t *buffer = (quadlet_t *)(p.buffer);
924             buffer += offset;
925    
926             for (j = 0;j < nevents; j += 1)
927             {
928                 float *in = (float *)buffer;
929 #if AMDTP_CLIP_FLOATS
930                 if(*in > 1.0) *in=1.0;
931                 if(*in < -1.0) *in=-1.0;
932 #endif
933                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
934                 unsigned int tmp = ((int) lrintf(v));
935
936                 tmp = ( tmp >> 8 ) | 0x40000000;
937                 *target_event = CondSwapToBus32((quadlet_t)tmp);
938                 buffer++;
939                 target_event += m_dimension;
940             }
941         } else {
942             for (j = 0;j < nevents; j += 1)
943             {
944                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
945                 target_event += m_dimension;
946             }
947         }
948     }
949 }
950 #endif
951
952 /**
953  * @brief encodes all midi ports in the cache to events (silence)
954  * @param data
955  * @param offset
956  * @param nevents
957  */
958 void
959 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
960                                                      unsigned int offset,
961                                                      unsigned int nevents)
962 {
963     quadlet_t *target_event;
964     int i;
965     unsigned int j;
966
967     for (i = 0; i < m_nb_midi_ports; i++) {
968         struct _MIDI_port_cache &p = m_midi_ports.at(i);
969
970         for (j = p.location;j < nevents; j += 8) {
971             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
972             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
973         }
974     }
975 }
976
977 /**
978  * @brief encodes all midi ports in the cache to events
979  * @param data
980  * @param offset
981  * @param nevents
982  */
983 void
984 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
985                                               unsigned int offset,
986                                               unsigned int nevents)
987 {
988     quadlet_t *target_event;
989     int i;
990     unsigned int j;
991
992     for (i = 0; i < m_nb_midi_ports; i++) {
993         struct _MIDI_port_cache &p = m_midi_ports.at(i);
994         if (p.buffer && p.enabled) {
995             uint32_t *buffer = (quadlet_t *)(p.buffer);
996             buffer += offset;
997
998             for (j = p.location;j < nevents; j += 8) {
999                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1000
1001                 if ( *buffer & 0xFF000000 )   // we can send a byte
1002                 {
1003                     quadlet_t tmpval;
1004                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1005                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1006                     *target_event = CondSwapToBus32(tmpval);
1007
1008 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1009 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1010 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1011 //                                data, target_event, tmpval );
1012                 } else {
1013                     // can't send a byte, either because there is no byte,
1014                     // or because this would exceed the maximum rate
1015                     // FIXME: this can be ifdef optimized since it's a constant
1016                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1017                 }
1018                 buffer+=8;
1019             }
1020         } else {
1021             for (j = p.location;j < nevents; j += 8) {
1022                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1023                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1024                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1025             }
1026         }
1027     }
1028 }
1029
1030 bool
1031 AmdtpTransmitStreamProcessor::initPortCache() {
1032     // make use of the fact that audio ports are the first ports in
1033     // the cluster as per AMDTP. so we can sort the ports by position
1034     // and have very efficient lookups:
1035     // m_float_ports.at(i).buffer -> audio stream i buffer
1036     // for midi ports we simply cache all port info since they are (usually) not
1037     // that numerous
1038     m_nb_audio_ports = 0;
1039     m_audio_ports.clear();
1040    
1041     m_nb_midi_ports = 0;
1042     m_midi_ports.clear();
1043    
1044     for(PortVectorIterator it = m_Ports.begin();
1045         it != m_Ports.end();
1046         ++it )
1047     {
1048         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1049         assert(pinfo); // this should not fail!!
1050
1051         switch( pinfo->getFormat() )
1052         {
1053             case AmdtpPortInfo::E_MBLA:
1054                 m_nb_audio_ports++;
1055                 break;
1056             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1057                 break;
1058             case AmdtpPortInfo::E_Midi:
1059                 m_nb_midi_ports++;
1060                 break;
1061             default: // ignore
1062                 break;
1063         }
1064     }
1065
1066     int idx;
1067     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1068         for(PortVectorIterator it = m_Ports.begin();
1069             it != m_Ports.end();
1070             ++it )
1071         {
1072             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1073             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1074                         "idx %u: looking at port %s at position %u\n",
1075                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1076             if(pinfo->getPosition() == (unsigned int)idx) {
1077                 struct _MBLA_port_cache p;
1078                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1079                 if(p.port == NULL) {
1080                     debugError("Port is not an AmdtpAudioPort!\n");
1081                     return false;
1082                 }
1083                 p.buffer = NULL; // to be filled by updatePortCache
1084                 #ifdef DEBUG
1085                 p.buffer_size = (*it)->getBufferSize();
1086                 #endif
1087
1088                 m_audio_ports.push_back(p);
1089                 debugOutput(DEBUG_LEVEL_VERBOSE,
1090                             "Cached port %s at position %u\n",
1091                             p.port->getName().c_str(), idx);
1092                 goto next_index;
1093             }
1094         }
1095         debugError("No MBLA port found for position %d\n", idx);
1096         return false;
1097 next_index:
1098         continue;
1099     }
1100
1101     for(PortVectorIterator it = m_Ports.begin();
1102         it != m_Ports.end();
1103         ++it )
1104     {
1105         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1106         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1107                     "idx %u: looking at port %s at position %u, location %u\n",
1108                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1109         if ((*it)->getPortType() == Port::E_Midi) {
1110             struct _MIDI_port_cache p;
1111             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1112             if(p.port == NULL) {
1113                 debugError("Port is not an AmdtpMidiPort!\n");
1114                 return false;
1115             }
1116             p.position = pinfo->getPosition();
1117             p.location = pinfo->getLocation();
1118             p.buffer = NULL; // to be filled by updatePortCache
1119             #ifdef DEBUG
1120             p.buffer_size = (*it)->getBufferSize();
1121             #endif
1122
1123             m_midi_ports.push_back(p);
1124             debugOutput(DEBUG_LEVEL_VERBOSE,
1125                         "Cached port %s at position %u, location %u\n",
1126                         p.port->getName().c_str(), p.position, p.location);
1127         }
1128     }
1129
1130     return true;
1131 }
1132
1133 void
1134 AmdtpTransmitStreamProcessor::updatePortCache() {
1135     int idx;
1136     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1137         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1138         AmdtpAudioPort *port = p.port;
1139         p.buffer = port->getBufferAddress();
1140         p.enabled = !port->isDisabled();
1141     }
1142     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1143         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1144         AmdtpMidiPort *port = p.port;
1145         p.buffer = port->getBufferAddress();
1146         p.enabled = !port->isDisabled();
1147     }
1148 }
1149
1150 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.