root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 857, 38.6 kB (checked in by ppalmers, 15 years ago)

Introduce a distinction between empty and silent packets. When shutting down a transmit SP, we now send silent packets (valid, but with all audio muted) instead of empty packets (without
payload) for a while. This should solve the shutdown issue with the MOTU devices.

Line 
1 /*
2  * Copyright (C) 2005-2007 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31
32 #include "libieee1394/ieee1394service.h"
33 #include "libieee1394/IsoHandlerManager.h"
34 #include "libieee1394/cycletimer.h"
35
36 #include <netinet/in.h>
37 #include <assert.h>
38
39 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
40
41 namespace Streaming
42 {
43
44 /* transmit */
45 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
46         : StreamProcessor(parent, ePT_Transmit)
47         , m_dimension( dimension )
48         , m_dbc( 0 )
49         , m_nb_audio_ports( 0 )
50         , m_nb_midi_ports( 0 )
51 {}
52
53 enum StreamProcessor::eChildReturnValue
54 AmdtpTransmitStreamProcessor::generatePacketHeader (
55     unsigned char *data, unsigned int *length,
56     unsigned char *tag, unsigned char *sy,
57     int cycle, unsigned int dropped, unsigned int max_length )
58 {
59     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
60     struct iec61883_packet *packet = (struct iec61883_packet *)data;
61     /* Our node ID can change after a bus reset, so it is best to fetch
62     * our node ID for each packet. */
63     packet->sid = m_local_node_id;
64
65     packet->dbs = m_dimension;
66     packet->fn = 0;
67     packet->qpc = 0;
68     packet->sph = 0;
69     packet->reserved = 0;
70     packet->dbc = m_dbc;
71     packet->eoh1 = 2;
72     packet->fmt = IEC61883_FMT_AMDTP;
73
74     *tag = IEC61883_TAG_WITH_CIP;
75     *sy = 0;
76
77     signed int fc;
78     uint64_t presentation_time;
79     unsigned int presentation_cycle;
80     int cycles_until_presentation;
81
82     uint64_t transmit_at_time;
83     unsigned int transmit_at_cycle;
84     int cycles_until_transmit;
85
86     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "Try for cycle %d\n", cycle );
87     // check whether the packet buffer has packets for us to send.
88     // the base timestamp is the one of the next sample in the buffer
89     ffado_timestamp_t ts_head_tmp;
90     m_data_buffer->getBufferHeadTimestamp ( &ts_head_tmp, &fc ); // thread safe
91
92     // the timestamp gives us the time at which we want the sample block
93     // to be output by the device
94     presentation_time = ( uint64_t ) ts_head_tmp;
95     m_last_timestamp = presentation_time;
96
97     // now we calculate the time when we have to transmit the sample block
98     transmit_at_time = substractTicks ( presentation_time, AMDTP_TRANSMIT_TRANSFER_DELAY );
99
100     // calculate the cycle this block should be presented in
101     // (this is just a virtual calculation since at that time it should
102     //  already be in the device's buffer)
103     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
104
105     // calculate the cycle this block should be transmitted in
106     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
107
108     // we can check whether this cycle is within the 'window' we have
109     // to send this packet.
110     // first calculate the number of cycles left before presentation time
111     cycles_until_presentation = diffCycles ( presentation_cycle, cycle );
112
113     // we can check whether this cycle is within the 'window' we have
114     // to send this packet.
115     // first calculate the number of cycles left before presentation time
116     cycles_until_transmit = diffCycles ( transmit_at_cycle, cycle );
117
118     if (dropped) {
119         debugOutput ( DEBUG_LEVEL_VERBOSE,
120                     "Gen HDR: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
121                     cycle,
122                     transmit_at_cycle, cycles_until_transmit,
123                     transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
124                     presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
125     }
126     // two different options:
127     // 1) there are not enough frames for one packet
128     //      => determine wether this is a problem, since we might still
129     //         have some time to send it
130     // 2) there are enough packets
131     //      => determine whether we have to send them in this packet
132     if ( fc < ( signed int ) m_syt_interval )
133     {
134         // not enough frames in the buffer,
135
136         // we can still postpone the queueing of the packets
137         // if we are far enough ahead of the presentation time
138         if ( cycles_until_presentation <= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
139         {
140             debugOutput ( DEBUG_LEVEL_VERBOSE,
141                         "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
142                         fc, cycle, transmit_at_cycle, cycles_until_transmit );
143             // we are too late
144             return eCRV_XRun;
145         }
146         else
147         {
148             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
149
150             debugOutput ( DEBUG_LEVEL_VERBOSE,
151                         "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
152                         fc, cycle, transmit_at_cycle, cycles_until_transmit, now_cycle );
153             debugWarning("Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
154                          fc, cycle, transmit_at_cycle, cycles_until_transmit, now_cycle );
155
156             // there is still time left to send the packet
157             // we want the system to give this packet another go at a later time instant
158             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
159
160             // we could wait here for a certain time before trying again. However, this
161             // is not going to work since we then block the iterator thread, hence also
162             // the receiving code, meaning that we are not processing received packets,
163             // and hence there is no progression in the number of frames available.
164
165             // for example:
166             // SleepRelativeUsec(125); // one cycle
167             // goto try_block_of_frames;
168
169             // or more advanced, calculate how many cycles we are ahead of 'now' and
170             // base the sleep on that.
171
172             // note that this requires that there is one thread for each IsoHandler,
173             // otherwise we're in the deadlock described above.
174         }
175     }
176     else
177     {
178         // there are enough frames, so check the time they are intended for
179         // all frames have a certain 'time window' in which they can be sent
180         // this corresponds to the range of the timestamp mechanism:
181         // we can send a packet 15 cycles in advance of the 'presentation time'
182         // in theory we can send the packet up till one cycle before the presentation time,
183         // however this is not very smart.
184
185         // There are 3 options:
186         // 1) the frame block is too early
187         //      => send an empty packet
188         // 2) the frame block is within the window
189         //      => send it
190         // 3) the frame block is too late
191         //      => discard (and raise xrun?)
192         //         get next block of frames and repeat
193
194         if(cycles_until_transmit < 0)
195         {
196             // we are too late
197             debugOutput(DEBUG_LEVEL_VERBOSE,
198                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
199                         cycle,
200                         transmit_at_cycle, cycles_until_transmit,
201                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
202             //debugShowBackLogLines(200);
203 //             // however, if we can send this sufficiently before the presentation
204 //             // time, it could be harmless.
205 //             // NOTE: dangerous since the device has no way of reporting that it didn't get
206 //             //       this packet on time.
207 //             if(cycles_until_presentation >= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION)
208 //             {
209 //                 // we are not that late and can still try to transmit the packet
210 //                 m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
211 //                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
212 //             }
213 //             else   // definitely too late
214 //             {
215                 return eCRV_XRun;
216 //             }
217         }
218         else if(cycles_until_transmit <= AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY)
219         {
220             // it's time send the packet
221             m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
222             return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
223         }
224         else
225         {
226             debugOutput ( DEBUG_LEVEL_VERY_VERBOSE,
227                         "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
228                         cycle,
229                         transmit_at_cycle, cycles_until_transmit,
230                         transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
231                         presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
232 #ifdef DEBUG
233             if ( cycles_until_transmit > AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY + 1 )
234             {
235                 debugOutput ( DEBUG_LEVEL_VERY_VERBOSE,
236                             "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
237                             cycle,
238                             transmit_at_cycle, cycles_until_transmit,
239                             transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
240                             presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
241             }
242 #endif
243             // we are too early, send only an empty packet
244             return eCRV_EmptyPacket;
245         }
246     }
247     return eCRV_Invalid;
248 }
249
250 enum StreamProcessor::eChildReturnValue
251 AmdtpTransmitStreamProcessor::generatePacketData (
252     unsigned char *data, unsigned int *length,
253     unsigned char *tag, unsigned char *sy,
254     int cycle, unsigned int dropped, unsigned int max_length )
255 {
256     if ( m_data_buffer->readFrames ( m_syt_interval, ( char * ) ( data + 8 ) ) )
257     {
258         debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT DATA (cy %04d): TSP=%011llu (%04u)\n",
259                     cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
260         return eCRV_OK;
261     }
262     else return eCRV_XRun;
263
264 }
265
266 enum StreamProcessor::eChildReturnValue
267 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
268     unsigned char *data, unsigned int *length,
269     unsigned char *tag, unsigned char *sy,
270     int cycle, unsigned int dropped, unsigned int max_length )
271 {
272     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
273     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
274                 cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
275
276     packet->sid = m_local_node_id;
277
278     packet->dbs = m_dimension;
279     packet->fn = 0;
280     packet->qpc = 0;
281     packet->sph = 0;
282     packet->reserved = 0;
283     packet->dbc = m_dbc;
284     packet->eoh1 = 2;
285     packet->fmt = IEC61883_FMT_AMDTP;
286
287     *tag = IEC61883_TAG_WITH_CIP;
288     *sy = 0;
289
290     m_dbc += fillNoDataPacketHeader ( packet, length );
291     return eCRV_Packet;
292 }
293
294 enum StreamProcessor::eChildReturnValue
295 AmdtpTransmitStreamProcessor::generateSilentPacketData (
296     unsigned char *data, unsigned int *length,
297     unsigned char *tag, unsigned char *sy,
298     int cycle, unsigned int dropped, unsigned int max_length )
299 {
300     return eCRV_OK; // no need to do anything
301 }
302
303 enum StreamProcessor::eChildReturnValue
304 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
305     unsigned char *data, unsigned int *length,
306     unsigned char *tag, unsigned char *sy,
307     int cycle, unsigned int dropped, unsigned int max_length )
308 {
309     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
310     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
311                 cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
312     packet->sid = m_local_node_id;
313
314     packet->dbs = m_dimension;
315     packet->fn = 0;
316     packet->qpc = 0;
317     packet->sph = 0;
318     packet->reserved = 0;
319     packet->dbc = m_dbc;
320     packet->eoh1 = 2;
321     packet->fmt = IEC61883_FMT_AMDTP;
322
323     *tag = IEC61883_TAG_WITH_CIP;
324     *sy = 0;
325
326     m_dbc += fillNoDataPacketHeader ( packet, length );
327     return eCRV_OK;
328 }
329
330 enum StreamProcessor::eChildReturnValue
331 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
332     unsigned char *data, unsigned int *length,
333     unsigned char *tag, unsigned char *sy,
334     int cycle, unsigned int dropped, unsigned int max_length )
335 {
336     return eCRV_OK; // no need to do anything
337 }
338
339 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
340     struct iec61883_packet *packet, unsigned int* length,
341     uint32_t ts )
342 {
343
344     packet->fdf = m_fdf;
345
346     // convert the timestamp to SYT format
347     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
348     packet->syt = ntohs ( timestamp_SYT );
349
350     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
351
352     return m_syt_interval;
353 }
354
355 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
356     struct iec61883_packet *packet, unsigned int* length )
357 {
358
359     // no-data packets have syt=0xFFFF
360     // and have the usual amount of events as dummy data (?)
361     packet->fdf = IEC61883_FDF_NODATA;
362     packet->syt = 0xffff;
363
364     // FIXME: either make this a setting or choose
365     bool send_payload=true;
366     if ( send_payload )
367     {
368         // this means no-data packets with payload (DICE doesn't like that)
369         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
370         return m_syt_interval;
371     }
372     else
373     {
374         // dbc is not incremented
375         // this means no-data packets without payload
376         *length = 2*sizeof ( quadlet_t );
377         return 0;
378     }
379 }
380
381 unsigned int
382 AmdtpTransmitStreamProcessor::getSytInterval() {
383     switch (m_StreamProcessorManager.getNominalRate()) {
384         case 32000:
385         case 44100:
386         case 48000:
387             return 8;
388         case 88200:
389         case 96000:
390             return 16;
391         case 176400:
392         case 192000:
393             return 32;
394         default:
395             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
396             return 0;
397     }
398 }
399 unsigned int
400 AmdtpTransmitStreamProcessor::getFDF() {
401     switch (m_StreamProcessorManager.getNominalRate()) {
402         case 32000: return IEC61883_FDF_SFC_32KHZ;
403         case 44100: return IEC61883_FDF_SFC_44K1HZ;
404         case 48000: return IEC61883_FDF_SFC_48KHZ;
405         case 88200: return IEC61883_FDF_SFC_88K2HZ;
406         case 96000: return IEC61883_FDF_SFC_96KHZ;
407         case 176400: return IEC61883_FDF_SFC_176K4HZ;
408         case 192000: return IEC61883_FDF_SFC_192KHZ;
409         default:
410             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
411             return 0;
412     }
413 }
414
415 bool AmdtpTransmitStreamProcessor::prepareChild()
416 {
417     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
418     m_syt_interval = getSytInterval();
419     m_fdf = getFDF();
420
421     iec61883_cip_init (
422         &m_cip_status,
423         IEC61883_FMT_AMDTP,
424         m_fdf,
425         m_StreamProcessorManager.getNominalRate(),
426         m_dimension,
427         m_syt_interval );
428
429     if (!initPortCache()) {
430         debugError("Could not init port cache\n");
431         return false;
432     }
433
434     return true;
435 }
436
437 /*
438 * compose the event streams for the packets from the port buffers
439 */
440 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
441         unsigned int nevents, unsigned int offset )
442 {
443     // update the variable parts of the cache
444     updatePortCache();
445
446     // encode audio data
447     switch(m_StreamProcessorManager.getAudioDataType()) {
448         case StreamProcessorManager::eADT_Int24:
449             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
450             break;
451         case StreamProcessorManager::eADT_Float:
452             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
453             break;
454     }
455
456     // do midi ports
457     encodeMidiPorts((quadlet_t *)data, offset, nevents);
458     return true;
459 }
460
461 bool
462 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
463     char *data, unsigned int nevents, unsigned int offset)
464 {
465     // no need to update the port cache when transmitting silence since
466     // no dynamic values are used to do so.
467     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
468     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
469     return true;
470 }
471
472 /**
473  * @brief encodes all audio ports in the cache to events (silent data)
474  * @param data
475  * @param offset
476  * @param nevents
477  */
478 void
479 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
480                                                       unsigned int offset,
481                                                       unsigned int nevents)
482 {
483     unsigned int j;
484     quadlet_t *target_event;
485     unsigned int i;
486
487     for (i = 0; i < m_nb_audio_ports; i++) {
488         target_event = (quadlet_t *)(data + i);
489
490         for (j = 0;j < nevents; j += 1)
491         {
492             *target_event = 0x00000040;
493             target_event += m_dimension;
494         }
495     }
496 }
497
498 #ifdef __SSE2__
499 //#if 0
500 #include <emmintrin.h>
501 #warning SSE2 build
502
503 /**
504  * @brief mux all audio ports to events
505  * @param data
506  * @param offset
507  * @param nevents
508  */
509 void
510 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
511                                                     unsigned int offset,
512                                                     unsigned int nevents)
513 {
514     unsigned int j;
515     quadlet_t *target_event;
516     unsigned int i;
517
518     float * client_buffers[4];
519     float tmp_values[4] __attribute__ ((aligned (16)));
520     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
521
522     // prepare the scratch buffer
523     assert(m_scratch_buffer_size_bytes > nevents * 4);
524     memset(m_scratch_buffer, 0, nevents * 4);
525
526     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
527     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
528
529     // this assumes that audio ports are sorted by position,
530     // and that there are no gaps
531     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
532         struct _MBLA_port_cache *p;
533
534         // get the port buffers
535         for (j=0; j<4; j++) {
536             p = &(m_audio_ports.at(i+j));
537             if(p->buffer && p->enabled) {
538                 client_buffers[j] = (float *) p->buffer;
539                 client_buffers[j] += offset;
540             } else {
541                 // if a port is disabled or has no valid
542                 // buffer, use the scratch buffer (all zero's)
543                 client_buffers[j] = (float *) m_scratch_buffer;
544             }
545         }
546
547         // the base event for this position
548         target_event = (quadlet_t *)(data + i);
549
550         // process the events
551         for (j=0;j < nevents; j += 1)
552         {
553             // read the values
554             tmp_values[0] = *(client_buffers[0]);
555             tmp_values[1] = *(client_buffers[1]);
556             tmp_values[2] = *(client_buffers[2]);
557             tmp_values[3] = *(client_buffers[3]);
558
559             // now do the SSE based conversion/labeling
560             __m128 v_float = *((__m128*)tmp_values);
561             __m128i *target = (__m128i*)target_event;
562             __m128i v_int;
563
564             // multiply
565             v_float = _mm_mul_ps(v_float, mult);
566             // convert to signed integer
567             v_int = _mm_cvttps_epi32( v_float );
568             // shift right 8 bits
569             v_int = _mm_srli_epi32( v_int, 8 );
570             // label it
571             v_int = _mm_or_si128( v_int, label );
572
573             // do endian conversion (SSE is always little endian)
574             // do first swap
575             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
576             // do second swap
577             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
578
579             // store the packed int
580             // (target misalignment is assumed since we don't know the m_dimension)
581             _mm_storeu_si128 (target, v_int);
582
583             // increment the buffer pointers
584             client_buffers[0]++;
585             client_buffers[1]++;
586             client_buffers[2]++;
587             client_buffers[3]++;
588
589             // go to next target event position
590             target_event += m_dimension;
591         }
592     }
593
594     // do remaining ports
595     // NOTE: these can be time-SSE'd
596     for (; i < m_nb_audio_ports; i++) {
597         struct _MBLA_port_cache &p = m_audio_ports.at(i);
598         target_event = (quadlet_t *)(data + i);
599         assert(nevents + offset <= p.buffer_size );
600
601         if(p.buffer && p.enabled) {
602             float *buffer = (float *)(p.buffer);
603             buffer += offset;
604    
605             for (j = 0;j < nevents; j += 4)
606             {
607                 // read the values
608                 tmp_values[0] = *buffer;
609                 buffer++;
610                 tmp_values[1] = *buffer;
611                 buffer++;
612                 tmp_values[2] = *buffer;
613                 buffer++;
614                 tmp_values[3] = *buffer;
615                 buffer++;
616    
617                 // now do the SSE based conversion/labeling
618                 __m128 v_float = *((__m128*)tmp_values);
619                 __m128i v_int;
620    
621                 // multiply
622                 v_float = _mm_mul_ps(v_float, mult);
623                 // convert to signed integer
624                 v_int = _mm_cvttps_epi32( v_float );
625                 // shift right 8 bits
626                 v_int = _mm_srli_epi32( v_int, 8 );
627                 // label it
628                 v_int = _mm_or_si128( v_int, label );
629    
630                 // do endian conversion (SSE is always little endian)
631                 // do first swap
632                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
633                 // do second swap
634                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
635
636                 // store the packed int
637                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
638
639                 // increment the buffer pointers
640                 *target_event = tmp_values_int[0];
641                 target_event += m_dimension;
642                 *target_event = tmp_values_int[1];
643                 target_event += m_dimension;
644                 *target_event = tmp_values_int[2];
645                 target_event += m_dimension;
646                 *target_event = tmp_values_int[3];
647                 target_event += m_dimension;
648             }
649
650             // do the remainder of the events
651             for(;j < nevents; j += 1) {
652                 float *in = (float *)buffer;
653                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
654                 unsigned int tmp = ((int) v);
655                 tmp = ( tmp >> 8 ) | 0x40000000;
656                 *target_event = htonl((quadlet_t)tmp);
657                 buffer++;
658                 target_event += m_dimension;
659             }
660
661         } else {
662             for (j = 0;j < nevents; j += 1)
663             {
664                 // hardcoded byte swapped
665                 *target_event = 0x00000040;
666                 target_event += m_dimension;
667             }
668         }
669     }
670 }
671
672
673 /**
674  * @brief mux all audio ports to events
675  * @param data
676  * @param offset
677  * @param nevents
678  */
679 void
680 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
681                                                     unsigned int offset,
682                                                     unsigned int nevents)
683 {
684     unsigned int j;
685     quadlet_t *target_event;
686     unsigned int i;
687
688     uint32_t *client_buffers[4];
689     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
690
691     // prepare the scratch buffer
692     assert(m_scratch_buffer_size_bytes > nevents * 4);
693     memset(m_scratch_buffer, 0, nevents * 4);
694
695     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
696     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
697
698     // this assumes that audio ports are sorted by position,
699     // and that there are no gaps
700     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
701         struct _MBLA_port_cache *p;
702
703         // get the port buffers
704         for (j=0; j<4; j++) {
705             p = &(m_audio_ports.at(i+j));
706             if(p->buffer && p->enabled) {
707                 client_buffers[j] = (uint32_t *) p->buffer;
708                 client_buffers[j] += offset;
709             } else {
710                 // if a port is disabled or has no valid
711                 // buffer, use the scratch buffer (all zero's)
712                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
713             }
714         }
715
716         // the base event for this position
717         target_event = (quadlet_t *)(data + i);
718
719         // process the events
720         for (j=0;j < nevents; j += 1)
721         {
722             // read the values
723             tmp_values[0] = *(client_buffers[0]);
724             tmp_values[1] = *(client_buffers[1]);
725             tmp_values[2] = *(client_buffers[2]);
726             tmp_values[3] = *(client_buffers[3]);
727
728             // now do the SSE based conversion/labeling
729             __m128i *target = (__m128i*)target_event;
730             __m128i v_int = *((__m128i*)tmp_values);;
731
732             // mask
733             v_int = _mm_and_si128( v_int, mask );
734             // label it
735             v_int = _mm_or_si128( v_int, label );
736
737             // do endian conversion (SSE is always little endian)
738             // do first swap
739             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
740             // do second swap
741             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
742
743             // store the packed int
744             // (target misalignment is assumed since we don't know the m_dimension)
745             _mm_storeu_si128 (target, v_int);
746
747             // increment the buffer pointers
748             client_buffers[0]++;
749             client_buffers[1]++;
750             client_buffers[2]++;
751             client_buffers[3]++;
752
753             // go to next target event position
754             target_event += m_dimension;
755         }
756     }
757
758     // do remaining ports
759     // NOTE: these can be time-SSE'd
760     for (; i < m_nb_audio_ports; i++) {
761         struct _MBLA_port_cache &p = m_audio_ports.at(i);
762         target_event = (quadlet_t *)(data + i);
763         assert(nevents + offset <= p.buffer_size );
764
765         if(p.buffer && p.enabled) {
766             uint32_t *buffer = (uint32_t *)(p.buffer);
767             buffer += offset;
768    
769             for (j = 0;j < nevents; j += 4)
770             {
771                 // read the values
772                 tmp_values[0] = *buffer;
773                 buffer++;
774                 tmp_values[1] = *buffer;
775                 buffer++;
776                 tmp_values[2] = *buffer;
777                 buffer++;
778                 tmp_values[3] = *buffer;
779                 buffer++;
780
781                 // now do the SSE based conversion/labeling
782                 __m128i v_int = *((__m128i*)tmp_values);;
783
784                 // mask
785                 v_int = _mm_and_si128( v_int, mask );
786                 // label it
787                 v_int = _mm_or_si128( v_int, label );
788
789                 // do endian conversion (SSE is always little endian)
790                 // do first swap
791                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
792                 // do second swap
793                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
794
795                 // store the packed int
796                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
797
798                 // increment the buffer pointers
799                 *target_event = tmp_values[0];
800                 target_event += m_dimension;
801                 *target_event = tmp_values[1];
802                 target_event += m_dimension;
803                 *target_event = tmp_values[2];
804                 target_event += m_dimension;
805                 *target_event = tmp_values[3];
806                 target_event += m_dimension;
807             }
808
809             // do the remainder of the events
810             for(;j < nevents; j += 1) {
811                 uint32_t in = (uint32_t)(*buffer);
812                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
813                 buffer++;
814                 target_event += m_dimension;
815             }
816
817         } else {
818             for (j = 0;j < nevents; j += 1)
819             {
820                 // hardcoded byte swapped
821                 *target_event = 0x00000040;
822                 target_event += m_dimension;
823             }
824         }
825     }
826 }
827
828 #else
829
830 /**
831  * @brief mux all audio ports to events
832  * @param data
833  * @param offset
834  * @param nevents
835  */
836 void
837 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
838                                                     unsigned int offset,
839                                                     unsigned int nevents)
840 {
841     unsigned int j;
842     quadlet_t *target_event;
843     unsigned int i;
844
845     for (i = 0; i < m_nb_audio_ports; i++) {
846         struct _MBLA_port_cache &p = m_audio_ports.at(i);
847         target_event = (quadlet_t *)(data + i);
848         assert(nevents + offset <= p.buffer_size );
849
850         if(p.buffer && p.enabled) {
851             quadlet_t *buffer = (quadlet_t *)(p.buffer);
852             buffer += offset;
853    
854             for (j = 0;j < nevents; j += 1)
855             {
856                 uint32_t in = (uint32_t)(*buffer);
857                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
858                 buffer++;
859                 target_event += m_dimension;
860             }
861         } else {
862             for (j = 0;j < nevents; j += 1)
863             {
864                 *target_event = 0x00000040;
865                 target_event += m_dimension;
866             }
867         }
868     }
869 }
870
871 /**
872  * @brief mux all audio ports to events
873  * @param data
874  * @param offset
875  * @param nevents
876  */
877 void
878 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
879                                                     unsigned int offset,
880                                                     unsigned int nevents)
881 {
882     unsigned int j;
883     quadlet_t *target_event;
884     unsigned int i;
885
886     for (i = 0; i < m_nb_audio_ports; i++) {
887         struct _MBLA_port_cache &p = m_audio_ports.at(i);
888         target_event = (quadlet_t *)(data + i);
889         assert(nevents + offset <= p.buffer_size );
890
891         if(p.buffer && p.enabled) {
892             quadlet_t *buffer = (quadlet_t *)(p.buffer);
893             buffer += offset;
894    
895             for (j = 0;j < nevents; j += 1)
896             {
897                 float *in = (float *)buffer;
898                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
899                 unsigned int tmp = ((int) v);
900                 tmp = ( tmp >> 8 ) | 0x40000000;
901                 *target_event = htonl((quadlet_t)tmp);
902                 buffer++;
903                 target_event += m_dimension;
904             }
905         } else {
906             for (j = 0;j < nevents; j += 1)
907             {
908                 // hardcoded little endian
909                 *target_event = 0x00000040;
910                 target_event += m_dimension;
911             }
912         }
913     }
914 }
915 #endif
916
917 /**
918  * @brief encodes all midi ports in the cache to events (silence)
919  * @param data
920  * @param offset
921  * @param nevents
922  */
923 void
924 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
925                                                      unsigned int offset,
926                                                      unsigned int nevents)
927 {
928     quadlet_t *target_event;
929     unsigned int i,j;
930
931     for (i = 0; i < m_nb_midi_ports; i++) {
932         struct _MIDI_port_cache &p = m_midi_ports.at(i);
933
934         for (j = p.location;j < nevents; j += 8) {
935             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
936             *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
937         }
938     }
939 }
940
941 /**
942  * @brief encodes all midi ports in the cache to events
943  * @param data
944  * @param offset
945  * @param nevents
946  */
947 void
948 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
949                                               unsigned int offset,
950                                               unsigned int nevents)
951 {
952     quadlet_t *target_event;
953     unsigned int i,j;
954
955     for (i = 0; i < m_nb_midi_ports; i++) {
956         struct _MIDI_port_cache &p = m_midi_ports.at(i);
957         if (p.buffer && p.enabled) {
958             uint32_t *buffer = (quadlet_t *)(p.buffer);
959             buffer += offset;
960
961             for (j = p.location;j < nevents; j += 8) {
962                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
963
964                 if ( *buffer & 0xFF000000 )   // we can send a byte
965                 {
966                     quadlet_t tmpval;
967                     tmpval = ((*buffer)<<16) & 0x00FF0000;
968                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
969                     *target_event = htonl(tmpval);
970
971 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
972 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
973 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
974 //                                data, target_event, tmpval );
975                 } else {
976                     // can't send a byte, either because there is no byte,
977                     // or because this would exceed the maximum rate
978                     // FIXME: this can be ifdef optimized since it's a constant
979                     *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
980                 }
981                 buffer+=8;
982             }
983         } else {
984             for (j = p.location;j < nevents; j += 8) {
985                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
986                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
987                 *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
988             }
989         }
990     }
991 }
992
993 bool
994 AmdtpTransmitStreamProcessor::initPortCache() {
995     // make use of the fact that audio ports are the first ports in
996     // the cluster as per AMDTP. so we can sort the ports by position
997     // and have very efficient lookups:
998     // m_float_ports.at(i).buffer -> audio stream i buffer
999     // for midi ports we simply cache all port info since they are (usually) not
1000     // that numerous
1001     m_nb_audio_ports = 0;
1002     m_audio_ports.clear();
1003    
1004     m_nb_midi_ports = 0;
1005     m_midi_ports.clear();
1006    
1007     for(PortVectorIterator it = m_Ports.begin();
1008         it != m_Ports.end();
1009         ++it )
1010     {
1011         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1012         assert(pinfo); // this should not fail!!
1013
1014         switch( pinfo->getFormat() )
1015         {
1016             case AmdtpPortInfo::E_MBLA:
1017                 m_nb_audio_ports++;
1018                 break;
1019             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1020                 break;
1021             case AmdtpPortInfo::E_Midi:
1022                 m_nb_midi_ports++;
1023                 break;
1024             default: // ignore
1025                 break;
1026         }
1027     }
1028
1029     unsigned int idx;
1030     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1031         for(PortVectorIterator it = m_Ports.begin();
1032             it != m_Ports.end();
1033             ++it )
1034         {
1035             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1036             debugOutput(DEBUG_LEVEL_VERY_VERBOSE, "idx %u: looking at port %s at position %u\n",
1037                                               idx, (*it)->getName().c_str(), pinfo->getPosition());
1038             if(pinfo->getPosition() == idx) {
1039                 struct _MBLA_port_cache p;
1040                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1041                 if(p.port == NULL) {
1042                     debugError("Port is not an AmdtpAudioPort!\n");
1043                     return false;
1044                 }
1045                 p.buffer = NULL; // to be filled by updatePortCache
1046                 #ifdef DEBUG
1047                 p.buffer_size = (*it)->getBufferSize();
1048                 #endif
1049
1050                 m_audio_ports.push_back(p);
1051                 debugOutput(DEBUG_LEVEL_VERBOSE, "Cached port %s at position %u\n",
1052                                                  p.port->getName().c_str(), idx);
1053                 goto next_index;
1054             }
1055         }
1056         debugError("No MBLA port found for position %d\n", idx);
1057         return false;
1058 next_index:
1059         continue;
1060     }
1061
1062     for(PortVectorIterator it = m_Ports.begin();
1063         it != m_Ports.end();
1064         ++it )
1065     {
1066         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1067         debugOutput(DEBUG_LEVEL_VERY_VERBOSE, "idx %u: looking at port %s at position %u, location %u\n",
1068                                         idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1069         if ((*it)->getPortType() == Port::E_Midi) {
1070             struct _MIDI_port_cache p;
1071             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1072             if(p.port == NULL) {
1073                 debugError("Port is not an AmdtpMidiPort!\n");
1074                 return false;
1075             }
1076             p.position = pinfo->getPosition();
1077             p.location = pinfo->getLocation();
1078             p.buffer = NULL; // to be filled by updatePortCache
1079             #ifdef DEBUG
1080             p.buffer_size = (*it)->getBufferSize();
1081             #endif
1082
1083             m_midi_ports.push_back(p);
1084             debugOutput(DEBUG_LEVEL_VERBOSE, "Cached port %s at position %u, location %u\n",
1085                                             p.port->getName().c_str(), p.position, p.location);
1086         }
1087     }
1088
1089     return true;
1090 }
1091
1092 void
1093 AmdtpTransmitStreamProcessor::updatePortCache() {
1094     unsigned int idx;
1095     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1096         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1097         AmdtpAudioPort *port = p.port;
1098         p.buffer = port->getBufferAddress();
1099         p.enabled = !port->isDisabled();
1100     }
1101     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1102         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1103         AmdtpMidiPort *port = p.port;
1104         p.buffer = port->getBufferAddress();
1105         p.enabled = !port->isDisabled();
1106     }
1107 }
1108
1109 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.