root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1021, 39.7 kB (checked in by ppalmers, 13 years ago)

Allow disabling the sending of payload in no-data packets through config.h.in. The DICE-II devices cannot cope with them.

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31 #include "libutil/float_cast.h"
32
33 #include "libieee1394/ieee1394service.h"
34 #include "libieee1394/IsoHandlerManager.h"
35 #include "libieee1394/cycletimer.h"
36
37 #include <netinet/in.h>
38 #include <assert.h>
39
40 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
41
42 namespace Streaming
43 {
44
45 /* transmit */
46 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
47         : StreamProcessor(parent, ePT_Transmit)
48         , m_dimension( dimension )
49         , m_dbc( 0 )
50         , m_nb_audio_ports( 0 )
51         , m_nb_midi_ports( 0 )
52 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
53         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
54 #endif
55 {}
56
57 enum StreamProcessor::eChildReturnValue
58 AmdtpTransmitStreamProcessor::generatePacketHeader (
59     unsigned char *data, unsigned int *length,
60     unsigned char *tag, unsigned char *sy,
61     int cycle, unsigned int dropped, unsigned int max_length )
62 {
63     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
64     struct iec61883_packet *packet = (struct iec61883_packet *)data;
65     /* Our node ID can change after a bus reset, so it is best to fetch
66     * our node ID for each packet. */
67     packet->sid = m_local_node_id;
68
69     packet->dbs = m_dimension;
70     packet->fn = 0;
71     packet->qpc = 0;
72     packet->sph = 0;
73     packet->reserved = 0;
74     packet->dbc = m_dbc;
75     packet->eoh1 = 2;
76     packet->fmt = IEC61883_FMT_AMDTP;
77
78     *tag = IEC61883_TAG_WITH_CIP;
79     *sy = 0;
80
81     signed int fc;
82     uint64_t presentation_time;
83     unsigned int presentation_cycle;
84     int cycles_until_presentation;
85
86     uint64_t transmit_at_time;
87     unsigned int transmit_at_cycle;
88     int cycles_until_transmit;
89
90     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
91                         "Try for cycle %d\n", cycle );
92     // check whether the packet buffer has packets for us to send.
93     // the base timestamp is the one of the next sample in the buffer
94     ffado_timestamp_t ts_head_tmp;
95     m_data_buffer->getBufferHeadTimestamp ( &ts_head_tmp, &fc ); // thread safe
96
97     // the timestamp gives us the time at which we want the sample block
98     // to be output by the device
99     presentation_time = ( uint64_t ) ts_head_tmp;
100
101     // now we calculate the time when we have to transmit the sample block
102     transmit_at_time = substractTicks ( presentation_time, AMDTP_TRANSMIT_TRANSFER_DELAY );
103
104     // calculate the cycle this block should be presented in
105     // (this is just a virtual calculation since at that time it should
106     //  already be in the device's buffer)
107     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
108
109     // calculate the cycle this block should be transmitted in
110     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
111
112     // we can check whether this cycle is within the 'window' we have
113     // to send this packet.
114     // first calculate the number of cycles left before presentation time
115     cycles_until_presentation = diffCycles ( presentation_cycle, cycle );
116
117     // we can check whether this cycle is within the 'window' we have
118     // to send this packet.
119     // first calculate the number of cycles left before presentation time
120     cycles_until_transmit = diffCycles ( transmit_at_cycle, cycle );
121
122     if (dropped) {
123         debugOutput( DEBUG_LEVEL_VERBOSE,
124                      "Gen HDR: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
125                      cycle,
126                      transmit_at_cycle, cycles_until_transmit,
127                      transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
128                      presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
129     }
130     // two different options:
131     // 1) there are not enough frames for one packet
132     //      => determine wether this is a problem, since we might still
133     //         have some time to send it
134     // 2) there are enough packets
135     //      => determine whether we have to send them in this packet
136     if ( fc < ( signed int ) m_syt_interval )
137     {
138         // not enough frames in the buffer,
139
140         // we can still postpone the queueing of the packets
141         // if we are far enough ahead of the presentation time
142         if ( cycles_until_presentation <= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
143         {
144             debugOutput( DEBUG_LEVEL_NORMAL,
145                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
146                          fc, cycle, transmit_at_cycle, cycles_until_transmit );
147             // we are too late
148             return eCRV_XRun;
149         }
150         else
151         {
152             #if DEBUG_EXTREME
153             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
154
155             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
156                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
157                                fc, cycle, transmit_at_cycle, cycles_until_transmit, now_cycle );
158             #endif
159
160             // there is still time left to send the packet
161             // we want the system to give this packet another go at a later time instant
162             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
163
164             // we could wait here for a certain time before trying again. However, this
165             // is not going to work since we then block the iterator thread, hence also
166             // the receiving code, meaning that we are not processing received packets,
167             // and hence there is no progression in the number of frames available.
168
169             // for example:
170             // SleepRelativeUsec(125); // one cycle
171             // goto try_block_of_frames;
172
173             // or more advanced, calculate how many cycles we are ahead of 'now' and
174             // base the sleep on that.
175
176             // note that this requires that there is one thread for each IsoHandler,
177             // otherwise we're in the deadlock described above.
178         }
179     }
180     else
181     {
182         // there are enough frames, so check the time they are intended for
183         // all frames have a certain 'time window' in which they can be sent
184         // this corresponds to the range of the timestamp mechanism:
185         // we can send a packet 15 cycles in advance of the 'presentation time'
186         // in theory we can send the packet up till one cycle before the presentation time,
187         // however this is not very smart.
188
189         // There are 3 options:
190         // 1) the frame block is too early
191         //      => send an empty packet
192         // 2) the frame block is within the window
193         //      => send it
194         // 3) the frame block is too late
195         //      => discard (and raise xrun?)
196         //         get next block of frames and repeat
197
198         if(cycles_until_transmit < 0)
199         {
200             // we are too late
201             debugOutput(DEBUG_LEVEL_NORMAL,
202                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
203                         cycle,
204                         transmit_at_cycle, cycles_until_transmit,
205                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
206             //debugShowBackLogLines(200);
207 //             // however, if we can send this sufficiently before the presentation
208 //             // time, it could be harmless.
209 //             // NOTE: dangerous since the device has no way of reporting that it didn't get
210 //             //       this packet on time.
211 //             if(cycles_until_presentation >= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION)
212 //             {
213 //                 // we are not that late and can still try to transmit the packet
214 //                 m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
215 //                 m_last_timestamp = presentation_time;
216 //                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
217 //             }
218 //             else   // definitely too late
219 //             {
220                 return eCRV_XRun;
221 //             }
222         }
223         else if(cycles_until_transmit <= AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY)
224         {
225             // it's time send the packet
226             m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
227             m_last_timestamp = presentation_time;
228
229             // FIXME: this should not be multiplied by 2
230             return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
231         }
232         else
233         {
234             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
235                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
236                                cycle,
237                                transmit_at_cycle, cycles_until_transmit,
238                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
239                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
240 #ifdef DEBUG
241             if ( cycles_until_transmit > AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY + 1 )
242             {
243                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
244                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
245                                    cycle,
246                                    transmit_at_cycle, cycles_until_transmit,
247                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
248                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
249             }
250 #endif
251             // we are too early, send only an empty packet
252             return eCRV_EmptyPacket;
253         }
254     }
255     return eCRV_Invalid;
256 }
257
258 enum StreamProcessor::eChildReturnValue
259 AmdtpTransmitStreamProcessor::generatePacketData (
260     unsigned char *data, unsigned int *length,
261     unsigned char *tag, unsigned char *sy,
262     int cycle, unsigned int dropped, unsigned int max_length )
263 {
264     if ( m_data_buffer->readFrames ( m_syt_interval, ( char * ) ( data + 8 ) ) )
265     {
266         debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
267                            "XMIT DATA (cy %04d): TSP=%011llu (%04u)\n",
268                            cycle, m_last_timestamp, (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
269         return eCRV_OK;
270     }
271     else return eCRV_XRun;
272
273 }
274
275 enum StreamProcessor::eChildReturnValue
276 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
277     unsigned char *data, unsigned int *length,
278     unsigned char *tag, unsigned char *sy,
279     int cycle, unsigned int dropped, unsigned int max_length )
280 {
281     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
282     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
283                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
284                        cycle, m_last_timestamp, (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
285
286     packet->sid = m_local_node_id;
287
288     packet->dbs = m_dimension;
289     packet->fn = 0;
290     packet->qpc = 0;
291     packet->sph = 0;
292     packet->reserved = 0;
293     packet->dbc = m_dbc;
294     packet->eoh1 = 2;
295     packet->fmt = IEC61883_FMT_AMDTP;
296
297     *tag = IEC61883_TAG_WITH_CIP;
298     *sy = 0;
299
300     m_dbc += fillNoDataPacketHeader ( packet, length );
301     return eCRV_Packet;
302 }
303
304 enum StreamProcessor::eChildReturnValue
305 AmdtpTransmitStreamProcessor::generateSilentPacketData (
306     unsigned char *data, unsigned int *length,
307     unsigned char *tag, unsigned char *sy,
308     int cycle, unsigned int dropped, unsigned int max_length )
309 {
310     return eCRV_OK; // no need to do anything
311 }
312
313 enum StreamProcessor::eChildReturnValue
314 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
315     unsigned char *data, unsigned int *length,
316     unsigned char *tag, unsigned char *sy,
317     int cycle, unsigned int dropped, unsigned int max_length )
318 {
319     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
320     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
321                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
322                        cycle, m_last_timestamp, (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
323     packet->sid = m_local_node_id;
324
325     packet->dbs = m_dimension;
326     packet->fn = 0;
327     packet->qpc = 0;
328     packet->sph = 0;
329     packet->reserved = 0;
330     packet->dbc = m_dbc;
331     packet->eoh1 = 2;
332     packet->fmt = IEC61883_FMT_AMDTP;
333
334     *tag = IEC61883_TAG_WITH_CIP;
335     *sy = 0;
336
337     m_dbc += fillNoDataPacketHeader ( packet, length );
338     return eCRV_OK;
339 }
340
341 enum StreamProcessor::eChildReturnValue
342 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
343     unsigned char *data, unsigned int *length,
344     unsigned char *tag, unsigned char *sy,
345     int cycle, unsigned int dropped, unsigned int max_length )
346 {
347     return eCRV_OK; // no need to do anything
348 }
349
350 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
351     struct iec61883_packet *packet, unsigned int* length,
352     uint32_t ts )
353 {
354
355     packet->fdf = m_fdf;
356
357     // convert the timestamp to SYT format
358     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
359     packet->syt = ntohs ( timestamp_SYT );
360
361     // FIXME: use a precomputed value here
362     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
363
364     return m_syt_interval;
365 }
366
367 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
368     struct iec61883_packet *packet, unsigned int* length )
369 {
370     // no-data packets have syt=0xFFFF
371     // and (can) have the usual amount of events as dummy data
372     // DBC is not increased
373     packet->fdf = IEC61883_FDF_NODATA;
374     packet->syt = 0xffff;
375
376 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
377     if ( m_send_nodata_payload )
378     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
379         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
380         return m_syt_interval;
381     } else { // no-data packets without payload
382         *length = 2*sizeof ( quadlet_t );
383         return 0;
384     }
385 #else
386     // no-data packets without payload
387     *length = 2*sizeof ( quadlet_t );
388     return 0;
389 #endif
390 }
391
392 unsigned int
393 AmdtpTransmitStreamProcessor::getSytInterval() {
394     switch (m_StreamProcessorManager.getNominalRate()) {
395         case 32000:
396         case 44100:
397         case 48000:
398             return 8;
399         case 88200:
400         case 96000:
401             return 16;
402         case 176400:
403         case 192000:
404             return 32;
405         default:
406             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
407             return 0;
408     }
409 }
410 unsigned int
411 AmdtpTransmitStreamProcessor::getFDF() {
412     switch (m_StreamProcessorManager.getNominalRate()) {
413         case 32000: return IEC61883_FDF_SFC_32KHZ;
414         case 44100: return IEC61883_FDF_SFC_44K1HZ;
415         case 48000: return IEC61883_FDF_SFC_48KHZ;
416         case 88200: return IEC61883_FDF_SFC_88K2HZ;
417         case 96000: return IEC61883_FDF_SFC_96KHZ;
418         case 176400: return IEC61883_FDF_SFC_176K4HZ;
419         case 192000: return IEC61883_FDF_SFC_192KHZ;
420         default:
421             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
422             return 0;
423     }
424 }
425
426 bool AmdtpTransmitStreamProcessor::prepareChild()
427 {
428     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
429     m_syt_interval = getSytInterval();
430     m_fdf = getFDF();
431
432     iec61883_cip_init (
433         &m_cip_status,
434         IEC61883_FMT_AMDTP,
435         m_fdf,
436         m_StreamProcessorManager.getNominalRate(),
437         m_dimension,
438         m_syt_interval );
439
440     if (!initPortCache()) {
441         debugError("Could not init port cache\n");
442         return false;
443     }
444
445     return true;
446 }
447
448 /*
449 * compose the event streams for the packets from the port buffers
450 */
451 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
452         unsigned int nevents, unsigned int offset )
453 {
454     // update the variable parts of the cache
455     updatePortCache();
456
457     // encode audio data
458     switch(m_StreamProcessorManager.getAudioDataType()) {
459         case StreamProcessorManager::eADT_Int24:
460             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
461             break;
462         case StreamProcessorManager::eADT_Float:
463             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
464             break;
465     }
466
467     // do midi ports
468     encodeMidiPorts((quadlet_t *)data, offset, nevents);
469     return true;
470 }
471
472 bool
473 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
474     char *data, unsigned int nevents, unsigned int offset)
475 {
476     // no need to update the port cache when transmitting silence since
477     // no dynamic values are used to do so.
478     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
479     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
480     return true;
481 }
482
483 /**
484  * @brief encodes all audio ports in the cache to events (silent data)
485  * @param data
486  * @param offset
487  * @param nevents
488  */
489 void
490 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
491                                                       unsigned int offset,
492                                                       unsigned int nevents)
493 {
494     unsigned int j;
495     quadlet_t *target_event;
496     unsigned int i;
497
498     for (i = 0; i < m_nb_audio_ports; i++) {
499         target_event = (quadlet_t *)(data + i);
500
501         for (j = 0;j < nevents; j += 1)
502         {
503             *target_event = 0x00000040;
504             target_event += m_dimension;
505         }
506     }
507 }
508
509 #ifdef __SSE2__
510 //#if 0
511 #include <emmintrin.h>
512 #warning SSE2 build
513
514 /**
515  * @brief mux all audio ports to events
516  * @param data
517  * @param offset
518  * @param nevents
519  */
520 void
521 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
522                                                     unsigned int offset,
523                                                     unsigned int nevents)
524 {
525     unsigned int j;
526     quadlet_t *target_event;
527     unsigned int i;
528
529     float * client_buffers[4];
530     float tmp_values[4] __attribute__ ((aligned (16)));
531     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
532
533     // prepare the scratch buffer
534     assert(m_scratch_buffer_size_bytes > nevents * 4);
535     memset(m_scratch_buffer, 0, nevents * 4);
536
537     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
538     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
539
540 #if AMDTP_CLIP_FLOATS
541     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
542     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
543 #endif
544
545     // this assumes that audio ports are sorted by position,
546     // and that there are no gaps
547     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
548         struct _MBLA_port_cache *p;
549
550         // get the port buffers
551         for (j=0; j<4; j++) {
552             p = &(m_audio_ports.at(i+j));
553             if(p->buffer && p->enabled) {
554                 client_buffers[j] = (float *) p->buffer;
555                 client_buffers[j] += offset;
556             } else {
557                 // if a port is disabled or has no valid
558                 // buffer, use the scratch buffer (all zero's)
559                 client_buffers[j] = (float *) m_scratch_buffer;
560             }
561         }
562
563         // the base event for this position
564         target_event = (quadlet_t *)(data + i);
565
566         // process the events
567         for (j=0;j < nevents; j += 1)
568         {
569             // read the values
570             tmp_values[0] = *(client_buffers[0]);
571             tmp_values[1] = *(client_buffers[1]);
572             tmp_values[2] = *(client_buffers[2]);
573             tmp_values[3] = *(client_buffers[3]);
574
575             // now do the SSE based conversion/labeling
576             __m128 v_float = *((__m128*)tmp_values);
577             __m128i *target = (__m128i*)target_event;
578             __m128i v_int;
579
580             // clip
581 #if AMDTP_CLIP_FLOATS
582             // do SSE clipping
583             v_float = _mm_max_ps(v_float, v_min);
584             v_float = _mm_min_ps(v_float, v_max);
585 #endif
586
587             // multiply
588             v_float = _mm_mul_ps(v_float, mult);
589             // convert to signed integer
590             v_int = _mm_cvttps_epi32( v_float );
591             // shift right 8 bits
592             v_int = _mm_srli_epi32( v_int, 8 );
593             // label it
594             v_int = _mm_or_si128( v_int, label );
595
596             // do endian conversion (SSE is always little endian)
597             // do first swap
598             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
599             // do second swap
600             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
601
602             // store the packed int
603             // (target misalignment is assumed since we don't know the m_dimension)
604             _mm_storeu_si128 (target, v_int);
605
606             // increment the buffer pointers
607             client_buffers[0]++;
608             client_buffers[1]++;
609             client_buffers[2]++;
610             client_buffers[3]++;
611
612             // go to next target event position
613             target_event += m_dimension;
614         }
615     }
616
617     // do remaining ports
618     // NOTE: these can be time-SSE'd
619     for (; i < m_nb_audio_ports; i++) {
620         struct _MBLA_port_cache &p = m_audio_ports.at(i);
621         target_event = (quadlet_t *)(data + i);
622         assert(nevents + offset <= p.buffer_size );
623
624         if(p.buffer && p.enabled) {
625             float *buffer = (float *)(p.buffer);
626             buffer += offset;
627    
628             for (j = 0;j < nevents; j += 4)
629             {
630                 // read the values
631                 tmp_values[0] = *buffer;
632                 buffer++;
633                 tmp_values[1] = *buffer;
634                 buffer++;
635                 tmp_values[2] = *buffer;
636                 buffer++;
637                 tmp_values[3] = *buffer;
638                 buffer++;
639
640                 // now do the SSE based conversion/labeling
641                 __m128 v_float = *((__m128*)tmp_values);
642                 __m128i v_int;
643
644 #if AMDTP_CLIP_FLOATS
645                 // do SSE clipping
646                 v_float = _mm_max_ps(v_float, v_min);
647                 v_float = _mm_min_ps(v_float, v_max);
648 #endif
649
650                 // multiply
651                 v_float = _mm_mul_ps(v_float, mult);
652                 // convert to signed integer
653                 v_int = _mm_cvttps_epi32( v_float );
654                 // shift right 8 bits
655                 v_int = _mm_srli_epi32( v_int, 8 );
656                 // label it
657                 v_int = _mm_or_si128( v_int, label );
658    
659                 // do endian conversion (SSE is always little endian)
660                 // do first swap
661                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
662                 // do second swap
663                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
664
665                 // store the packed int
666                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
667
668                 // increment the buffer pointers
669                 *target_event = tmp_values_int[0];
670                 target_event += m_dimension;
671                 *target_event = tmp_values_int[1];
672                 target_event += m_dimension;
673                 *target_event = tmp_values_int[2];
674                 target_event += m_dimension;
675                 *target_event = tmp_values_int[3];
676                 target_event += m_dimension;
677             }
678
679             // do the remainder of the events
680             for(;j < nevents; j += 1) {
681                 float *in = (float *)buffer;
682 #if AMDTP_CLIP_FLOATS
683                 if(*in > 1.0) *in=1.0;
684                 if(*in < -1.0) *in=-1.0;
685 #endif
686                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
687                 unsigned int tmp = ((int) v);
688                 tmp = ( tmp >> 8 ) | 0x40000000;
689                 *target_event = htonl((quadlet_t)tmp);
690                 buffer++;
691                 target_event += m_dimension;
692             }
693
694         } else {
695             for (j = 0;j < nevents; j += 1)
696             {
697                 // hardcoded byte swapped
698                 *target_event = 0x00000040;
699                 target_event += m_dimension;
700             }
701         }
702     }
703 }
704
705
706 /**
707  * @brief mux all audio ports to events
708  * @param data
709  * @param offset
710  * @param nevents
711  */
712 void
713 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
714                                                     unsigned int offset,
715                                                     unsigned int nevents)
716 {
717     unsigned int j;
718     quadlet_t *target_event;
719     unsigned int i;
720
721     uint32_t *client_buffers[4];
722     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
723
724     // prepare the scratch buffer
725     assert(m_scratch_buffer_size_bytes > nevents * 4);
726     memset(m_scratch_buffer, 0, nevents * 4);
727
728     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
729     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
730
731     // this assumes that audio ports are sorted by position,
732     // and that there are no gaps
733     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
734         struct _MBLA_port_cache *p;
735
736         // get the port buffers
737         for (j=0; j<4; j++) {
738             p = &(m_audio_ports.at(i+j));
739             if(p->buffer && p->enabled) {
740                 client_buffers[j] = (uint32_t *) p->buffer;
741                 client_buffers[j] += offset;
742             } else {
743                 // if a port is disabled or has no valid
744                 // buffer, use the scratch buffer (all zero's)
745                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
746             }
747         }
748
749         // the base event for this position
750         target_event = (quadlet_t *)(data + i);
751
752         // process the events
753         for (j=0;j < nevents; j += 1)
754         {
755             // read the values
756             tmp_values[0] = *(client_buffers[0]);
757             tmp_values[1] = *(client_buffers[1]);
758             tmp_values[2] = *(client_buffers[2]);
759             tmp_values[3] = *(client_buffers[3]);
760
761             // now do the SSE based conversion/labeling
762             __m128i *target = (__m128i*)target_event;
763             __m128i v_int = *((__m128i*)tmp_values);;
764
765             // mask
766             v_int = _mm_and_si128( v_int, mask );
767             // label it
768             v_int = _mm_or_si128( v_int, label );
769
770             // do endian conversion (SSE is always little endian)
771             // do first swap
772             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
773             // do second swap
774             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
775
776             // store the packed int
777             // (target misalignment is assumed since we don't know the m_dimension)
778             _mm_storeu_si128 (target, v_int);
779
780             // increment the buffer pointers
781             client_buffers[0]++;
782             client_buffers[1]++;
783             client_buffers[2]++;
784             client_buffers[3]++;
785
786             // go to next target event position
787             target_event += m_dimension;
788         }
789     }
790
791     // do remaining ports
792     // NOTE: these can be time-SSE'd
793     for (; i < m_nb_audio_ports; i++) {
794         struct _MBLA_port_cache &p = m_audio_ports.at(i);
795         target_event = (quadlet_t *)(data + i);
796         assert(nevents + offset <= p.buffer_size );
797
798         if(p.buffer && p.enabled) {
799             uint32_t *buffer = (uint32_t *)(p.buffer);
800             buffer += offset;
801    
802             for (j = 0;j < nevents; j += 4)
803             {
804                 // read the values
805                 tmp_values[0] = *buffer;
806                 buffer++;
807                 tmp_values[1] = *buffer;
808                 buffer++;
809                 tmp_values[2] = *buffer;
810                 buffer++;
811                 tmp_values[3] = *buffer;
812                 buffer++;
813
814                 // now do the SSE based conversion/labeling
815                 __m128i v_int = *((__m128i*)tmp_values);;
816
817                 // mask
818                 v_int = _mm_and_si128( v_int, mask );
819                 // label it
820                 v_int = _mm_or_si128( v_int, label );
821
822                 // do endian conversion (SSE is always little endian)
823                 // do first swap
824                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
825                 // do second swap
826                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
827
828                 // store the packed int
829                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
830
831                 // increment the buffer pointers
832                 *target_event = tmp_values[0];
833                 target_event += m_dimension;
834                 *target_event = tmp_values[1];
835                 target_event += m_dimension;
836                 *target_event = tmp_values[2];
837                 target_event += m_dimension;
838                 *target_event = tmp_values[3];
839                 target_event += m_dimension;
840             }
841
842             // do the remainder of the events
843             for(;j < nevents; j += 1) {
844                 uint32_t in = (uint32_t)(*buffer);
845                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
846                 buffer++;
847                 target_event += m_dimension;
848             }
849
850         } else {
851             for (j = 0;j < nevents; j += 1)
852             {
853                 // hardcoded byte swapped
854                 *target_event = 0x00000040;
855                 target_event += m_dimension;
856             }
857         }
858     }
859 }
860
861 #else
862
863 /**
864  * @brief mux all audio ports to events
865  * @param data
866  * @param offset
867  * @param nevents
868  */
869 void
870 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
871                                                     unsigned int offset,
872                                                     unsigned int nevents)
873 {
874     unsigned int j;
875     quadlet_t *target_event;
876     unsigned int i;
877
878     for (i = 0; i < m_nb_audio_ports; i++) {
879         struct _MBLA_port_cache &p = m_audio_ports.at(i);
880         target_event = (quadlet_t *)(data + i);
881         assert(nevents + offset <= p.buffer_size );
882
883         if(p.buffer && p.enabled) {
884             quadlet_t *buffer = (quadlet_t *)(p.buffer);
885             buffer += offset;
886    
887             for (j = 0;j < nevents; j += 1)
888             {
889                 uint32_t in = (uint32_t)(*buffer);
890                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
891                 buffer++;
892                 target_event += m_dimension;
893             }
894         } else {
895             for (j = 0;j < nevents; j += 1)
896             {
897                 *target_event = 0x00000040;
898                 target_event += m_dimension;
899             }
900         }
901     }
902 }
903
904 /**
905  * @brief mux all audio ports to events
906  * @param data
907  * @param offset
908  * @param nevents
909  */
910 void
911 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
912                                                     unsigned int offset,
913                                                     unsigned int nevents)
914 {
915     unsigned int j;
916     quadlet_t *target_event;
917     unsigned int i;
918
919     for (i = 0; i < m_nb_audio_ports; i++) {
920         struct _MBLA_port_cache &p = m_audio_ports.at(i);
921         target_event = (quadlet_t *)(data + i);
922         assert(nevents + offset <= p.buffer_size );
923
924         if(p.buffer && p.enabled) {
925             quadlet_t *buffer = (quadlet_t *)(p.buffer);
926             buffer += offset;
927    
928             for (j = 0;j < nevents; j += 1)
929             {
930                 float *in = (float *)buffer;
931 #if AMDTP_CLIP_FLOATS
932                 if(*in > 1.0) *in=1.0;
933                 if(*in < -1.0) *in=-1.0;
934 #endif
935                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
936                 unsigned int tmp = ((int) lrintf(v));
937
938                 tmp = ( tmp >> 8 ) | 0x40000000;
939                 *target_event = htonl((quadlet_t)tmp);
940                 buffer++;
941                 target_event += m_dimension;
942             }
943         } else {
944             for (j = 0;j < nevents; j += 1)
945             {
946                 // hardcoded little endian
947                 *target_event = 0x00000040;
948                 target_event += m_dimension;
949             }
950         }
951     }
952 }
953 #endif
954
955 /**
956  * @brief encodes all midi ports in the cache to events (silence)
957  * @param data
958  * @param offset
959  * @param nevents
960  */
961 void
962 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
963                                                      unsigned int offset,
964                                                      unsigned int nevents)
965 {
966     quadlet_t *target_event;
967     unsigned int i,j;
968
969     for (i = 0; i < m_nb_midi_ports; i++) {
970         struct _MIDI_port_cache &p = m_midi_ports.at(i);
971
972         for (j = p.location;j < nevents; j += 8) {
973             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
974             *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
975         }
976     }
977 }
978
979 /**
980  * @brief encodes all midi ports in the cache to events
981  * @param data
982  * @param offset
983  * @param nevents
984  */
985 void
986 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
987                                               unsigned int offset,
988                                               unsigned int nevents)
989 {
990     quadlet_t *target_event;
991     unsigned int i,j;
992
993     for (i = 0; i < m_nb_midi_ports; i++) {
994         struct _MIDI_port_cache &p = m_midi_ports.at(i);
995         if (p.buffer && p.enabled) {
996             uint32_t *buffer = (quadlet_t *)(p.buffer);
997             buffer += offset;
998
999             for (j = p.location;j < nevents; j += 8) {
1000                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1001
1002                 if ( *buffer & 0xFF000000 )   // we can send a byte
1003                 {
1004                     quadlet_t tmpval;
1005                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1006                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1007                     *target_event = htonl(tmpval);
1008
1009 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1010 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1011 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1012 //                                data, target_event, tmpval );
1013                 } else {
1014                     // can't send a byte, either because there is no byte,
1015                     // or because this would exceed the maximum rate
1016                     // FIXME: this can be ifdef optimized since it's a constant
1017                     *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1018                 }
1019                 buffer+=8;
1020             }
1021         } else {
1022             for (j = p.location;j < nevents; j += 8) {
1023                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1024                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1025                 *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1026             }
1027         }
1028     }
1029 }
1030
1031 bool
1032 AmdtpTransmitStreamProcessor::initPortCache() {
1033     // make use of the fact that audio ports are the first ports in
1034     // the cluster as per AMDTP. so we can sort the ports by position
1035     // and have very efficient lookups:
1036     // m_float_ports.at(i).buffer -> audio stream i buffer
1037     // for midi ports we simply cache all port info since they are (usually) not
1038     // that numerous
1039     m_nb_audio_ports = 0;
1040     m_audio_ports.clear();
1041    
1042     m_nb_midi_ports = 0;
1043     m_midi_ports.clear();
1044    
1045     for(PortVectorIterator it = m_Ports.begin();
1046         it != m_Ports.end();
1047         ++it )
1048     {
1049         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1050         assert(pinfo); // this should not fail!!
1051
1052         switch( pinfo->getFormat() )
1053         {
1054             case AmdtpPortInfo::E_MBLA:
1055                 m_nb_audio_ports++;
1056                 break;
1057             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1058                 break;
1059             case AmdtpPortInfo::E_Midi:
1060                 m_nb_midi_ports++;
1061                 break;
1062             default: // ignore
1063                 break;
1064         }
1065     }
1066
1067     unsigned int idx;
1068     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1069         for(PortVectorIterator it = m_Ports.begin();
1070             it != m_Ports.end();
1071             ++it )
1072         {
1073             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1074             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1075                         "idx %u: looking at port %s at position %u\n",
1076                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1077             if(pinfo->getPosition() == idx) {
1078                 struct _MBLA_port_cache p;
1079                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1080                 if(p.port == NULL) {
1081                     debugError("Port is not an AmdtpAudioPort!\n");
1082                     return false;
1083                 }
1084                 p.buffer = NULL; // to be filled by updatePortCache
1085                 #ifdef DEBUG
1086                 p.buffer_size = (*it)->getBufferSize();
1087                 #endif
1088
1089                 m_audio_ports.push_back(p);
1090                 debugOutput(DEBUG_LEVEL_VERBOSE,
1091                             "Cached port %s at position %u\n",
1092                             p.port->getName().c_str(), idx);
1093                 goto next_index;
1094             }
1095         }
1096         debugError("No MBLA port found for position %d\n", idx);
1097         return false;
1098 next_index:
1099         continue;
1100     }
1101
1102     for(PortVectorIterator it = m_Ports.begin();
1103         it != m_Ports.end();
1104         ++it )
1105     {
1106         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1107         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1108                     "idx %u: looking at port %s at position %u, location %u\n",
1109                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1110         if ((*it)->getPortType() == Port::E_Midi) {
1111             struct _MIDI_port_cache p;
1112             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1113             if(p.port == NULL) {
1114                 debugError("Port is not an AmdtpMidiPort!\n");
1115                 return false;
1116             }
1117             p.position = pinfo->getPosition();
1118             p.location = pinfo->getLocation();
1119             p.buffer = NULL; // to be filled by updatePortCache
1120             #ifdef DEBUG
1121             p.buffer_size = (*it)->getBufferSize();
1122             #endif
1123
1124             m_midi_ports.push_back(p);
1125             debugOutput(DEBUG_LEVEL_VERBOSE,
1126                         "Cached port %s at position %u, location %u\n",
1127                         p.port->getName().c_str(), p.position, p.location);
1128         }
1129     }
1130
1131     return true;
1132 }
1133
1134 void
1135 AmdtpTransmitStreamProcessor::updatePortCache() {
1136     unsigned int idx;
1137     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1138         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1139         AmdtpAudioPort *port = p.port;
1140         p.buffer = port->getBufferAddress();
1141         p.enabled = !port->isDisabled();
1142     }
1143     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1144         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1145         AmdtpMidiPort *port = p.port;
1146         p.buffer = port->getBufferAddress();
1147         p.enabled = !port->isDisabled();
1148     }
1149 }
1150
1151 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.