root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1763, 42.2 kB (checked in by ppalmers, 12 years ago)

Merged revisions 1536,1541,1544-1546,1549,1554-1562,1571,1579-1581,1618,1632,1634-1635,1661,1677-1679,1703-1704,1715,1720-1723,1743-1745,1755 via svnmerge from
svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0

Also fix remaining format string warnings.

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25
26 #include "AmdtpTransmitStreamProcessor.h"
27 #include "AmdtpPort.h"
28 #include "../StreamProcessorManager.h"
29 #include "devicemanager.h"
30
31 #include "libutil/Time.h"
32 #include "libutil/float_cast.h"
33
34 #include "libieee1394/ieee1394service.h"
35 #include "libieee1394/IsoHandlerManager.h"
36 #include "libieee1394/cycletimer.h"
37
38 #include "libutil/ByteSwap.h"
39 #include <assert.h>
40 #include <cstring>
41
42 #define likely(x)   __builtin_expect((x),1)
43 #define unlikely(x) __builtin_expect((x),0)
44
45 #define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
46 namespace Streaming
47 {
48
49 /* transmit */
50 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
51         : StreamProcessor(parent, ePT_Transmit)
52         , m_dimension( dimension )
53         , m_dbc( 0 )
54 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
55         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
56 #endif
57         , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
58         , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
59         , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
60         , m_nb_audio_ports( 0 )
61         , m_nb_midi_ports( 0 )
62 {}
63
64 enum StreamProcessor::eChildReturnValue
65 AmdtpTransmitStreamProcessor::generatePacketHeader (
66     unsigned char *data, unsigned int *length,
67     unsigned char *tag, unsigned char *sy,
68     uint32_t pkt_ctr )
69 {
70     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
71     struct iec61883_packet *packet = (struct iec61883_packet *)data;
72     /* Our node ID can change after a bus reset, so it is best to fetch
73     * our node ID for each packet. */
74     packet->sid = m_local_node_id;
75
76     packet->dbs = m_dimension;
77     packet->fn = 0;
78     packet->qpc = 0;
79     packet->sph = 0;
80     packet->reserved = 0;
81     packet->dbc = m_dbc;
82     packet->eoh1 = 2;
83     packet->fmt = IEC61883_FMT_AMDTP;
84
85     *tag = IEC61883_TAG_WITH_CIP;
86     *sy = 0;
87
88     signed int fc;
89     uint64_t presentation_time;
90     unsigned int presentation_cycle;
91     int cycles_until_presentation;
92
93     uint64_t transmit_at_time;
94     unsigned int transmit_at_cycle;
95     int cycles_until_transmit;
96
97     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
98                         "Try for cycle %d\n", (int) CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
99     // check whether the packet buffer has packets for us to send.
100     // the base timestamp is the one of the next sample in the buffer
101     ffado_timestamp_t ts_head_tmp;
102     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
103
104     // the timestamp gives us the time at which we want the sample block
105     // to be output by the device
106     presentation_time = ( uint64_t ) ts_head_tmp;
107
108     // now we calculate the time when we have to transmit the sample block
109     transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
110
111     // calculate the cycle this block should be presented in
112     // (this is just a virtual calculation since at that time it should
113     //  already be in the device's buffer)
114     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
115
116     // calculate the cycle this block should be transmitted in
117     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
118
119     // we can check whether this cycle is within the 'window' we have
120     // to send this packet.
121     // first calculate the number of cycles left before presentation time
122     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
123
124     // we can check whether this cycle is within the 'window' we have
125     // to send this packet.
126     // first calculate the number of cycles left before presentation time
127     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
128
129     // two different options:
130     // 1) there are not enough frames for one packet
131     //      => determine wether this is a problem, since we might still
132     //         have some time to send it
133     // 2) there are enough packets
134     //      => determine whether we have to send them in this packet
135     if ( fc < ( signed int ) m_syt_interval )
136     {
137         // not enough frames in the buffer,
138
139         // we can still postpone the queueing of the packets
140         // if we are far enough ahead of the presentation time
141         if ( cycles_until_presentation <= m_min_cycles_before_presentation )
142         {
143             debugOutput( DEBUG_LEVEL_NORMAL,
144                          "Insufficient frames (P): N=%02d, CY=%04d, TC=%04u, CUT=%04d\n",
145                          fc, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
146                          transmit_at_cycle, cycles_until_transmit );
147             // we are too late
148             return eCRV_XRun;
149         }
150         else
151         {
152             #if DEBUG_EXTREME
153             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
154
155             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
156                                "Insufficient frames (NP): N=%02d, CY=%04d, TC=%04u, CUT=%04d, NOW=%04d\n",
157                                fc, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
158                                transmit_at_cycle, cycles_until_transmit, now_cycle );
159             #endif
160
161             // there is still time left to send the packet
162             // we want the system to give this packet another go at a later time instant
163             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
164
165             // we could wait here for a certain time before trying again. However, this
166             // is not going to work since we then block the iterator thread, hence also
167             // the receiving code, meaning that we are not processing received packets,
168             // and hence there is no progression in the number of frames available.
169
170             // for example:
171             // SleepRelativeUsec(125); // one cycle
172             // goto try_block_of_frames;
173
174             // or more advanced, calculate how many cycles we are ahead of 'now' and
175             // base the sleep on that.
176
177             // note that this requires that there is one thread for each IsoHandler,
178             // otherwise we're in the deadlock described above.
179         }
180     }
181     else
182     {
183         // there are enough frames, so check the time they are intended for
184         // all frames have a certain 'time window' in which they can be sent
185         // this corresponds to the range of the timestamp mechanism:
186         // we can send a packet 15 cycles in advance of the 'presentation time'
187         // in theory we can send the packet up till one cycle before the presentation time,
188         // however this is not very smart.
189
190         // There are 3 options:
191         // 1) the frame block is too early
192         //      => send an empty packet
193         // 2) the frame block is within the window
194         //      => send it
195         // 3) the frame block is too late
196         //      => discard (and raise xrun?)
197         //         get next block of frames and repeat
198
199         if(cycles_until_transmit < 0)
200         {
201             // we are too late
202             debugOutput(DEBUG_LEVEL_VERBOSE,
203                         "Too late: CY=%04d, TC=%04u, CUT=%04d, TSP=%011"PRIu64" (%04u)\n",
204                         (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
205                         transmit_at_cycle, cycles_until_transmit,
206                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
207             //debugShowBackLogLines(200);
208             // however, if we can send this sufficiently before the presentation
209             // time, it could be harmless.
210             // NOTE: dangerous since the device has no way of reporting that it didn't get
211             //       this packet on time.
212             if(cycles_until_presentation >= m_min_cycles_before_presentation)
213             {
214                 // we are not that late and can still try to transmit the packet
215                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
216                 m_last_timestamp = presentation_time;
217                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
218             }
219             else   // definitely too late
220             {
221                 return eCRV_XRun;
222             }
223         }
224         else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
225         {
226             // it's time send the packet
227             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
228             m_last_timestamp = presentation_time;
229
230             // for timestamp tracing
231             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
232                                "XMIT PKT: TSP= %011"PRIu64" (%04u) (%04u) (%04u)\n",
233                                presentation_time,
234                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
235                                presentation_cycle, transmit_at_cycle);
236
237             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
238         }
239         else
240         {
241             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
242                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64" (%04u), TSP=%011"PRId64" (%04u)\n",
243                                (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
244                                transmit_at_cycle, cycles_until_transmit,
245                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
246                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
247 #ifdef DEBUG
248             if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
249             {
250                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
251                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64" (%04u), TSP=%011"PRId64"(%04u)\n",
252                                    (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
253                                    transmit_at_cycle, cycles_until_transmit,
254                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
255                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
256             }
257 #endif
258             // we are too early, send only an empty packet
259             return eCRV_EmptyPacket;
260         }
261     }
262     return eCRV_Invalid;
263 }
264
265 enum StreamProcessor::eChildReturnValue
266 AmdtpTransmitStreamProcessor::generatePacketData (
267     unsigned char *data, unsigned int *length )
268 {
269     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
270     {
271         debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
272                            "XMIT DATA: TSP= %011"PRIu64" (%04u)\n",
273                            m_last_timestamp,
274                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
275         #if 0
276         // debug code to output the packet content
277         char tmpbuff[8192];
278         int cnt=0;
279         quadlet_t *tmp = (quadlet_t *)((char *)(data + 8));
280
281         for(int i=0; i<m_syt_interval; i++) {
282             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "[%02d] ", i);
283             for(int j=0; j<m_dimension; j++) {
284                 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "%08X ", *tmp);
285                 tmp++;
286             }
287             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "\n");
288         }
289         debugOutput(DEBUG_LEVEL_VERBOSE, "\n%s\n", tmpbuff);
290         #endif
291         return eCRV_OK;
292     }
293     else return eCRV_XRun;
294 }
295
296 enum StreamProcessor::eChildReturnValue
297 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
298     unsigned char *data, unsigned int *length,
299     unsigned char *tag, unsigned char *sy,
300     uint32_t pkt_ctr )
301 {
302     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
303     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
304                        "XMIT SILENT (cy %04d): TSP=%011"PRIu64" (%04u)\n",
305                        (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
306                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
307
308     packet->sid = m_local_node_id;
309
310     packet->dbs = m_dimension;
311     packet->fn = 0;
312     packet->qpc = 0;
313     packet->sph = 0;
314     packet->reserved = 0;
315     packet->dbc = m_dbc;
316     packet->eoh1 = 2;
317     packet->fmt = IEC61883_FMT_AMDTP;
318
319     *tag = IEC61883_TAG_WITH_CIP;
320     *sy = 0;
321
322     m_dbc += fillNoDataPacketHeader(packet, length);
323     return eCRV_Packet;
324 }
325
326 enum StreamProcessor::eChildReturnValue
327 AmdtpTransmitStreamProcessor::generateSilentPacketData (
328     unsigned char *data, unsigned int *length )
329 {
330     return eCRV_OK; // no need to do anything
331 }
332
333 enum StreamProcessor::eChildReturnValue
334 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
335     unsigned char *data, unsigned int *length,
336     unsigned char *tag, unsigned char *sy,
337     uint32_t pkt_ctr )
338 {
339     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
340     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
341                        "XMIT EMPTY (cy %04d): TSP=%011"PRIu64" (%04u)\n",
342                        (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
343                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
344     packet->sid = m_local_node_id;
345
346     packet->dbs = m_dimension;
347     packet->fn = 0;
348     packet->qpc = 0;
349     packet->sph = 0;
350     packet->reserved = 0;
351     packet->dbc = m_dbc;
352     packet->eoh1 = 2;
353     packet->fmt = IEC61883_FMT_AMDTP;
354
355     *tag = IEC61883_TAG_WITH_CIP;
356     *sy = 0;
357
358     m_dbc += fillNoDataPacketHeader(packet, length);
359     return eCRV_OK;
360 }
361
362 enum StreamProcessor::eChildReturnValue
363 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
364     unsigned char *data, unsigned int *length )
365 {
366     return eCRV_OK; // no need to do anything
367 }
368
369 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
370     struct iec61883_packet *packet, unsigned int* length,
371     uint32_t ts )
372 {
373
374     packet->fdf = m_fdf;
375
376     // convert the timestamp to SYT format
377     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
378     packet->syt = CondSwapToBus16 ( timestamp_SYT );
379
380     // FIXME: use a precomputed value here
381     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
382
383     return m_syt_interval;
384 }
385
386 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
387     struct iec61883_packet *packet, unsigned int* length )
388 {
389     // no-data packets have syt=0xFFFF
390     // and (can) have the usual amount of events as dummy data
391     // DBC is not increased
392     packet->fdf = IEC61883_FDF_NODATA;
393     packet->syt = 0xffff;
394
395 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
396     if ( m_send_nodata_payload )
397     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
398         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
399         return m_syt_interval;
400     } else { // no-data packets without payload
401         *length = 2*sizeof ( quadlet_t );
402         return 0;
403     }
404 #else
405     // no-data packets without payload
406     *length = 2*sizeof ( quadlet_t );
407     return 0;
408 #endif
409 }
410
411 unsigned int
412 AmdtpTransmitStreamProcessor::getSytInterval() {
413     switch (m_StreamProcessorManager.getNominalRate()) {
414         case 32000:
415         case 44100:
416         case 48000:
417             return 8;
418         case 88200:
419         case 96000:
420             return 16;
421         case 176400:
422         case 192000:
423             return 32;
424         default:
425             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
426             return 0;
427     }
428 }
429
430 unsigned int
431 AmdtpTransmitStreamProcessor::getFDF() {
432     switch (m_StreamProcessorManager.getNominalRate()) {
433         case 32000: return IEC61883_FDF_SFC_32KHZ;
434         case 44100: return IEC61883_FDF_SFC_44K1HZ;
435         case 48000: return IEC61883_FDF_SFC_48KHZ;
436         case 88200: return IEC61883_FDF_SFC_88K2HZ;
437         case 96000: return IEC61883_FDF_SFC_96KHZ;
438         case 176400: return IEC61883_FDF_SFC_176K4HZ;
439         case 192000: return IEC61883_FDF_SFC_192KHZ;
440         default:
441             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
442             return 0;
443     }
444 }
445
446 bool AmdtpTransmitStreamProcessor::prepareChild()
447 {
448     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
449     m_syt_interval = getSytInterval();
450     m_fdf = getFDF();
451
452     debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF             : %d / %d\n", m_syt_interval, m_fdf );
453 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
454     debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
455 #endif
456     debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles      : %d\n", m_max_cycles_to_transmit_early );
457     debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay                 : %d\n", m_transmit_transfer_delay );
458     debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
459
460     iec61883_cip_init (
461         &m_cip_status,
462         IEC61883_FMT_AMDTP,
463         m_fdf,
464         m_StreamProcessorManager.getNominalRate(),
465         m_dimension,
466         m_syt_interval );
467
468     if (!initPortCache()) {
469         debugError("Could not init port cache\n");
470         return false;
471     }
472
473     return true;
474 }
475
476 /*
477 * compose the event streams for the packets from the port buffers
478 */
479 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
480         unsigned int nevents, unsigned int offset )
481 {
482     // update the variable parts of the cache
483     updatePortCache();
484
485     // encode audio data
486     switch(m_StreamProcessorManager.getAudioDataType()) {
487         case StreamProcessorManager::eADT_Int24:
488             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
489             break;
490         case StreamProcessorManager::eADT_Float:
491             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
492             break;
493     }
494
495     // do midi ports
496     encodeMidiPorts((quadlet_t *)data, offset, nevents);
497     return true;
498 }
499
500 bool
501 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
502     char *data, unsigned int nevents, unsigned int offset)
503 {
504     // no need to update the port cache when transmitting silence since
505     // no dynamic values are used to do so.
506     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
507     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
508     return true;
509 }
510
511 /**
512  * @brief encodes all audio ports in the cache to events (silent data)
513  * @param data
514  * @param offset
515  * @param nevents
516  */
517 void
518 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
519                                                       unsigned int offset,
520                                                       unsigned int nevents)
521 {
522     unsigned int j;
523     quadlet_t *target_event;
524     int i;
525
526     for (i = 0; i < m_nb_audio_ports; i++) {
527         target_event = (quadlet_t *)(data + i);
528
529         for (j = 0;j < nevents; j += 1)
530         {
531             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
532             target_event += m_dimension;
533         }
534     }
535 }
536
537 #ifdef __SSE2__
538 #include <emmintrin.h>
539 #warning SSE2 build
540
541 /**
542  * @brief mux all audio ports to events
543  * @param data
544  * @param offset
545  * @param nevents
546  */
547 void
548 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
549                                                     unsigned int offset,
550                                                     unsigned int nevents)
551 {
552     unsigned int j;
553     quadlet_t *target_event;
554     int i;
555
556     float * client_buffers[4];
557     float tmp_values[4] __attribute__ ((aligned (16)));
558     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
559
560     // prepare the scratch buffer
561     assert(m_scratch_buffer_size_bytes > nevents * 4);
562     memset(m_scratch_buffer, 0, nevents * 4);
563
564     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
565     const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
566     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
567
568 #if AMDTP_CLIP_FLOATS
569     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
570     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
571 #endif
572
573     // this assumes that audio ports are sorted by position,
574     // and that there are no gaps
575     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
576         struct _MBLA_port_cache *p;
577
578         // get the port buffers
579         for (j=0; j<4; j++) {
580             p = &(m_audio_ports.at(i+j));
581             if(likely(p->buffer && p->enabled)) {
582                 client_buffers[j] = (float *) p->buffer;
583                 client_buffers[j] += offset;
584             } else {
585                 // if a port is disabled or has no valid
586                 // buffer, use the scratch buffer (all zero's)
587                 client_buffers[j] = (float *) m_scratch_buffer;
588             }
589         }
590
591         // the base event for this position
592         target_event = (quadlet_t *)(data + i);
593         // process the events
594         for (j=0;j < nevents; j += 1)
595         {
596             // read the values
597             tmp_values[0] = *(client_buffers[0]);
598             tmp_values[1] = *(client_buffers[1]);
599             tmp_values[2] = *(client_buffers[2]);
600             tmp_values[3] = *(client_buffers[3]);
601
602             // now do the SSE based conversion/labeling
603             __m128 v_float = *((__m128*)tmp_values);
604             __m128i *target = (__m128i*)target_event;
605             __m128i v_int;
606
607             // clip
608 #if AMDTP_CLIP_FLOATS
609             // do SSE clipping
610             v_float = _mm_max_ps(v_float, v_min);
611             v_float = _mm_min_ps(v_float, v_max);
612 #endif
613
614             // multiply
615             v_float = _mm_mul_ps(v_float, mult);
616             // convert to signed integer
617             v_int = _mm_cvttps_epi32( v_float );
618             // mask
619             v_int = _mm_and_si128( v_int, mask );
620             // label it
621             v_int = _mm_or_si128( v_int, label );
622
623             // do endian conversion (SSE is always little endian)
624             // do first swap
625             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
626             // do second swap
627             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
628             // store the packed int
629             // (target misalignment is assumed since we don't know the m_dimension)
630             _mm_storeu_si128 (target, v_int);
631
632             // increment the buffer pointers
633             client_buffers[0]++;
634             client_buffers[1]++;
635             client_buffers[2]++;
636             client_buffers[3]++;
637
638             // go to next target event position
639             target_event += m_dimension;
640         }
641     }
642
643     // do remaining ports
644     // NOTE: these can be time-SSE'd
645     for (; i < (int)m_nb_audio_ports; i++) {
646         struct _MBLA_port_cache &p = m_audio_ports.at(i);
647         target_event = (quadlet_t *)(data + i);
648         assert(nevents + offset <= p.buffer_size );
649
650         if(likely(p.buffer && p.enabled)) {
651             float *buffer = (float *)(p.buffer);
652             buffer += offset;
653    
654             for (j = 0;j < nevents; j += 4)
655             {
656                 // read the values
657                 tmp_values[0] = *buffer;
658                 buffer++;
659                 tmp_values[1] = *buffer;
660                 buffer++;
661                 tmp_values[2] = *buffer;
662                 buffer++;
663                 tmp_values[3] = *buffer;
664                 buffer++;
665
666                 // now do the SSE based conversion/labeling
667                 __m128 v_float = *((__m128*)tmp_values);
668                 __m128i v_int;
669
670 #if AMDTP_CLIP_FLOATS
671                 // do SSE clipping
672                 v_float = _mm_max_ps(v_float, v_min);
673                 v_float = _mm_min_ps(v_float, v_max);
674 #endif
675                 // multiply
676                 v_float = _mm_mul_ps(v_float, mult);
677                 // convert to signed integer
678                 v_int = _mm_cvttps_epi32( v_float );
679                 // mask
680                 v_int = _mm_and_si128( v_int, mask );
681                 // label it
682                 v_int = _mm_or_si128( v_int, label );
683    
684                 // do endian conversion (SSE is always little endian)
685                 // do first swap
686                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
687                 // do second swap
688                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
689
690                 // store the packed int
691                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
692
693                 // increment the buffer pointers
694                 *target_event = tmp_values_int[0];
695                 target_event += m_dimension;
696                 *target_event = tmp_values_int[1];
697                 target_event += m_dimension;
698                 *target_event = tmp_values_int[2];
699                 target_event += m_dimension;
700                 *target_event = tmp_values_int[3];
701                 target_event += m_dimension;
702             }
703
704             // do the remainder of the events
705             for(;j < nevents; j += 1) {
706                 float *in = (float *)buffer;
707 #if AMDTP_CLIP_FLOATS
708                 // clip directly to the value of a maxed event
709                 if(unlikely(*in > 1.0)) {
710                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
711                 } else if(unlikely(*in < -1.0)) {
712                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
713                 } else {
714                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
715                     unsigned int tmp = ((int) v);
716                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
717                     *target_event = CondSwapToBus32((quadlet_t)tmp);
718                 }
719 #else
720                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
721                 unsigned int tmp = ((int) v);
722                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
723                 *target_event = CondSwapToBus32((quadlet_t)tmp);
724 #endif
725                 buffer++;
726                 target_event += m_dimension;
727             }
728
729         } else {
730             for (j = 0;j < nevents; j += 1)
731             {
732                 // hardcoded byte swapped
733                 *target_event = 0x00000040;
734                 target_event += m_dimension;
735             }
736         }
737     }
738 }
739
740
741 /**
742  * @brief mux all audio ports to events
743  * @param data
744  * @param offset
745  * @param nevents
746  */
747 void
748 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
749                                                     unsigned int offset,
750                                                     unsigned int nevents)
751 {
752     unsigned int j;
753     quadlet_t *target_event;
754     int i;
755
756     uint32_t *client_buffers[4];
757     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
758
759     // prepare the scratch buffer
760     assert(m_scratch_buffer_size_bytes > nevents * 4);
761     memset(m_scratch_buffer, 0, nevents * 4);
762
763     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
764     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
765
766     // this assumes that audio ports are sorted by position,
767     // and that there are no gaps
768     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
769         struct _MBLA_port_cache *p;
770
771         // get the port buffers
772         for (j=0; j<4; j++) {
773             p = &(m_audio_ports.at(i+j));
774             if(likely(p->buffer && p->enabled)) {
775                 client_buffers[j] = (uint32_t *) p->buffer;
776                 client_buffers[j] += offset;
777             } else {
778                 // if a port is disabled or has no valid
779                 // buffer, use the scratch buffer (all zero's)
780                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
781             }
782         }
783
784         // the base event for this position
785         target_event = (quadlet_t *)(data + i);
786
787         // process the events
788         for (j=0;j < nevents; j += 1)
789         {
790             // read the values
791             tmp_values[0] = *(client_buffers[0]);
792             tmp_values[1] = *(client_buffers[1]);
793             tmp_values[2] = *(client_buffers[2]);
794             tmp_values[3] = *(client_buffers[3]);
795
796             // now do the SSE based conversion/labeling
797             __m128i *target = (__m128i*)target_event;
798             __m128i v_int = *((__m128i*)tmp_values);;
799
800             // mask
801             v_int = _mm_and_si128( v_int, mask );
802             // label it
803             v_int = _mm_or_si128( v_int, label );
804
805             // do endian conversion (SSE is always little endian)
806             // do first swap
807             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
808             // do second swap
809             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
810
811             // store the packed int
812             // (target misalignment is assumed since we don't know the m_dimension)
813             _mm_storeu_si128 (target, v_int);
814
815             // increment the buffer pointers
816             client_buffers[0]++;
817             client_buffers[1]++;
818             client_buffers[2]++;
819             client_buffers[3]++;
820
821             // go to next target event position
822             target_event += m_dimension;
823         }
824     }
825
826     // do remaining ports
827     // NOTE: these can be time-SSE'd
828     for (; i < ((int)m_nb_audio_ports); i++) {
829         struct _MBLA_port_cache &p = m_audio_ports.at(i);
830         target_event = (quadlet_t *)(data + i);
831         assert(nevents + offset <= p.buffer_size );
832
833         if(likely(p.buffer && p.enabled)) {
834             uint32_t *buffer = (uint32_t *)(p.buffer);
835             buffer += offset;
836    
837             for (j = 0;j < nevents; j += 4)
838             {
839                 // read the values
840                 tmp_values[0] = *buffer;
841                 buffer++;
842                 tmp_values[1] = *buffer;
843                 buffer++;
844                 tmp_values[2] = *buffer;
845                 buffer++;
846                 tmp_values[3] = *buffer;
847                 buffer++;
848
849                 // now do the SSE based conversion/labeling
850                 __m128i v_int = *((__m128i*)tmp_values);;
851
852                 // mask
853                 v_int = _mm_and_si128( v_int, mask );
854                 // label it
855                 v_int = _mm_or_si128( v_int, label );
856
857                 // do endian conversion (SSE is always little endian)
858                 // do first swap
859                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
860                 // do second swap
861                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
862
863                 // store the packed int
864                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
865
866                 // increment the buffer pointers
867                 *target_event = tmp_values[0];
868                 target_event += m_dimension;
869                 *target_event = tmp_values[1];
870                 target_event += m_dimension;
871                 *target_event = tmp_values[2];
872                 target_event += m_dimension;
873                 *target_event = tmp_values[3];
874                 target_event += m_dimension;
875             }
876
877             // do the remainder of the events
878             for(;j < nevents; j += 1) {
879                 uint32_t in = (uint32_t)(*buffer);
880                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
881                 buffer++;
882                 target_event += m_dimension;
883             }
884
885         } else {
886             for (j = 0;j < nevents; j += 1)
887             {
888                 // hardcoded byte swapped
889                 *target_event = 0x00000040;
890                 target_event += m_dimension;
891             }
892         }
893     }
894 }
895
896 #else
897
898 /**
899  * @brief mux all audio ports to events
900  * @param data
901  * @param offset
902  * @param nevents
903  */
904 void
905 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
906                                                     unsigned int offset,
907                                                     unsigned int nevents)
908 {
909     unsigned int j;
910     quadlet_t *target_event;
911     int i;
912
913     for (i = 0; i < m_nb_audio_ports; i++) {
914         struct _MBLA_port_cache &p = m_audio_ports.at(i);
915         target_event = (quadlet_t *)(data + i);
916         assert(nevents + offset <= p.buffer_size );
917
918         if(likely(p.buffer && p.enabled)) {
919             quadlet_t *buffer = (quadlet_t *)(p.buffer);
920             buffer += offset;
921    
922             for (j = 0;j < nevents; j += 1)
923             {
924                 uint32_t in = (uint32_t)(*buffer);
925                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
926                 buffer++;
927                 target_event += m_dimension;
928             }
929         } else {
930             for (j = 0;j < nevents; j += 1)
931             {
932                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
933                 target_event += m_dimension;
934             }
935         }
936     }
937 }
938
939 /**
940  * @brief mux all audio ports to events
941  * @param data
942  * @param offset
943  * @param nevents
944  */
945 void
946 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
947                                                     unsigned int offset,
948                                                     unsigned int nevents)
949 {
950     unsigned int j;
951     quadlet_t *target_event;
952     int i;
953
954     for (i = 0; i < m_nb_audio_ports; i++) {
955         struct _MBLA_port_cache &p = m_audio_ports.at(i);
956         target_event = (quadlet_t *)(data + i);
957         assert(nevents + offset <= p.buffer_size );
958
959         if(likely(p.buffer && p.enabled)) {
960             quadlet_t *buffer = (quadlet_t *)(p.buffer);
961             buffer += offset;
962    
963             for (j = 0;j < nevents; j += 1)
964             {
965                 float *in = (float *)buffer;
966 #if AMDTP_CLIP_FLOATS
967                 // clip directly to the value of a maxed event
968                 if(unlikely(*in > 1.0)) {
969                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
970                 } else if(unlikely(*in < -1.0)) {
971                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
972                 } else {
973                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
974                     unsigned int tmp = ((int) v);
975                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
976                     *target_event = CondSwapToBus32((quadlet_t)tmp);
977                 }
978 #else
979                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
980                 unsigned int tmp = ((int) v);
981                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
982                 *target_event = CondSwapToBus32((quadlet_t)tmp);
983 #endif
984                 buffer++;
985                 target_event += m_dimension;
986             }
987         } else {
988             for (j = 0;j < nevents; j += 1)
989             {
990                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
991                 target_event += m_dimension;
992             }
993         }
994     }
995 }
996 #endif
997
998 /**
999  * @brief encodes all midi ports in the cache to events (silence)
1000  * @param data
1001  * @param offset
1002  * @param nevents
1003  */
1004 void
1005 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
1006                                                      unsigned int offset,
1007                                                      unsigned int nevents)
1008 {
1009     quadlet_t *target_event;
1010     int i;
1011     unsigned int j;
1012
1013     for (i = 0; i < m_nb_midi_ports; i++) {
1014         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1015
1016         for (j = p.location;j < nevents; j += 8) {
1017             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1018             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1019         }
1020     }
1021 }
1022
1023 /**
1024  * @brief encodes all midi ports in the cache to events
1025  * @param data
1026  * @param offset
1027  * @param nevents
1028  */
1029 void
1030 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1031                                               unsigned int offset,
1032                                               unsigned int nevents)
1033 {
1034     quadlet_t *target_event;
1035     int i;
1036     unsigned int j;
1037
1038     for (i = 0; i < m_nb_midi_ports; i++) {
1039         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1040         if (p.buffer && p.enabled) {
1041             uint32_t *buffer = (quadlet_t *)(p.buffer);
1042             buffer += offset;
1043
1044             for (j = p.location;j < nevents; j += 8) {
1045                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1046
1047                 if ( *buffer & 0xFF000000 )   // we can send a byte
1048                 {
1049                     quadlet_t tmpval;
1050                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1051                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1052                     *target_event = CondSwapToBus32(tmpval);
1053
1054                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1055                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1056                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1057                                data, target_event, tmpval );
1058                 } else {
1059                     // can't send a byte, either because there is no byte,
1060                     // or because this would exceed the maximum rate
1061                     // FIXME: this can be ifdef optimized since it's a constant
1062                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1063                 }
1064                 buffer+=8;
1065             }
1066         } else {
1067             for (j = p.location;j < nevents; j += 8) {
1068                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1069                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1070                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1071             }
1072         }
1073     }
1074 }
1075
1076 bool
1077 AmdtpTransmitStreamProcessor::initPortCache() {
1078     // make use of the fact that audio ports are the first ports in
1079     // the cluster as per AMDTP. so we can sort the ports by position
1080     // and have very efficient lookups:
1081     // m_float_ports.at(i).buffer -> audio stream i buffer
1082     // for midi ports we simply cache all port info since they are (usually) not
1083     // that numerous
1084     m_nb_audio_ports = 0;
1085     m_audio_ports.clear();
1086    
1087     m_nb_midi_ports = 0;
1088     m_midi_ports.clear();
1089    
1090     for(PortVectorIterator it = m_Ports.begin();
1091         it != m_Ports.end();
1092         ++it )
1093     {
1094         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1095         assert(pinfo); // this should not fail!!
1096
1097         switch( pinfo->getFormat() )
1098         {
1099             case AmdtpPortInfo::E_MBLA:
1100                 m_nb_audio_ports++;
1101                 break;
1102             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1103                 break;
1104             case AmdtpPortInfo::E_Midi:
1105                 m_nb_midi_ports++;
1106                 break;
1107             default: // ignore
1108                 break;
1109         }
1110     }
1111
1112     int idx;
1113     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1114         for(PortVectorIterator it = m_Ports.begin();
1115             it != m_Ports.end();
1116             ++it )
1117         {
1118             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1119             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1120                         "idx %u: looking at port %s at position %u\n",
1121                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1122             if(pinfo->getPosition() == (unsigned int)idx) {
1123                 struct _MBLA_port_cache p;
1124                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1125                 if(p.port == NULL) {
1126                     debugError("Port is not an AmdtpAudioPort!\n");
1127                     return false;
1128                 }
1129                 p.buffer = NULL; // to be filled by updatePortCache
1130                 #ifdef DEBUG
1131                 p.buffer_size = (*it)->getBufferSize();
1132                 #endif
1133
1134                 m_audio_ports.push_back(p);
1135                 debugOutput(DEBUG_LEVEL_VERBOSE,
1136                             "Cached port %s at position %u\n",
1137                             p.port->getName().c_str(), idx);
1138                 goto next_index;
1139             }
1140         }
1141         debugError("No MBLA port found for position %d\n", idx);
1142         return false;
1143 next_index:
1144         continue;
1145     }
1146
1147     for(PortVectorIterator it = m_Ports.begin();
1148         it != m_Ports.end();
1149         ++it )
1150     {
1151         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1152         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1153                     "idx %u: looking at port %s at position %u, location %u\n",
1154                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1155         if ((*it)->getPortType() == Port::E_Midi) {
1156             struct _MIDI_port_cache p;
1157             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1158             if(p.port == NULL) {
1159                 debugError("Port is not an AmdtpMidiPort!\n");
1160                 return false;
1161             }
1162             p.position = pinfo->getPosition();
1163             p.location = pinfo->getLocation();
1164             p.buffer = NULL; // to be filled by updatePortCache
1165             #ifdef DEBUG
1166             p.buffer_size = (*it)->getBufferSize();
1167             #endif
1168
1169             m_midi_ports.push_back(p);
1170             debugOutput(DEBUG_LEVEL_VERBOSE,
1171                         "Cached port %s at position %u, location %u\n",
1172                         p.port->getName().c_str(), p.position, p.location);
1173         }
1174     }
1175
1176     return true;
1177 }
1178
1179 void
1180 AmdtpTransmitStreamProcessor::updatePortCache() {
1181     int idx;
1182     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1183         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1184         AmdtpAudioPort *port = p.port;
1185         p.buffer = port->getBufferAddress();
1186         p.enabled = !port->isDisabled();
1187     }
1188     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1189         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1190         AmdtpMidiPort *port = p.port;
1191         p.buffer = port->getBufferAddress();
1192         p.enabled = !port->isDisabled();
1193     }
1194 }
1195
1196 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.