root/branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1525, 41.6 kB (checked in by ppalmers, 12 years ago)

- Allow the DLL bandwidths for the CycleTimerHelper and the TimestampedBuffer to be specified in absolute units (Hz). This ensures samplerate-independent operation.
- Reduce the default DLL bandwidth for the TimestampedBuffers. This improves timestamp timing by a factor of 10, which should especially benefit timing-sensitive devices (MOTU).
- Allow the DLL bandwidth and other transmit settings to be specified through the configuration file.
- Implement a sanity check for the instantaneous samplerate to detect bogus timestamp processing.

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31 #include "libutil/float_cast.h"
32
33 #include "libieee1394/ieee1394service.h"
34 #include "libieee1394/IsoHandlerManager.h"
35 #include "libieee1394/cycletimer.h"
36
37 #include "libutil/ByteSwap.h"
38 #include <assert.h>
39 #include <cstring>
40
// Branch-prediction hints built on the compiler's __builtin_expect:
// likely(x) marks x as expected to be true, unlikely(x) as expected to be false.
#define likely(x)   __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)

// Factor used to scale float samples to integers before they are masked to
// 24 bits: 2^23 - 1 (0x7FFFFF), expressed as a float.
#define AMDTP_FLOAT_MULTIPLIER (1.0f * 0x7FFFFF)
45 namespace Streaming
46 {
47
48 /* transmit */
49 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
50         : StreamProcessor(parent, ePT_Transmit)
51         , m_dimension( dimension )
52         , m_dbc( 0 )
53 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
54         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
55 #endif
56         , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
57         , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
58         , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
59         , m_nb_audio_ports( 0 )
60         , m_nb_midi_ports( 0 )
61 {}
62
63 enum StreamProcessor::eChildReturnValue
64 AmdtpTransmitStreamProcessor::generatePacketHeader (
65     unsigned char *data, unsigned int *length,
66     unsigned char *tag, unsigned char *sy,
67     uint32_t pkt_ctr )
68 {
69     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
70     struct iec61883_packet *packet = (struct iec61883_packet *)data;
71     /* Our node ID can change after a bus reset, so it is best to fetch
72     * our node ID for each packet. */
73     packet->sid = m_local_node_id;
74
75     packet->dbs = m_dimension;
76     packet->fn = 0;
77     packet->qpc = 0;
78     packet->sph = 0;
79     packet->reserved = 0;
80     packet->dbc = m_dbc;
81     packet->eoh1 = 2;
82     packet->fmt = IEC61883_FMT_AMDTP;
83
84     *tag = IEC61883_TAG_WITH_CIP;
85     *sy = 0;
86
87     signed int fc;
88     uint64_t presentation_time;
89     unsigned int presentation_cycle;
90     int cycles_until_presentation;
91
92     uint64_t transmit_at_time;
93     unsigned int transmit_at_cycle;
94     int cycles_until_transmit;
95
96     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
97                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
98     // check whether the packet buffer has packets for us to send.
99     // the base timestamp is the one of the next sample in the buffer
100     ffado_timestamp_t ts_head_tmp;
101     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
102
103     // the timestamp gives us the time at which we want the sample block
104     // to be output by the device
105     presentation_time = ( uint64_t ) ts_head_tmp;
106
107     // now we calculate the time when we have to transmit the sample block
108     transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
109
110     // calculate the cycle this block should be presented in
111     // (this is just a virtual calculation since at that time it should
112     //  already be in the device's buffer)
113     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
114
115     // calculate the cycle this block should be transmitted in
116     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
117
118     // we can check whether this cycle is within the 'window' we have
119     // to send this packet.
120     // first calculate the number of cycles left before presentation time
121     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
122
123     // we can check whether this cycle is within the 'window' we have
124     // to send this packet.
125     // first calculate the number of cycles left before presentation time
126     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
127
128     // two different options:
129     // 1) there are not enough frames for one packet
130     //      => determine wether this is a problem, since we might still
131     //         have some time to send it
132     // 2) there are enough packets
133     //      => determine whether we have to send them in this packet
134     if ( fc < ( signed int ) m_syt_interval )
135     {
136         // not enough frames in the buffer,
137
138         // we can still postpone the queueing of the packets
139         // if we are far enough ahead of the presentation time
140         if ( cycles_until_presentation <= m_min_cycles_before_presentation )
141         {
142             debugOutput( DEBUG_LEVEL_NORMAL,
143                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
144                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
145                          transmit_at_cycle, cycles_until_transmit );
146             // we are too late
147             return eCRV_XRun;
148         }
149         else
150         {
151             #if DEBUG_EXTREME
152             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
153
154             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
155                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
156                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
157                                transmit_at_cycle, cycles_until_transmit, now_cycle );
158             #endif
159
160             // there is still time left to send the packet
161             // we want the system to give this packet another go at a later time instant
162             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
163
164             // we could wait here for a certain time before trying again. However, this
165             // is not going to work since we then block the iterator thread, hence also
166             // the receiving code, meaning that we are not processing received packets,
167             // and hence there is no progression in the number of frames available.
168
169             // for example:
170             // SleepRelativeUsec(125); // one cycle
171             // goto try_block_of_frames;
172
173             // or more advanced, calculate how many cycles we are ahead of 'now' and
174             // base the sleep on that.
175
176             // note that this requires that there is one thread for each IsoHandler,
177             // otherwise we're in the deadlock described above.
178         }
179     }
180     else
181     {
182         // there are enough frames, so check the time they are intended for
183         // all frames have a certain 'time window' in which they can be sent
184         // this corresponds to the range of the timestamp mechanism:
185         // we can send a packet 15 cycles in advance of the 'presentation time'
186         // in theory we can send the packet up till one cycle before the presentation time,
187         // however this is not very smart.
188
189         // There are 3 options:
190         // 1) the frame block is too early
191         //      => send an empty packet
192         // 2) the frame block is within the window
193         //      => send it
194         // 3) the frame block is too late
195         //      => discard (and raise xrun?)
196         //         get next block of frames and repeat
197
198         if(cycles_until_transmit < 0)
199         {
200             // we are too late
201             debugOutput(DEBUG_LEVEL_VERBOSE,
202                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
203                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
204                         transmit_at_cycle, cycles_until_transmit,
205                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
206             //debugShowBackLogLines(200);
207             // however, if we can send this sufficiently before the presentation
208             // time, it could be harmless.
209             // NOTE: dangerous since the device has no way of reporting that it didn't get
210             //       this packet on time.
211             if(cycles_until_presentation >= m_min_cycles_before_presentation)
212             {
213                 // we are not that late and can still try to transmit the packet
214                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
215                 m_last_timestamp = presentation_time;
216                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
217             }
218             else   // definitely too late
219             {
220                 return eCRV_XRun;
221             }
222         }
223         else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
224         {
225             // it's time send the packet
226             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
227             m_last_timestamp = presentation_time;
228
229             // for timestamp tracing
230             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
231                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
232                                presentation_time,
233                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
234                                presentation_cycle, transmit_at_cycle);
235
236             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
237         }
238         else
239         {
240             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
241                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
242                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
243                                transmit_at_cycle, cycles_until_transmit,
244                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
245                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
246 #ifdef DEBUG
247             if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
248             {
249                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
250                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
251                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
252                                    transmit_at_cycle, cycles_until_transmit,
253                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
254                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
255             }
256 #endif
257             // we are too early, send only an empty packet
258             return eCRV_EmptyPacket;
259         }
260     }
261     return eCRV_Invalid;
262 }
263
264 enum StreamProcessor::eChildReturnValue
265 AmdtpTransmitStreamProcessor::generatePacketData (
266     unsigned char *data, unsigned int *length )
267 {
268     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
269     {
270         debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
271                            "XMIT DATA: TSP= %011llu (%04u)\n",
272                            m_last_timestamp,
273                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
274         return eCRV_OK;
275     }
276     else return eCRV_XRun;
277 }
278
279 enum StreamProcessor::eChildReturnValue
280 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
281     unsigned char *data, unsigned int *length,
282     unsigned char *tag, unsigned char *sy,
283     uint32_t pkt_ctr )
284 {
285     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
286     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
287                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
288                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
289                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
290
291     packet->sid = m_local_node_id;
292
293     packet->dbs = m_dimension;
294     packet->fn = 0;
295     packet->qpc = 0;
296     packet->sph = 0;
297     packet->reserved = 0;
298     packet->dbc = m_dbc;
299     packet->eoh1 = 2;
300     packet->fmt = IEC61883_FMT_AMDTP;
301
302     *tag = IEC61883_TAG_WITH_CIP;
303     *sy = 0;
304
305     m_dbc += fillNoDataPacketHeader(packet, length);
306     return eCRV_Packet;
307 }
308
309 enum StreamProcessor::eChildReturnValue
310 AmdtpTransmitStreamProcessor::generateSilentPacketData (
311     unsigned char *data, unsigned int *length )
312 {
313     return eCRV_OK; // no need to do anything
314 }
315
316 enum StreamProcessor::eChildReturnValue
317 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
318     unsigned char *data, unsigned int *length,
319     unsigned char *tag, unsigned char *sy,
320     uint32_t pkt_ctr )
321 {
322     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
323     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
324                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
325                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
326                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
327     packet->sid = m_local_node_id;
328
329     packet->dbs = m_dimension;
330     packet->fn = 0;
331     packet->qpc = 0;
332     packet->sph = 0;
333     packet->reserved = 0;
334     packet->dbc = m_dbc;
335     packet->eoh1 = 2;
336     packet->fmt = IEC61883_FMT_AMDTP;
337
338     *tag = IEC61883_TAG_WITH_CIP;
339     *sy = 0;
340
341     m_dbc += fillNoDataPacketHeader(packet, length);
342     return eCRV_OK;
343 }
344
345 enum StreamProcessor::eChildReturnValue
346 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
347     unsigned char *data, unsigned int *length )
348 {
349     return eCRV_OK; // no need to do anything
350 }
351
352 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
353     struct iec61883_packet *packet, unsigned int* length,
354     uint32_t ts )
355 {
356
357     packet->fdf = m_fdf;
358
359     // convert the timestamp to SYT format
360     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
361     packet->syt = CondSwapToBus16 ( timestamp_SYT );
362
363     // FIXME: use a precomputed value here
364     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
365
366     return m_syt_interval;
367 }
368
369 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
370     struct iec61883_packet *packet, unsigned int* length )
371 {
372     // no-data packets have syt=0xFFFF
373     // and (can) have the usual amount of events as dummy data
374     // DBC is not increased
375     packet->fdf = IEC61883_FDF_NODATA;
376     packet->syt = 0xffff;
377
378 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
379     if ( m_send_nodata_payload )
380     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
381         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
382         return m_syt_interval;
383     } else { // no-data packets without payload
384         *length = 2*sizeof ( quadlet_t );
385         return 0;
386     }
387 #else
388     // no-data packets without payload
389     *length = 2*sizeof ( quadlet_t );
390     return 0;
391 #endif
392 }
393
394 unsigned int
395 AmdtpTransmitStreamProcessor::getSytInterval() {
396     switch (m_StreamProcessorManager.getNominalRate()) {
397         case 32000:
398         case 44100:
399         case 48000:
400             return 8;
401         case 88200:
402         case 96000:
403             return 16;
404         case 176400:
405         case 192000:
406             return 32;
407         default:
408             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
409             return 0;
410     }
411 }
412
413 unsigned int
414 AmdtpTransmitStreamProcessor::getFDF() {
415     switch (m_StreamProcessorManager.getNominalRate()) {
416         case 32000: return IEC61883_FDF_SFC_32KHZ;
417         case 44100: return IEC61883_FDF_SFC_44K1HZ;
418         case 48000: return IEC61883_FDF_SFC_48KHZ;
419         case 88200: return IEC61883_FDF_SFC_88K2HZ;
420         case 96000: return IEC61883_FDF_SFC_96KHZ;
421         case 176400: return IEC61883_FDF_SFC_176K4HZ;
422         case 192000: return IEC61883_FDF_SFC_192KHZ;
423         default:
424             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
425             return 0;
426     }
427 }
428
429 bool AmdtpTransmitStreamProcessor::prepareChild()
430 {
431     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
432     m_syt_interval = getSytInterval();
433     m_fdf = getFDF();
434
435     debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF             : %d / %d\n", m_syt_interval, m_fdf );
436 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
437     debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
438 #endif
439     debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles      : %d\n", m_max_cycles_to_transmit_early );
440     debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay                 : %d\n", m_transmit_transfer_delay );
441     debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
442
443     iec61883_cip_init (
444         &m_cip_status,
445         IEC61883_FMT_AMDTP,
446         m_fdf,
447         m_StreamProcessorManager.getNominalRate(),
448         m_dimension,
449         m_syt_interval );
450
451     if (!initPortCache()) {
452         debugError("Could not init port cache\n");
453         return false;
454     }
455
456     return true;
457 }
458
459 /*
460 * compose the event streams for the packets from the port buffers
461 */
462 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
463         unsigned int nevents, unsigned int offset )
464 {
465     // update the variable parts of the cache
466     updatePortCache();
467
468     // encode audio data
469     switch(m_StreamProcessorManager.getAudioDataType()) {
470         case StreamProcessorManager::eADT_Int24:
471             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
472             break;
473         case StreamProcessorManager::eADT_Float:
474             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
475             break;
476     }
477
478     // do midi ports
479     encodeMidiPorts((quadlet_t *)data, offset, nevents);
480     return true;
481 }
482
483 bool
484 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
485     char *data, unsigned int nevents, unsigned int offset)
486 {
487     // no need to update the port cache when transmitting silence since
488     // no dynamic values are used to do so.
489     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
490     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
491     return true;
492 }
493
494 /**
495  * @brief encodes all audio ports in the cache to events (silent data)
496  * @param data
497  * @param offset
498  * @param nevents
499  */
500 void
501 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
502                                                       unsigned int offset,
503                                                       unsigned int nevents)
504 {
505     unsigned int j;
506     quadlet_t *target_event;
507     int i;
508
509     for (i = 0; i < m_nb_audio_ports; i++) {
510         target_event = (quadlet_t *)(data + i);
511
512         for (j = 0;j < nevents; j += 1)
513         {
514             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
515             target_event += m_dimension;
516         }
517     }
518 }
519
520 #ifdef __SSE2__
521 #include <emmintrin.h>
522 #warning SSE2 build
523
524 /**
525  * @brief mux all audio ports to events
526  * @param data
527  * @param offset
528  * @param nevents
529  */
530 void
531 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
532                                                     unsigned int offset,
533                                                     unsigned int nevents)
534 {
535     unsigned int j;
536     quadlet_t *target_event;
537     int i;
538
539     float * client_buffers[4];
540     float tmp_values[4] __attribute__ ((aligned (16)));
541     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
542
543     // prepare the scratch buffer
544     assert(m_scratch_buffer_size_bytes > nevents * 4);
545     memset(m_scratch_buffer, 0, nevents * 4);
546
547     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
548     const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
549     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
550
551 #if AMDTP_CLIP_FLOATS
552     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
553     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
554 #endif
555
556     // this assumes that audio ports are sorted by position,
557     // and that there are no gaps
558     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
559         struct _MBLA_port_cache *p;
560
561         // get the port buffers
562         for (j=0; j<4; j++) {
563             p = &(m_audio_ports.at(i+j));
564             if(likely(p->buffer && p->enabled)) {
565                 client_buffers[j] = (float *) p->buffer;
566                 client_buffers[j] += offset;
567             } else {
568                 // if a port is disabled or has no valid
569                 // buffer, use the scratch buffer (all zero's)
570                 client_buffers[j] = (float *) m_scratch_buffer;
571             }
572         }
573
574         // the base event for this position
575         target_event = (quadlet_t *)(data + i);
576         // process the events
577         for (j=0;j < nevents; j += 1)
578         {
579             // read the values
580             tmp_values[0] = *(client_buffers[0]);
581             tmp_values[1] = *(client_buffers[1]);
582             tmp_values[2] = *(client_buffers[2]);
583             tmp_values[3] = *(client_buffers[3]);
584
585             // now do the SSE based conversion/labeling
586             __m128 v_float = *((__m128*)tmp_values);
587             __m128i *target = (__m128i*)target_event;
588             __m128i v_int;
589
590             // clip
591 #if AMDTP_CLIP_FLOATS
592             // do SSE clipping
593             v_float = _mm_max_ps(v_float, v_min);
594             v_float = _mm_min_ps(v_float, v_max);
595 #endif
596
597             // multiply
598             v_float = _mm_mul_ps(v_float, mult);
599             // convert to signed integer
600             v_int = _mm_cvttps_epi32( v_float );
601             // mask
602             v_int = _mm_and_si128( v_int, mask );
603             // label it
604             v_int = _mm_or_si128( v_int, label );
605
606             // do endian conversion (SSE is always little endian)
607             // do first swap
608             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
609             // do second swap
610             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
611             // store the packed int
612             // (target misalignment is assumed since we don't know the m_dimension)
613             _mm_storeu_si128 (target, v_int);
614
615             // increment the buffer pointers
616             client_buffers[0]++;
617             client_buffers[1]++;
618             client_buffers[2]++;
619             client_buffers[3]++;
620
621             // go to next target event position
622             target_event += m_dimension;
623         }
624     }
625
626     // do remaining ports
627     // NOTE: these can be time-SSE'd
628     for (; i < (int)m_nb_audio_ports; i++) {
629         struct _MBLA_port_cache &p = m_audio_ports.at(i);
630         target_event = (quadlet_t *)(data + i);
631         assert(nevents + offset <= p.buffer_size );
632
633         if(likely(p.buffer && p.enabled)) {
634             float *buffer = (float *)(p.buffer);
635             buffer += offset;
636    
637             for (j = 0;j < nevents; j += 4)
638             {
639                 // read the values
640                 tmp_values[0] = *buffer;
641                 buffer++;
642                 tmp_values[1] = *buffer;
643                 buffer++;
644                 tmp_values[2] = *buffer;
645                 buffer++;
646                 tmp_values[3] = *buffer;
647                 buffer++;
648
649                 // now do the SSE based conversion/labeling
650                 __m128 v_float = *((__m128*)tmp_values);
651                 __m128i v_int;
652
653 #if AMDTP_CLIP_FLOATS
654                 // do SSE clipping
655                 v_float = _mm_max_ps(v_float, v_min);
656                 v_float = _mm_min_ps(v_float, v_max);
657 #endif
658                 // multiply
659                 v_float = _mm_mul_ps(v_float, mult);
660                 // convert to signed integer
661                 v_int = _mm_cvttps_epi32( v_float );
662                 // mask
663                 v_int = _mm_and_si128( v_int, mask );
664                 // label it
665                 v_int = _mm_or_si128( v_int, label );
666    
667                 // do endian conversion (SSE is always little endian)
668                 // do first swap
669                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
670                 // do second swap
671                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
672
673                 // store the packed int
674                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
675
676                 // increment the buffer pointers
677                 *target_event = tmp_values_int[0];
678                 target_event += m_dimension;
679                 *target_event = tmp_values_int[1];
680                 target_event += m_dimension;
681                 *target_event = tmp_values_int[2];
682                 target_event += m_dimension;
683                 *target_event = tmp_values_int[3];
684                 target_event += m_dimension;
685             }
686
687             // do the remainder of the events
688             for(;j < nevents; j += 1) {
689                 float *in = (float *)buffer;
690 #if AMDTP_CLIP_FLOATS
691                 // clip directly to the value of a maxed event
692                 if(unlikely(*in > 1.0)) {
693                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
694                 } else if(unlikely(*in < -1.0)) {
695                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
696                 } else {
697                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
698                     unsigned int tmp = ((int) v);
699                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
700                     *target_event = CondSwapToBus32((quadlet_t)tmp);
701                 }
702 #else
703                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
704                 unsigned int tmp = ((int) v);
705                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
706                 *target_event = CondSwapToBus32((quadlet_t)tmp);
707 #endif
708                 buffer++;
709                 target_event += m_dimension;
710             }
711
712         } else {
713             for (j = 0;j < nevents; j += 1)
714             {
715                 // hardcoded byte swapped
716                 *target_event = 0x00000040;
717                 target_event += m_dimension;
718             }
719         }
720     }
721 }
722
723
724 /**
725  * @brief mux all audio ports to events
726  * @param data
727  * @param offset
728  * @param nevents
729  */
730 void
731 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
732                                                     unsigned int offset,
733                                                     unsigned int nevents)
734 {
735     unsigned int j;
736     quadlet_t *target_event;
737     int i;
738
739     uint32_t *client_buffers[4];
740     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
741
742     // prepare the scratch buffer
743     assert(m_scratch_buffer_size_bytes > nevents * 4);
744     memset(m_scratch_buffer, 0, nevents * 4);
745
746     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
747     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
748
749     // this assumes that audio ports are sorted by position,
750     // and that there are no gaps
751     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
752         struct _MBLA_port_cache *p;
753
754         // get the port buffers
755         for (j=0; j<4; j++) {
756             p = &(m_audio_ports.at(i+j));
757             if(likely(p->buffer && p->enabled)) {
758                 client_buffers[j] = (uint32_t *) p->buffer;
759                 client_buffers[j] += offset;
760             } else {
761                 // if a port is disabled or has no valid
762                 // buffer, use the scratch buffer (all zero's)
763                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
764             }
765         }
766
767         // the base event for this position
768         target_event = (quadlet_t *)(data + i);
769
770         // process the events
771         for (j=0;j < nevents; j += 1)
772         {
773             // read the values
774             tmp_values[0] = *(client_buffers[0]);
775             tmp_values[1] = *(client_buffers[1]);
776             tmp_values[2] = *(client_buffers[2]);
777             tmp_values[3] = *(client_buffers[3]);
778
779             // now do the SSE based conversion/labeling
780             __m128i *target = (__m128i*)target_event;
781             __m128i v_int = *((__m128i*)tmp_values);;
782
783             // mask
784             v_int = _mm_and_si128( v_int, mask );
785             // label it
786             v_int = _mm_or_si128( v_int, label );
787
788             // do endian conversion (SSE is always little endian)
789             // do first swap
790             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
791             // do second swap
792             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
793
794             // store the packed int
795             // (target misalignment is assumed since we don't know the m_dimension)
796             _mm_storeu_si128 (target, v_int);
797
798             // increment the buffer pointers
799             client_buffers[0]++;
800             client_buffers[1]++;
801             client_buffers[2]++;
802             client_buffers[3]++;
803
804             // go to next target event position
805             target_event += m_dimension;
806         }
807     }
808
809     // do remaining ports
810     // NOTE: these can be time-SSE'd
811     for (; i < ((int)m_nb_audio_ports); i++) {
812         struct _MBLA_port_cache &p = m_audio_ports.at(i);
813         target_event = (quadlet_t *)(data + i);
814         assert(nevents + offset <= p.buffer_size );
815
816         if(likely(p.buffer && p.enabled)) {
817             uint32_t *buffer = (uint32_t *)(p.buffer);
818             buffer += offset;
819    
820             for (j = 0;j < nevents; j += 4)
821             {
822                 // read the values
823                 tmp_values[0] = *buffer;
824                 buffer++;
825                 tmp_values[1] = *buffer;
826                 buffer++;
827                 tmp_values[2] = *buffer;
828                 buffer++;
829                 tmp_values[3] = *buffer;
830                 buffer++;
831
832                 // now do the SSE based conversion/labeling
833                 __m128i v_int = *((__m128i*)tmp_values);;
834
835                 // mask
836                 v_int = _mm_and_si128( v_int, mask );
837                 // label it
838                 v_int = _mm_or_si128( v_int, label );
839
840                 // do endian conversion (SSE is always little endian)
841                 // do first swap
842                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
843                 // do second swap
844                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
845
846                 // store the packed int
847                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
848
849                 // increment the buffer pointers
850                 *target_event = tmp_values[0];
851                 target_event += m_dimension;
852                 *target_event = tmp_values[1];
853                 target_event += m_dimension;
854                 *target_event = tmp_values[2];
855                 target_event += m_dimension;
856                 *target_event = tmp_values[3];
857                 target_event += m_dimension;
858             }
859
860             // do the remainder of the events
861             for(;j < nevents; j += 1) {
862                 uint32_t in = (uint32_t)(*buffer);
863                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
864                 buffer++;
865                 target_event += m_dimension;
866             }
867
868         } else {
869             for (j = 0;j < nevents; j += 1)
870             {
871                 // hardcoded byte swapped
872                 *target_event = 0x00000040;
873                 target_event += m_dimension;
874             }
875         }
876     }
877 }
878
879 #else
880
881 /**
882  * @brief mux all audio ports to events
883  * @param data
884  * @param offset
885  * @param nevents
886  */
887 void
888 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
889                                                     unsigned int offset,
890                                                     unsigned int nevents)
891 {
892     unsigned int j;
893     quadlet_t *target_event;
894     int i;
895
896     for (i = 0; i < m_nb_audio_ports; i++) {
897         struct _MBLA_port_cache &p = m_audio_ports.at(i);
898         target_event = (quadlet_t *)(data + i);
899         assert(nevents + offset <= p.buffer_size );
900
901         if(likely(p.buffer && p.enabled)) {
902             quadlet_t *buffer = (quadlet_t *)(p.buffer);
903             buffer += offset;
904    
905             for (j = 0;j < nevents; j += 1)
906             {
907                 uint32_t in = (uint32_t)(*buffer);
908                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
909                 buffer++;
910                 target_event += m_dimension;
911             }
912         } else {
913             for (j = 0;j < nevents; j += 1)
914             {
915                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
916                 target_event += m_dimension;
917             }
918         }
919     }
920 }
921
922 /**
923  * @brief mux all audio ports to events
924  * @param data
925  * @param offset
926  * @param nevents
927  */
928 void
929 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
930                                                     unsigned int offset,
931                                                     unsigned int nevents)
932 {
933     unsigned int j;
934     quadlet_t *target_event;
935     int i;
936
937     for (i = 0; i < m_nb_audio_ports; i++) {
938         struct _MBLA_port_cache &p = m_audio_ports.at(i);
939         target_event = (quadlet_t *)(data + i);
940         assert(nevents + offset <= p.buffer_size );
941
942         if(likely(p.buffer && p.enabled)) {
943             quadlet_t *buffer = (quadlet_t *)(p.buffer);
944             buffer += offset;
945    
946             for (j = 0;j < nevents; j += 1)
947             {
948                 float *in = (float *)buffer;
949 #if AMDTP_CLIP_FLOATS
950                 // clip directly to the value of a maxed event
951                 if(unlikely(*in > 1.0)) {
952                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
953                 } else if(unlikely(*in < -1.0)) {
954                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
955                 } else {
956                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
957                     unsigned int tmp = ((int) v);
958                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
959                     *target_event = CondSwapToBus32((quadlet_t)tmp);
960                 }
961 #else
962                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
963                 unsigned int tmp = ((int) v);
964                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
965                 *target_event = CondSwapToBus32((quadlet_t)tmp);
966 #endif
967                 buffer++;
968                 target_event += m_dimension;
969             }
970         } else {
971             for (j = 0;j < nevents; j += 1)
972             {
973                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
974                 target_event += m_dimension;
975             }
976         }
977     }
978 }
979 #endif
980
981 /**
982  * @brief encodes all midi ports in the cache to events (silence)
983  * @param data
984  * @param offset
985  * @param nevents
986  */
987 void
988 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
989                                                      unsigned int offset,
990                                                      unsigned int nevents)
991 {
992     quadlet_t *target_event;
993     int i;
994     unsigned int j;
995
996     for (i = 0; i < m_nb_midi_ports; i++) {
997         struct _MIDI_port_cache &p = m_midi_ports.at(i);
998
999         for (j = p.location;j < nevents; j += 8) {
1000             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1001             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1002         }
1003     }
1004 }
1005
1006 /**
1007  * @brief encodes all midi ports in the cache to events
1008  * @param data
1009  * @param offset
1010  * @param nevents
1011  */
1012 void
1013 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1014                                               unsigned int offset,
1015                                               unsigned int nevents)
1016 {
1017     quadlet_t *target_event;
1018     int i;
1019     unsigned int j;
1020
1021     for (i = 0; i < m_nb_midi_ports; i++) {
1022         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1023         if (p.buffer && p.enabled) {
1024             uint32_t *buffer = (quadlet_t *)(p.buffer);
1025             buffer += offset;
1026
1027             for (j = p.location;j < nevents; j += 8) {
1028                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1029
1030                 if ( *buffer & 0xFF000000 )   // we can send a byte
1031                 {
1032                     quadlet_t tmpval;
1033                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1034                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1035                     *target_event = CondSwapToBus32(tmpval);
1036
1037 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1038 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1039 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1040 //                                data, target_event, tmpval );
1041                 } else {
1042                     // can't send a byte, either because there is no byte,
1043                     // or because this would exceed the maximum rate
1044                     // FIXME: this can be ifdef optimized since it's a constant
1045                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1046                 }
1047                 buffer+=8;
1048             }
1049         } else {
1050             for (j = p.location;j < nevents; j += 8) {
1051                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1052                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1053                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1054             }
1055         }
1056     }
1057 }
1058
1059 bool
1060 AmdtpTransmitStreamProcessor::initPortCache() {
1061     // make use of the fact that audio ports are the first ports in
1062     // the cluster as per AMDTP. so we can sort the ports by position
1063     // and have very efficient lookups:
1064     // m_float_ports.at(i).buffer -> audio stream i buffer
1065     // for midi ports we simply cache all port info since they are (usually) not
1066     // that numerous
1067     m_nb_audio_ports = 0;
1068     m_audio_ports.clear();
1069    
1070     m_nb_midi_ports = 0;
1071     m_midi_ports.clear();
1072    
1073     for(PortVectorIterator it = m_Ports.begin();
1074         it != m_Ports.end();
1075         ++it )
1076     {
1077         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1078         assert(pinfo); // this should not fail!!
1079
1080         switch( pinfo->getFormat() )
1081         {
1082             case AmdtpPortInfo::E_MBLA:
1083                 m_nb_audio_ports++;
1084                 break;
1085             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1086                 break;
1087             case AmdtpPortInfo::E_Midi:
1088                 m_nb_midi_ports++;
1089                 break;
1090             default: // ignore
1091                 break;
1092         }
1093     }
1094
1095     int idx;
1096     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1097         for(PortVectorIterator it = m_Ports.begin();
1098             it != m_Ports.end();
1099             ++it )
1100         {
1101             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1102             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1103                         "idx %u: looking at port %s at position %u\n",
1104                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1105             if(pinfo->getPosition() == (unsigned int)idx) {
1106                 struct _MBLA_port_cache p;
1107                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1108                 if(p.port == NULL) {
1109                     debugError("Port is not an AmdtpAudioPort!\n");
1110                     return false;
1111                 }
1112                 p.buffer = NULL; // to be filled by updatePortCache
1113                 #ifdef DEBUG
1114                 p.buffer_size = (*it)->getBufferSize();
1115                 #endif
1116
1117                 m_audio_ports.push_back(p);
1118                 debugOutput(DEBUG_LEVEL_VERBOSE,
1119                             "Cached port %s at position %u\n",
1120                             p.port->getName().c_str(), idx);
1121                 goto next_index;
1122             }
1123         }
1124         debugError("No MBLA port found for position %d\n", idx);
1125         return false;
1126 next_index:
1127         continue;
1128     }
1129
1130     for(PortVectorIterator it = m_Ports.begin();
1131         it != m_Ports.end();
1132         ++it )
1133     {
1134         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1135         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1136                     "idx %u: looking at port %s at position %u, location %u\n",
1137                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1138         if ((*it)->getPortType() == Port::E_Midi) {
1139             struct _MIDI_port_cache p;
1140             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1141             if(p.port == NULL) {
1142                 debugError("Port is not an AmdtpMidiPort!\n");
1143                 return false;
1144             }
1145             p.position = pinfo->getPosition();
1146             p.location = pinfo->getLocation();
1147             p.buffer = NULL; // to be filled by updatePortCache
1148             #ifdef DEBUG
1149             p.buffer_size = (*it)->getBufferSize();
1150             #endif
1151
1152             m_midi_ports.push_back(p);
1153             debugOutput(DEBUG_LEVEL_VERBOSE,
1154                         "Cached port %s at position %u, location %u\n",
1155                         p.port->getName().c_str(), p.position, p.location);
1156         }
1157     }
1158
1159     return true;
1160 }
1161
1162 void
1163 AmdtpTransmitStreamProcessor::updatePortCache() {
1164     int idx;
1165     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1166         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1167         AmdtpAudioPort *port = p.port;
1168         p.buffer = port->getBufferAddress();
1169         p.enabled = !port->isDisabled();
1170     }
1171     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1172         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1173         AmdtpMidiPort *port = p.port;
1174         p.buffer = port->getBufferAddress();
1175         p.enabled = !port->isDisabled();
1176     }
1177 }
1178
1179 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.