root/branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1544, 42.2 kB (checked in by ppalmers, 12 years ago)

round the transmit safety buffer size to one packet size (in frames) to avoid messing up the MIDI time muxed stream position

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31 #include "libutil/float_cast.h"
32
33 #include "libieee1394/ieee1394service.h"
34 #include "libieee1394/IsoHandlerManager.h"
35 #include "libieee1394/cycletimer.h"
36
37 #include "libutil/ByteSwap.h"
38 #include <assert.h>
39 #include <cstring>
40
// Branch hints: wrap GCC's __builtin_expect so a condition can be marked as
// expected to be true (likely) or expected to be false (unlikely).
#define likely(x)   __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)

// Scale factor applied to float samples before they are truncated to an
// integer and masked to 24 bits: 2^23 - 1, expressed as a float.
#define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
45 namespace Streaming
46 {
47
48 /* transmit */
49 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
50         : StreamProcessor(parent, ePT_Transmit)
51         , m_dimension( dimension )
52         , m_dbc( 0 )
53 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
54         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
55 #endif
56         , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
57         , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
58         , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
59         , m_nb_audio_ports( 0 )
60         , m_nb_midi_ports( 0 )
61 {}
62
63 enum StreamProcessor::eChildReturnValue
64 AmdtpTransmitStreamProcessor::generatePacketHeader (
65     unsigned char *data, unsigned int *length,
66     unsigned char *tag, unsigned char *sy,
67     uint32_t pkt_ctr )
68 {
69     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
70     struct iec61883_packet *packet = (struct iec61883_packet *)data;
71     /* Our node ID can change after a bus reset, so it is best to fetch
72     * our node ID for each packet. */
73     packet->sid = m_local_node_id;
74
75     packet->dbs = m_dimension;
76     packet->fn = 0;
77     packet->qpc = 0;
78     packet->sph = 0;
79     packet->reserved = 0;
80     packet->dbc = m_dbc;
81     packet->eoh1 = 2;
82     packet->fmt = IEC61883_FMT_AMDTP;
83
84     *tag = IEC61883_TAG_WITH_CIP;
85     *sy = 0;
86
87     signed int fc;
88     uint64_t presentation_time;
89     unsigned int presentation_cycle;
90     int cycles_until_presentation;
91
92     uint64_t transmit_at_time;
93     unsigned int transmit_at_cycle;
94     int cycles_until_transmit;
95
96     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
97                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
98     // check whether the packet buffer has packets for us to send.
99     // the base timestamp is the one of the next sample in the buffer
100     ffado_timestamp_t ts_head_tmp;
101     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
102
103     // the timestamp gives us the time at which we want the sample block
104     // to be output by the device
105     presentation_time = ( uint64_t ) ts_head_tmp;
106
107     // now we calculate the time when we have to transmit the sample block
108     transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
109
110     // calculate the cycle this block should be presented in
111     // (this is just a virtual calculation since at that time it should
112     //  already be in the device's buffer)
113     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
114
115     // calculate the cycle this block should be transmitted in
116     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
117
118     // we can check whether this cycle is within the 'window' we have
119     // to send this packet.
120     // first calculate the number of cycles left before presentation time
121     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
122
123     // we can check whether this cycle is within the 'window' we have
124     // to send this packet.
125     // first calculate the number of cycles left before presentation time
126     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
127
128     // two different options:
129     // 1) there are not enough frames for one packet
130     //      => determine wether this is a problem, since we might still
131     //         have some time to send it
132     // 2) there are enough packets
133     //      => determine whether we have to send them in this packet
134     if ( fc < ( signed int ) m_syt_interval )
135     {
136         // not enough frames in the buffer,
137
138         // we can still postpone the queueing of the packets
139         // if we are far enough ahead of the presentation time
140         if ( cycles_until_presentation <= m_min_cycles_before_presentation )
141         {
142             debugOutput( DEBUG_LEVEL_NORMAL,
143                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
144                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
145                          transmit_at_cycle, cycles_until_transmit );
146             // we are too late
147             return eCRV_XRun;
148         }
149         else
150         {
151             #if DEBUG_EXTREME
152             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
153
154             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
155                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
156                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
157                                transmit_at_cycle, cycles_until_transmit, now_cycle );
158             #endif
159
160             // there is still time left to send the packet
161             // we want the system to give this packet another go at a later time instant
162             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
163
164             // we could wait here for a certain time before trying again. However, this
165             // is not going to work since we then block the iterator thread, hence also
166             // the receiving code, meaning that we are not processing received packets,
167             // and hence there is no progression in the number of frames available.
168
169             // for example:
170             // SleepRelativeUsec(125); // one cycle
171             // goto try_block_of_frames;
172
173             // or more advanced, calculate how many cycles we are ahead of 'now' and
174             // base the sleep on that.
175
176             // note that this requires that there is one thread for each IsoHandler,
177             // otherwise we're in the deadlock described above.
178         }
179     }
180     else
181     {
182         // there are enough frames, so check the time they are intended for
183         // all frames have a certain 'time window' in which they can be sent
184         // this corresponds to the range of the timestamp mechanism:
185         // we can send a packet 15 cycles in advance of the 'presentation time'
186         // in theory we can send the packet up till one cycle before the presentation time,
187         // however this is not very smart.
188
189         // There are 3 options:
190         // 1) the frame block is too early
191         //      => send an empty packet
192         // 2) the frame block is within the window
193         //      => send it
194         // 3) the frame block is too late
195         //      => discard (and raise xrun?)
196         //         get next block of frames and repeat
197
198         if(cycles_until_transmit < 0)
199         {
200             // we are too late
201             debugOutput(DEBUG_LEVEL_VERBOSE,
202                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
203                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
204                         transmit_at_cycle, cycles_until_transmit,
205                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
206             //debugShowBackLogLines(200);
207             // however, if we can send this sufficiently before the presentation
208             // time, it could be harmless.
209             // NOTE: dangerous since the device has no way of reporting that it didn't get
210             //       this packet on time.
211             if(cycles_until_presentation >= m_min_cycles_before_presentation)
212             {
213                 // we are not that late and can still try to transmit the packet
214                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
215                 m_last_timestamp = presentation_time;
216                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
217             }
218             else   // definitely too late
219             {
220                 return eCRV_XRun;
221             }
222         }
223         else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
224         {
225             // it's time send the packet
226             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
227             m_last_timestamp = presentation_time;
228
229             // for timestamp tracing
230             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
231                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
232                                presentation_time,
233                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
234                                presentation_cycle, transmit_at_cycle);
235
236             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
237         }
238         else
239         {
240             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
241                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
242                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
243                                transmit_at_cycle, cycles_until_transmit,
244                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
245                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
246 #ifdef DEBUG
247             if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
248             {
249                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
250                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
251                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
252                                    transmit_at_cycle, cycles_until_transmit,
253                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
254                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
255             }
256 #endif
257             // we are too early, send only an empty packet
258             return eCRV_EmptyPacket;
259         }
260     }
261     return eCRV_Invalid;
262 }
263
264 enum StreamProcessor::eChildReturnValue
265 AmdtpTransmitStreamProcessor::generatePacketData (
266     unsigned char *data, unsigned int *length )
267 {
268     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
269     {
270         debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
271                            "XMIT DATA: TSP= %011llu (%04u)\n",
272                            m_last_timestamp,
273                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
274         #if 0
275         // debug code to output the packet content
276         char tmpbuff[8192];
277         int cnt=0;
278         quadlet_t *tmp = (quadlet_t *)((char *)(data + 8));
279
280         for(int i=0; i<m_syt_interval; i++) {
281             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "[%02d] ", i);
282             for(int j=0; j<m_dimension; j++) {
283                 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "%08X ", *tmp);
284                 tmp++;
285             }
286             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "\n");
287         }
288         debugOutput(DEBUG_LEVEL_VERBOSE, "\n%s\n", tmpbuff);
289         #endif
290         return eCRV_OK;
291     }
292     else return eCRV_XRun;
293 }
294
295 enum StreamProcessor::eChildReturnValue
296 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
297     unsigned char *data, unsigned int *length,
298     unsigned char *tag, unsigned char *sy,
299     uint32_t pkt_ctr )
300 {
301     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
302     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
303                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
304                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
305                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
306
307     packet->sid = m_local_node_id;
308
309     packet->dbs = m_dimension;
310     packet->fn = 0;
311     packet->qpc = 0;
312     packet->sph = 0;
313     packet->reserved = 0;
314     packet->dbc = m_dbc;
315     packet->eoh1 = 2;
316     packet->fmt = IEC61883_FMT_AMDTP;
317
318     *tag = IEC61883_TAG_WITH_CIP;
319     *sy = 0;
320
321     m_dbc += fillNoDataPacketHeader(packet, length);
322     return eCRV_Packet;
323 }
324
325 enum StreamProcessor::eChildReturnValue
326 AmdtpTransmitStreamProcessor::generateSilentPacketData (
327     unsigned char *data, unsigned int *length )
328 {
329     return eCRV_OK; // no need to do anything
330 }
331
332 enum StreamProcessor::eChildReturnValue
333 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
334     unsigned char *data, unsigned int *length,
335     unsigned char *tag, unsigned char *sy,
336     uint32_t pkt_ctr )
337 {
338     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
339     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
340                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
341                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
342                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
343     packet->sid = m_local_node_id;
344
345     packet->dbs = m_dimension;
346     packet->fn = 0;
347     packet->qpc = 0;
348     packet->sph = 0;
349     packet->reserved = 0;
350     packet->dbc = m_dbc;
351     packet->eoh1 = 2;
352     packet->fmt = IEC61883_FMT_AMDTP;
353
354     *tag = IEC61883_TAG_WITH_CIP;
355     *sy = 0;
356
357     m_dbc += fillNoDataPacketHeader(packet, length);
358     return eCRV_OK;
359 }
360
361 enum StreamProcessor::eChildReturnValue
362 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
363     unsigned char *data, unsigned int *length )
364 {
365     return eCRV_OK; // no need to do anything
366 }
367
368 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
369     struct iec61883_packet *packet, unsigned int* length,
370     uint32_t ts )
371 {
372
373     packet->fdf = m_fdf;
374
375     // convert the timestamp to SYT format
376     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
377     packet->syt = CondSwapToBus16 ( timestamp_SYT );
378
379     // FIXME: use a precomputed value here
380     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
381
382     return m_syt_interval;
383 }
384
385 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
386     struct iec61883_packet *packet, unsigned int* length )
387 {
388     // no-data packets have syt=0xFFFF
389     // and (can) have the usual amount of events as dummy data
390     // DBC is not increased
391     packet->fdf = IEC61883_FDF_NODATA;
392     packet->syt = 0xffff;
393
394 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
395     if ( m_send_nodata_payload )
396     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
397         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
398         return m_syt_interval;
399     } else { // no-data packets without payload
400         *length = 2*sizeof ( quadlet_t );
401         return 0;
402     }
403 #else
404     // no-data packets without payload
405     *length = 2*sizeof ( quadlet_t );
406     return 0;
407 #endif
408 }
409
410 unsigned int
411 AmdtpTransmitStreamProcessor::getSytInterval() {
412     switch (m_StreamProcessorManager.getNominalRate()) {
413         case 32000:
414         case 44100:
415         case 48000:
416             return 8;
417         case 88200:
418         case 96000:
419             return 16;
420         case 176400:
421         case 192000:
422             return 32;
423         default:
424             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
425             return 0;
426     }
427 }
428
429 unsigned int
430 AmdtpTransmitStreamProcessor::getFDF() {
431     switch (m_StreamProcessorManager.getNominalRate()) {
432         case 32000: return IEC61883_FDF_SFC_32KHZ;
433         case 44100: return IEC61883_FDF_SFC_44K1HZ;
434         case 48000: return IEC61883_FDF_SFC_48KHZ;
435         case 88200: return IEC61883_FDF_SFC_88K2HZ;
436         case 96000: return IEC61883_FDF_SFC_96KHZ;
437         case 176400: return IEC61883_FDF_SFC_176K4HZ;
438         case 192000: return IEC61883_FDF_SFC_192KHZ;
439         default:
440             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
441             return 0;
442     }
443 }
444
445 bool AmdtpTransmitStreamProcessor::prepareChild()
446 {
447     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
448     m_syt_interval = getSytInterval();
449     m_fdf = getFDF();
450
451     debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF             : %d / %d\n", m_syt_interval, m_fdf );
452 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
453     debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
454 #endif
455     debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles      : %d\n", m_max_cycles_to_transmit_early );
456     debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay                 : %d\n", m_transmit_transfer_delay );
457     debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
458
459     iec61883_cip_init (
460         &m_cip_status,
461         IEC61883_FMT_AMDTP,
462         m_fdf,
463         m_StreamProcessorManager.getNominalRate(),
464         m_dimension,
465         m_syt_interval );
466
467     if (!initPortCache()) {
468         debugError("Could not init port cache\n");
469         return false;
470     }
471
472     return true;
473 }
474
475 /*
476 * compose the event streams for the packets from the port buffers
477 */
478 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
479         unsigned int nevents, unsigned int offset )
480 {
481     // update the variable parts of the cache
482     updatePortCache();
483
484     // encode audio data
485     switch(m_StreamProcessorManager.getAudioDataType()) {
486         case StreamProcessorManager::eADT_Int24:
487             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
488             break;
489         case StreamProcessorManager::eADT_Float:
490             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
491             break;
492     }
493
494     // do midi ports
495     encodeMidiPorts((quadlet_t *)data, offset, nevents);
496     return true;
497 }
498
499 bool
500 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
501     char *data, unsigned int nevents, unsigned int offset)
502 {
503     // no need to update the port cache when transmitting silence since
504     // no dynamic values are used to do so.
505     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
506     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
507     return true;
508 }
509
510 /**
511  * @brief encodes all audio ports in the cache to events (silent data)
512  * @param data
513  * @param offset
514  * @param nevents
515  */
516 void
517 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
518                                                       unsigned int offset,
519                                                       unsigned int nevents)
520 {
521     unsigned int j;
522     quadlet_t *target_event;
523     int i;
524
525     for (i = 0; i < m_nb_audio_ports; i++) {
526         target_event = (quadlet_t *)(data + i);
527
528         for (j = 0;j < nevents; j += 1)
529         {
530             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
531             target_event += m_dimension;
532         }
533     }
534 }
535
536 #ifdef __SSE2__
537 #include <emmintrin.h>
538 #warning SSE2 build
539
540 /**
541  * @brief mux all audio ports to events
542  * @param data
543  * @param offset
544  * @param nevents
545  */
546 void
547 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
548                                                     unsigned int offset,
549                                                     unsigned int nevents)
550 {
551     unsigned int j;
552     quadlet_t *target_event;
553     int i;
554
555     float * client_buffers[4];
556     float tmp_values[4] __attribute__ ((aligned (16)));
557     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
558
559     // prepare the scratch buffer
560     assert(m_scratch_buffer_size_bytes > nevents * 4);
561     memset(m_scratch_buffer, 0, nevents * 4);
562
563     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
564     const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
565     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
566
567 #if AMDTP_CLIP_FLOATS
568     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
569     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
570 #endif
571
572     // this assumes that audio ports are sorted by position,
573     // and that there are no gaps
574     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
575         struct _MBLA_port_cache *p;
576
577         // get the port buffers
578         for (j=0; j<4; j++) {
579             p = &(m_audio_ports.at(i+j));
580             if(likely(p->buffer && p->enabled)) {
581                 client_buffers[j] = (float *) p->buffer;
582                 client_buffers[j] += offset;
583             } else {
584                 // if a port is disabled or has no valid
585                 // buffer, use the scratch buffer (all zero's)
586                 client_buffers[j] = (float *) m_scratch_buffer;
587             }
588         }
589
590         // the base event for this position
591         target_event = (quadlet_t *)(data + i);
592         // process the events
593         for (j=0;j < nevents; j += 1)
594         {
595             // read the values
596             tmp_values[0] = *(client_buffers[0]);
597             tmp_values[1] = *(client_buffers[1]);
598             tmp_values[2] = *(client_buffers[2]);
599             tmp_values[3] = *(client_buffers[3]);
600
601             // now do the SSE based conversion/labeling
602             __m128 v_float = *((__m128*)tmp_values);
603             __m128i *target = (__m128i*)target_event;
604             __m128i v_int;
605
606             // clip
607 #if AMDTP_CLIP_FLOATS
608             // do SSE clipping
609             v_float = _mm_max_ps(v_float, v_min);
610             v_float = _mm_min_ps(v_float, v_max);
611 #endif
612
613             // multiply
614             v_float = _mm_mul_ps(v_float, mult);
615             // convert to signed integer
616             v_int = _mm_cvttps_epi32( v_float );
617             // mask
618             v_int = _mm_and_si128( v_int, mask );
619             // label it
620             v_int = _mm_or_si128( v_int, label );
621
622             // do endian conversion (SSE is always little endian)
623             // do first swap
624             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
625             // do second swap
626             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
627             // store the packed int
628             // (target misalignment is assumed since we don't know the m_dimension)
629             _mm_storeu_si128 (target, v_int);
630
631             // increment the buffer pointers
632             client_buffers[0]++;
633             client_buffers[1]++;
634             client_buffers[2]++;
635             client_buffers[3]++;
636
637             // go to next target event position
638             target_event += m_dimension;
639         }
640     }
641
642     // do remaining ports
643     // NOTE: these can be time-SSE'd
644     for (; i < (int)m_nb_audio_ports; i++) {
645         struct _MBLA_port_cache &p = m_audio_ports.at(i);
646         target_event = (quadlet_t *)(data + i);
647         assert(nevents + offset <= p.buffer_size );
648
649         if(likely(p.buffer && p.enabled)) {
650             float *buffer = (float *)(p.buffer);
651             buffer += offset;
652    
653             for (j = 0;j < nevents; j += 4)
654             {
655                 // read the values
656                 tmp_values[0] = *buffer;
657                 buffer++;
658                 tmp_values[1] = *buffer;
659                 buffer++;
660                 tmp_values[2] = *buffer;
661                 buffer++;
662                 tmp_values[3] = *buffer;
663                 buffer++;
664
665                 // now do the SSE based conversion/labeling
666                 __m128 v_float = *((__m128*)tmp_values);
667                 __m128i v_int;
668
669 #if AMDTP_CLIP_FLOATS
670                 // do SSE clipping
671                 v_float = _mm_max_ps(v_float, v_min);
672                 v_float = _mm_min_ps(v_float, v_max);
673 #endif
674                 // multiply
675                 v_float = _mm_mul_ps(v_float, mult);
676                 // convert to signed integer
677                 v_int = _mm_cvttps_epi32( v_float );
678                 // mask
679                 v_int = _mm_and_si128( v_int, mask );
680                 // label it
681                 v_int = _mm_or_si128( v_int, label );
682    
683                 // do endian conversion (SSE is always little endian)
684                 // do first swap
685                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
686                 // do second swap
687                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
688
689                 // store the packed int
690                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
691
692                 // increment the buffer pointers
693                 *target_event = tmp_values_int[0];
694                 target_event += m_dimension;
695                 *target_event = tmp_values_int[1];
696                 target_event += m_dimension;
697                 *target_event = tmp_values_int[2];
698                 target_event += m_dimension;
699                 *target_event = tmp_values_int[3];
700                 target_event += m_dimension;
701             }
702
703             // do the remainder of the events
704             for(;j < nevents; j += 1) {
705                 float *in = (float *)buffer;
706 #if AMDTP_CLIP_FLOATS
707                 // clip directly to the value of a maxed event
708                 if(unlikely(*in > 1.0)) {
709                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
710                 } else if(unlikely(*in < -1.0)) {
711                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
712                 } else {
713                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
714                     unsigned int tmp = ((int) v);
715                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
716                     *target_event = CondSwapToBus32((quadlet_t)tmp);
717                 }
718 #else
719                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
720                 unsigned int tmp = ((int) v);
721                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
722                 *target_event = CondSwapToBus32((quadlet_t)tmp);
723 #endif
724                 buffer++;
725                 target_event += m_dimension;
726             }
727
728         } else {
729             for (j = 0;j < nevents; j += 1)
730             {
731                 // hardcoded byte swapped
732                 *target_event = 0x00000040;
733                 target_event += m_dimension;
734             }
735         }
736     }
737 }
738
739
740 /**
741  * @brief mux all audio ports to events
742  * @param data
743  * @param offset
744  * @param nevents
745  */
746 void
747 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
748                                                     unsigned int offset,
749                                                     unsigned int nevents)
750 {
751     unsigned int j;
752     quadlet_t *target_event;
753     int i;
754
755     uint32_t *client_buffers[4];
756     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
757
758     // prepare the scratch buffer
759     assert(m_scratch_buffer_size_bytes > nevents * 4);
760     memset(m_scratch_buffer, 0, nevents * 4);
761
762     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
763     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
764
765     // this assumes that audio ports are sorted by position,
766     // and that there are no gaps
767     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
768         struct _MBLA_port_cache *p;
769
770         // get the port buffers
771         for (j=0; j<4; j++) {
772             p = &(m_audio_ports.at(i+j));
773             if(likely(p->buffer && p->enabled)) {
774                 client_buffers[j] = (uint32_t *) p->buffer;
775                 client_buffers[j] += offset;
776             } else {
777                 // if a port is disabled or has no valid
778                 // buffer, use the scratch buffer (all zero's)
779                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
780             }
781         }
782
783         // the base event for this position
784         target_event = (quadlet_t *)(data + i);
785
786         // process the events
787         for (j=0;j < nevents; j += 1)
788         {
789             // read the values
790             tmp_values[0] = *(client_buffers[0]);
791             tmp_values[1] = *(client_buffers[1]);
792             tmp_values[2] = *(client_buffers[2]);
793             tmp_values[3] = *(client_buffers[3]);
794
795             // now do the SSE based conversion/labeling
796             __m128i *target = (__m128i*)target_event;
797             __m128i v_int = *((__m128i*)tmp_values);;
798
799             // mask
800             v_int = _mm_and_si128( v_int, mask );
801             // label it
802             v_int = _mm_or_si128( v_int, label );
803
804             // do endian conversion (SSE is always little endian)
805             // do first swap
806             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
807             // do second swap
808             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
809
810             // store the packed int
811             // (target misalignment is assumed since we don't know the m_dimension)
812             _mm_storeu_si128 (target, v_int);
813
814             // increment the buffer pointers
815             client_buffers[0]++;
816             client_buffers[1]++;
817             client_buffers[2]++;
818             client_buffers[3]++;
819
820             // go to next target event position
821             target_event += m_dimension;
822         }
823     }
824
825     // do remaining ports
826     // NOTE: these can be time-SSE'd
827     for (; i < ((int)m_nb_audio_ports); i++) {
828         struct _MBLA_port_cache &p = m_audio_ports.at(i);
829         target_event = (quadlet_t *)(data + i);
830         assert(nevents + offset <= p.buffer_size );
831
832         if(likely(p.buffer && p.enabled)) {
833             uint32_t *buffer = (uint32_t *)(p.buffer);
834             buffer += offset;
835    
836             for (j = 0;j < nevents; j += 4)
837             {
838                 // read the values
839                 tmp_values[0] = *buffer;
840                 buffer++;
841                 tmp_values[1] = *buffer;
842                 buffer++;
843                 tmp_values[2] = *buffer;
844                 buffer++;
845                 tmp_values[3] = *buffer;
846                 buffer++;
847
848                 // now do the SSE based conversion/labeling
849                 __m128i v_int = *((__m128i*)tmp_values);;
850
851                 // mask
852                 v_int = _mm_and_si128( v_int, mask );
853                 // label it
854                 v_int = _mm_or_si128( v_int, label );
855
856                 // do endian conversion (SSE is always little endian)
857                 // do first swap
858                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
859                 // do second swap
860                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
861
862                 // store the packed int
863                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
864
865                 // increment the buffer pointers
866                 *target_event = tmp_values[0];
867                 target_event += m_dimension;
868                 *target_event = tmp_values[1];
869                 target_event += m_dimension;
870                 *target_event = tmp_values[2];
871                 target_event += m_dimension;
872                 *target_event = tmp_values[3];
873                 target_event += m_dimension;
874             }
875
876             // do the remainder of the events
877             for(;j < nevents; j += 1) {
878                 uint32_t in = (uint32_t)(*buffer);
879                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
880                 buffer++;
881                 target_event += m_dimension;
882             }
883
884         } else {
885             for (j = 0;j < nevents; j += 1)
886             {
887                 // hardcoded byte swapped
888                 *target_event = 0x00000040;
889                 target_event += m_dimension;
890             }
891         }
892     }
893 }
894
895 #else
896
897 /**
898  * @brief mux all audio ports to events
899  * @param data
900  * @param offset
901  * @param nevents
902  */
903 void
904 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
905                                                     unsigned int offset,
906                                                     unsigned int nevents)
907 {
908     unsigned int j;
909     quadlet_t *target_event;
910     int i;
911
912     for (i = 0; i < m_nb_audio_ports; i++) {
913         struct _MBLA_port_cache &p = m_audio_ports.at(i);
914         target_event = (quadlet_t *)(data + i);
915         assert(nevents + offset <= p.buffer_size );
916
917         if(likely(p.buffer && p.enabled)) {
918             quadlet_t *buffer = (quadlet_t *)(p.buffer);
919             buffer += offset;
920    
921             for (j = 0;j < nevents; j += 1)
922             {
923                 uint32_t in = (uint32_t)(*buffer);
924                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
925                 buffer++;
926                 target_event += m_dimension;
927             }
928         } else {
929             for (j = 0;j < nevents; j += 1)
930             {
931                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
932                 target_event += m_dimension;
933             }
934         }
935     }
936 }
937
938 /**
939  * @brief mux all audio ports to events
940  * @param data
941  * @param offset
942  * @param nevents
943  */
944 void
945 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
946                                                     unsigned int offset,
947                                                     unsigned int nevents)
948 {
949     unsigned int j;
950     quadlet_t *target_event;
951     int i;
952
953     for (i = 0; i < m_nb_audio_ports; i++) {
954         struct _MBLA_port_cache &p = m_audio_ports.at(i);
955         target_event = (quadlet_t *)(data + i);
956         assert(nevents + offset <= p.buffer_size );
957
958         if(likely(p.buffer && p.enabled)) {
959             quadlet_t *buffer = (quadlet_t *)(p.buffer);
960             buffer += offset;
961    
962             for (j = 0;j < nevents; j += 1)
963             {
964                 float *in = (float *)buffer;
965 #if AMDTP_CLIP_FLOATS
966                 // clip directly to the value of a maxed event
967                 if(unlikely(*in > 1.0)) {
968                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
969                 } else if(unlikely(*in < -1.0)) {
970                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
971                 } else {
972                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
973                     unsigned int tmp = ((int) v);
974                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
975                     *target_event = CondSwapToBus32((quadlet_t)tmp);
976                 }
977 #else
978                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
979                 unsigned int tmp = ((int) v);
980                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
981                 *target_event = CondSwapToBus32((quadlet_t)tmp);
982 #endif
983                 buffer++;
984                 target_event += m_dimension;
985             }
986         } else {
987             for (j = 0;j < nevents; j += 1)
988             {
989                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
990                 target_event += m_dimension;
991             }
992         }
993     }
994 }
995 #endif
996
997 /**
998  * @brief encodes all midi ports in the cache to events (silence)
999  * @param data
1000  * @param offset
1001  * @param nevents
1002  */
1003 void
1004 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
1005                                                      unsigned int offset,
1006                                                      unsigned int nevents)
1007 {
1008     quadlet_t *target_event;
1009     int i;
1010     unsigned int j;
1011
1012     for (i = 0; i < m_nb_midi_ports; i++) {
1013         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1014
1015         for (j = p.location;j < nevents; j += 8) {
1016             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1017             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1018         }
1019     }
1020 }
1021
1022 /**
1023  * @brief encodes all midi ports in the cache to events
1024  * @param data
1025  * @param offset
1026  * @param nevents
1027  */
1028 void
1029 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1030                                               unsigned int offset,
1031                                               unsigned int nevents)
1032 {
1033     quadlet_t *target_event;
1034     int i;
1035     unsigned int j;
1036
1037     for (i = 0; i < m_nb_midi_ports; i++) {
1038         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1039         if (p.buffer && p.enabled) {
1040             uint32_t *buffer = (quadlet_t *)(p.buffer);
1041             buffer += offset;
1042
1043             for (j = p.location;j < nevents; j += 8) {
1044                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1045
1046                 if ( *buffer & 0xFF000000 )   // we can send a byte
1047                 {
1048                     quadlet_t tmpval;
1049                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1050                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1051                     tmpval = 0x817F0000;
1052                     *target_event = CondSwapToBus32(tmpval);
1053
1054                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1055                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1056                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1057                                data, target_event, tmpval );
1058                 } else {
1059                     // can't send a byte, either because there is no byte,
1060                     // or because this would exceed the maximum rate
1061                     // FIXME: this can be ifdef optimized since it's a constant
1062                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1063                 }
1064                 buffer+=8;
1065             }
1066         } else {
1067             for (j = p.location;j < nevents; j += 8) {
1068                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1069                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1070                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1071             }
1072         }
1073     }
1074 }
1075
1076 bool
1077 AmdtpTransmitStreamProcessor::initPortCache() {
1078     // make use of the fact that audio ports are the first ports in
1079     // the cluster as per AMDTP. so we can sort the ports by position
1080     // and have very efficient lookups:
1081     // m_float_ports.at(i).buffer -> audio stream i buffer
1082     // for midi ports we simply cache all port info since they are (usually) not
1083     // that numerous
1084     m_nb_audio_ports = 0;
1085     m_audio_ports.clear();
1086    
1087     m_nb_midi_ports = 0;
1088     m_midi_ports.clear();
1089    
1090     for(PortVectorIterator it = m_Ports.begin();
1091         it != m_Ports.end();
1092         ++it )
1093     {
1094         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1095         assert(pinfo); // this should not fail!!
1096
1097         switch( pinfo->getFormat() )
1098         {
1099             case AmdtpPortInfo::E_MBLA:
1100                 m_nb_audio_ports++;
1101                 break;
1102             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1103                 break;
1104             case AmdtpPortInfo::E_Midi:
1105                 m_nb_midi_ports++;
1106                 break;
1107             default: // ignore
1108                 break;
1109         }
1110     }
1111
1112     int idx;
1113     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1114         for(PortVectorIterator it = m_Ports.begin();
1115             it != m_Ports.end();
1116             ++it )
1117         {
1118             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1119             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1120                         "idx %u: looking at port %s at position %u\n",
1121                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1122             if(pinfo->getPosition() == (unsigned int)idx) {
1123                 struct _MBLA_port_cache p;
1124                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1125                 if(p.port == NULL) {
1126                     debugError("Port is not an AmdtpAudioPort!\n");
1127                     return false;
1128                 }
1129                 p.buffer = NULL; // to be filled by updatePortCache
1130                 #ifdef DEBUG
1131                 p.buffer_size = (*it)->getBufferSize();
1132                 #endif
1133
1134                 m_audio_ports.push_back(p);
1135                 debugOutput(DEBUG_LEVEL_VERBOSE,
1136                             "Cached port %s at position %u\n",
1137                             p.port->getName().c_str(), idx);
1138                 goto next_index;
1139             }
1140         }
1141         debugError("No MBLA port found for position %d\n", idx);
1142         return false;
1143 next_index:
1144         continue;
1145     }
1146
1147     for(PortVectorIterator it = m_Ports.begin();
1148         it != m_Ports.end();
1149         ++it )
1150     {
1151         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1152         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1153                     "idx %u: looking at port %s at position %u, location %u\n",
1154                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1155         if ((*it)->getPortType() == Port::E_Midi) {
1156             struct _MIDI_port_cache p;
1157             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1158             if(p.port == NULL) {
1159                 debugError("Port is not an AmdtpMidiPort!\n");
1160                 return false;
1161             }
1162             p.position = pinfo->getPosition();
1163             p.location = pinfo->getLocation();
1164             p.buffer = NULL; // to be filled by updatePortCache
1165             #ifdef DEBUG
1166             p.buffer_size = (*it)->getBufferSize();
1167             #endif
1168
1169             m_midi_ports.push_back(p);
1170             debugOutput(DEBUG_LEVEL_VERBOSE,
1171                         "Cached port %s at position %u, location %u\n",
1172                         p.port->getName().c_str(), p.position, p.location);
1173         }
1174     }
1175
1176     return true;
1177 }
1178
1179 void
1180 AmdtpTransmitStreamProcessor::updatePortCache() {
1181     int idx;
1182     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1183         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1184         AmdtpAudioPort *port = p.port;
1185         p.buffer = port->getBufferAddress();
1186         p.enabled = !port->isDisabled();
1187     }
1188     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1189         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1190         AmdtpMidiPort *port = p.port;
1191         p.buffer = port->getBufferAddress();
1192         p.enabled = !port->isDisabled();
1193     }
1194 }
1195
1196 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.