root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1531, 41.6 kB (checked in by ppalmers, 14 years ago)

svn merge -r 1506:HEAD svn+ssh://ffadosvn@ffado.org/ffado/branches/libffado-2.0

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25
26 #include "AmdtpTransmitStreamProcessor.h"
27 #include "AmdtpPort.h"
28 #include "../StreamProcessorManager.h"
29 #include "devicemanager.h"
30
31 #include "libutil/Time.h"
32 #include "libutil/float_cast.h"
33
34 #include "libieee1394/ieee1394service.h"
35 #include "libieee1394/IsoHandlerManager.h"
36 #include "libieee1394/cycletimer.h"
37
38 #include "libutil/ByteSwap.h"
39 #include <assert.h>
40 #include <cstring>
41
42 #define likely(x)   __builtin_expect((x),1)
43 #define unlikely(x) __builtin_expect((x),0)
44
45 #define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
46 namespace Streaming
47 {
48
49 /* transmit */
50 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
51         : StreamProcessor(parent, ePT_Transmit)
52         , m_dimension( dimension )
53         , m_dbc( 0 )
54 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
55         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
56 #endif
57         , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
58         , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
59         , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
60         , m_nb_audio_ports( 0 )
61         , m_nb_midi_ports( 0 )
62 {}
63
64 enum StreamProcessor::eChildReturnValue
65 AmdtpTransmitStreamProcessor::generatePacketHeader (
66     unsigned char *data, unsigned int *length,
67     unsigned char *tag, unsigned char *sy,
68     uint32_t pkt_ctr )
69 {
70     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
71     struct iec61883_packet *packet = (struct iec61883_packet *)data;
72     /* Our node ID can change after a bus reset, so it is best to fetch
73     * our node ID for each packet. */
74     packet->sid = m_local_node_id;
75
76     packet->dbs = m_dimension;
77     packet->fn = 0;
78     packet->qpc = 0;
79     packet->sph = 0;
80     packet->reserved = 0;
81     packet->dbc = m_dbc;
82     packet->eoh1 = 2;
83     packet->fmt = IEC61883_FMT_AMDTP;
84
85     *tag = IEC61883_TAG_WITH_CIP;
86     *sy = 0;
87
88     signed int fc;
89     uint64_t presentation_time;
90     unsigned int presentation_cycle;
91     int cycles_until_presentation;
92
93     uint64_t transmit_at_time;
94     unsigned int transmit_at_cycle;
95     int cycles_until_transmit;
96
97     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
98                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
99     // check whether the packet buffer has packets for us to send.
100     // the base timestamp is the one of the next sample in the buffer
101     ffado_timestamp_t ts_head_tmp;
102     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
103
104     // the timestamp gives us the time at which we want the sample block
105     // to be output by the device
106     presentation_time = ( uint64_t ) ts_head_tmp;
107
108     // now we calculate the time when we have to transmit the sample block
109     transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
110
111     // calculate the cycle this block should be presented in
112     // (this is just a virtual calculation since at that time it should
113     //  already be in the device's buffer)
114     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
115
116     // calculate the cycle this block should be transmitted in
117     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
118
119     // we can check whether this cycle is within the 'window' we have
120     // to send this packet.
121     // first calculate the number of cycles left before presentation time
122     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
123
124     // we can check whether this cycle is within the 'window' we have
125     // to send this packet.
126     // first calculate the number of cycles left before presentation time
127     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
128
129     // two different options:
130     // 1) there are not enough frames for one packet
131     //      => determine wether this is a problem, since we might still
132     //         have some time to send it
133     // 2) there are enough packets
134     //      => determine whether we have to send them in this packet
135     if ( fc < ( signed int ) m_syt_interval )
136     {
137         // not enough frames in the buffer,
138
139         // we can still postpone the queueing of the packets
140         // if we are far enough ahead of the presentation time
141         if ( cycles_until_presentation <= m_min_cycles_before_presentation )
142         {
143             debugOutput( DEBUG_LEVEL_NORMAL,
144                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
145                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
146                          transmit_at_cycle, cycles_until_transmit );
147             // we are too late
148             return eCRV_XRun;
149         }
150         else
151         {
152             #if DEBUG_EXTREME
153             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
154
155             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
156                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
157                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
158                                transmit_at_cycle, cycles_until_transmit, now_cycle );
159             #endif
160
161             // there is still time left to send the packet
162             // we want the system to give this packet another go at a later time instant
163             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
164
165             // we could wait here for a certain time before trying again. However, this
166             // is not going to work since we then block the iterator thread, hence also
167             // the receiving code, meaning that we are not processing received packets,
168             // and hence there is no progression in the number of frames available.
169
170             // for example:
171             // SleepRelativeUsec(125); // one cycle
172             // goto try_block_of_frames;
173
174             // or more advanced, calculate how many cycles we are ahead of 'now' and
175             // base the sleep on that.
176
177             // note that this requires that there is one thread for each IsoHandler,
178             // otherwise we're in the deadlock described above.
179         }
180     }
181     else
182     {
183         // there are enough frames, so check the time they are intended for
184         // all frames have a certain 'time window' in which they can be sent
185         // this corresponds to the range of the timestamp mechanism:
186         // we can send a packet 15 cycles in advance of the 'presentation time'
187         // in theory we can send the packet up till one cycle before the presentation time,
188         // however this is not very smart.
189
190         // There are 3 options:
191         // 1) the frame block is too early
192         //      => send an empty packet
193         // 2) the frame block is within the window
194         //      => send it
195         // 3) the frame block is too late
196         //      => discard (and raise xrun?)
197         //         get next block of frames and repeat
198
199         if(cycles_until_transmit < 0)
200         {
201             // we are too late
202             debugOutput(DEBUG_LEVEL_VERBOSE,
203                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
204                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
205                         transmit_at_cycle, cycles_until_transmit,
206                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
207             //debugShowBackLogLines(200);
208             // however, if we can send this sufficiently before the presentation
209             // time, it could be harmless.
210             // NOTE: dangerous since the device has no way of reporting that it didn't get
211             //       this packet on time.
212             if(cycles_until_presentation >= m_min_cycles_before_presentation)
213             {
214                 // we are not that late and can still try to transmit the packet
215                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
216                 m_last_timestamp = presentation_time;
217                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
218             }
219             else   // definitely too late
220             {
221                 return eCRV_XRun;
222             }
223         }
224         else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
225         {
226             // it's time send the packet
227             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
228             m_last_timestamp = presentation_time;
229
230             // for timestamp tracing
231             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
232                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
233                                presentation_time,
234                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
235                                presentation_cycle, transmit_at_cycle);
236
237             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
238         }
239         else
240         {
241             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
242                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
243                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
244                                transmit_at_cycle, cycles_until_transmit,
245                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
246                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
247 #ifdef DEBUG
248             if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
249             {
250                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
251                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
252                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
253                                    transmit_at_cycle, cycles_until_transmit,
254                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
255                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
256             }
257 #endif
258             // we are too early, send only an empty packet
259             return eCRV_EmptyPacket;
260         }
261     }
262     return eCRV_Invalid;
263 }
264
265 enum StreamProcessor::eChildReturnValue
266 AmdtpTransmitStreamProcessor::generatePacketData (
267     unsigned char *data, unsigned int *length )
268 {
269     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
270     {
271         debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
272                            "XMIT DATA: TSP= %011llu (%04u)\n",
273                            m_last_timestamp,
274                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
275         return eCRV_OK;
276     }
277     else return eCRV_XRun;
278 }
279
280 enum StreamProcessor::eChildReturnValue
281 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
282     unsigned char *data, unsigned int *length,
283     unsigned char *tag, unsigned char *sy,
284     uint32_t pkt_ctr )
285 {
286     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
287     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
288                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
289                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
290                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
291
292     packet->sid = m_local_node_id;
293
294     packet->dbs = m_dimension;
295     packet->fn = 0;
296     packet->qpc = 0;
297     packet->sph = 0;
298     packet->reserved = 0;
299     packet->dbc = m_dbc;
300     packet->eoh1 = 2;
301     packet->fmt = IEC61883_FMT_AMDTP;
302
303     *tag = IEC61883_TAG_WITH_CIP;
304     *sy = 0;
305
306     m_dbc += fillNoDataPacketHeader(packet, length);
307     return eCRV_Packet;
308 }
309
310 enum StreamProcessor::eChildReturnValue
311 AmdtpTransmitStreamProcessor::generateSilentPacketData (
312     unsigned char *data, unsigned int *length )
313 {
314     return eCRV_OK; // no need to do anything
315 }
316
317 enum StreamProcessor::eChildReturnValue
318 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
319     unsigned char *data, unsigned int *length,
320     unsigned char *tag, unsigned char *sy,
321     uint32_t pkt_ctr )
322 {
323     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
324     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
325                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
326                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
327                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
328     packet->sid = m_local_node_id;
329
330     packet->dbs = m_dimension;
331     packet->fn = 0;
332     packet->qpc = 0;
333     packet->sph = 0;
334     packet->reserved = 0;
335     packet->dbc = m_dbc;
336     packet->eoh1 = 2;
337     packet->fmt = IEC61883_FMT_AMDTP;
338
339     *tag = IEC61883_TAG_WITH_CIP;
340     *sy = 0;
341
342     m_dbc += fillNoDataPacketHeader(packet, length);
343     return eCRV_OK;
344 }
345
346 enum StreamProcessor::eChildReturnValue
347 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
348     unsigned char *data, unsigned int *length )
349 {
350     return eCRV_OK; // no need to do anything
351 }
352
353 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
354     struct iec61883_packet *packet, unsigned int* length,
355     uint32_t ts )
356 {
357
358     packet->fdf = m_fdf;
359
360     // convert the timestamp to SYT format
361     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
362     packet->syt = CondSwapToBus16 ( timestamp_SYT );
363
364     // FIXME: use a precomputed value here
365     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
366
367     return m_syt_interval;
368 }
369
370 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
371     struct iec61883_packet *packet, unsigned int* length )
372 {
373     // no-data packets have syt=0xFFFF
374     // and (can) have the usual amount of events as dummy data
375     // DBC is not increased
376     packet->fdf = IEC61883_FDF_NODATA;
377     packet->syt = 0xffff;
378
379 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
380     if ( m_send_nodata_payload )
381     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
382         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
383         return m_syt_interval;
384     } else { // no-data packets without payload
385         *length = 2*sizeof ( quadlet_t );
386         return 0;
387     }
388 #else
389     // no-data packets without payload
390     *length = 2*sizeof ( quadlet_t );
391     return 0;
392 #endif
393 }
394
395 unsigned int
396 AmdtpTransmitStreamProcessor::getSytInterval() {
397     switch (m_StreamProcessorManager.getNominalRate()) {
398         case 32000:
399         case 44100:
400         case 48000:
401             return 8;
402         case 88200:
403         case 96000:
404             return 16;
405         case 176400:
406         case 192000:
407             return 32;
408         default:
409             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
410             return 0;
411     }
412 }
413
414 unsigned int
415 AmdtpTransmitStreamProcessor::getFDF() {
416     switch (m_StreamProcessorManager.getNominalRate()) {
417         case 32000: return IEC61883_FDF_SFC_32KHZ;
418         case 44100: return IEC61883_FDF_SFC_44K1HZ;
419         case 48000: return IEC61883_FDF_SFC_48KHZ;
420         case 88200: return IEC61883_FDF_SFC_88K2HZ;
421         case 96000: return IEC61883_FDF_SFC_96KHZ;
422         case 176400: return IEC61883_FDF_SFC_176K4HZ;
423         case 192000: return IEC61883_FDF_SFC_192KHZ;
424         default:
425             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
426             return 0;
427     }
428 }
429
430 bool AmdtpTransmitStreamProcessor::prepareChild()
431 {
432     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
433     m_syt_interval = getSytInterval();
434     m_fdf = getFDF();
435
436     debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF             : %d / %d\n", m_syt_interval, m_fdf );
437 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
438     debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
439 #endif
440     debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles      : %d\n", m_max_cycles_to_transmit_early );
441     debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay                 : %d\n", m_transmit_transfer_delay );
442     debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
443
444     iec61883_cip_init (
445         &m_cip_status,
446         IEC61883_FMT_AMDTP,
447         m_fdf,
448         m_StreamProcessorManager.getNominalRate(),
449         m_dimension,
450         m_syt_interval );
451
452     if (!initPortCache()) {
453         debugError("Could not init port cache\n");
454         return false;
455     }
456
457     return true;
458 }
459
460 /*
461 * compose the event streams for the packets from the port buffers
462 */
463 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
464         unsigned int nevents, unsigned int offset )
465 {
466     // update the variable parts of the cache
467     updatePortCache();
468
469     // encode audio data
470     switch(m_StreamProcessorManager.getAudioDataType()) {
471         case StreamProcessorManager::eADT_Int24:
472             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
473             break;
474         case StreamProcessorManager::eADT_Float:
475             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
476             break;
477     }
478
479     // do midi ports
480     encodeMidiPorts((quadlet_t *)data, offset, nevents);
481     return true;
482 }
483
484 bool
485 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
486     char *data, unsigned int nevents, unsigned int offset)
487 {
488     // no need to update the port cache when transmitting silence since
489     // no dynamic values are used to do so.
490     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
491     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
492     return true;
493 }
494
495 /**
496  * @brief encodes all audio ports in the cache to events (silent data)
497  * @param data
498  * @param offset
499  * @param nevents
500  */
501 void
502 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
503                                                       unsigned int offset,
504                                                       unsigned int nevents)
505 {
506     unsigned int j;
507     quadlet_t *target_event;
508     int i;
509
510     for (i = 0; i < m_nb_audio_ports; i++) {
511         target_event = (quadlet_t *)(data + i);
512
513         for (j = 0;j < nevents; j += 1)
514         {
515             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
516             target_event += m_dimension;
517         }
518     }
519 }
520
521 #ifdef __SSE2__
522 #include <emmintrin.h>
523 #warning SSE2 build
524
525 /**
526  * @brief mux all audio ports to events
527  * @param data
528  * @param offset
529  * @param nevents
530  */
531 void
532 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
533                                                     unsigned int offset,
534                                                     unsigned int nevents)
535 {
536     unsigned int j;
537     quadlet_t *target_event;
538     int i;
539
540     float * client_buffers[4];
541     float tmp_values[4] __attribute__ ((aligned (16)));
542     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
543
544     // prepare the scratch buffer
545     assert(m_scratch_buffer_size_bytes > nevents * 4);
546     memset(m_scratch_buffer, 0, nevents * 4);
547
548     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
549     const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
550     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
551
552 #if AMDTP_CLIP_FLOATS
553     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
554     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
555 #endif
556
557     // this assumes that audio ports are sorted by position,
558     // and that there are no gaps
559     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
560         struct _MBLA_port_cache *p;
561
562         // get the port buffers
563         for (j=0; j<4; j++) {
564             p = &(m_audio_ports.at(i+j));
565             if(likely(p->buffer && p->enabled)) {
566                 client_buffers[j] = (float *) p->buffer;
567                 client_buffers[j] += offset;
568             } else {
569                 // if a port is disabled or has no valid
570                 // buffer, use the scratch buffer (all zero's)
571                 client_buffers[j] = (float *) m_scratch_buffer;
572             }
573         }
574
575         // the base event for this position
576         target_event = (quadlet_t *)(data + i);
577         // process the events
578         for (j=0;j < nevents; j += 1)
579         {
580             // read the values
581             tmp_values[0] = *(client_buffers[0]);
582             tmp_values[1] = *(client_buffers[1]);
583             tmp_values[2] = *(client_buffers[2]);
584             tmp_values[3] = *(client_buffers[3]);
585
586             // now do the SSE based conversion/labeling
587             __m128 v_float = *((__m128*)tmp_values);
588             __m128i *target = (__m128i*)target_event;
589             __m128i v_int;
590
591             // clip
592 #if AMDTP_CLIP_FLOATS
593             // do SSE clipping
594             v_float = _mm_max_ps(v_float, v_min);
595             v_float = _mm_min_ps(v_float, v_max);
596 #endif
597
598             // multiply
599             v_float = _mm_mul_ps(v_float, mult);
600             // convert to signed integer
601             v_int = _mm_cvttps_epi32( v_float );
602             // mask
603             v_int = _mm_and_si128( v_int, mask );
604             // label it
605             v_int = _mm_or_si128( v_int, label );
606
607             // do endian conversion (SSE is always little endian)
608             // do first swap
609             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
610             // do second swap
611             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
612             // store the packed int
613             // (target misalignment is assumed since we don't know the m_dimension)
614             _mm_storeu_si128 (target, v_int);
615
616             // increment the buffer pointers
617             client_buffers[0]++;
618             client_buffers[1]++;
619             client_buffers[2]++;
620             client_buffers[3]++;
621
622             // go to next target event position
623             target_event += m_dimension;
624         }
625     }
626
627     // do remaining ports
628     // NOTE: these can be time-SSE'd
629     for (; i < (int)m_nb_audio_ports; i++) {
630         struct _MBLA_port_cache &p = m_audio_ports.at(i);
631         target_event = (quadlet_t *)(data + i);
632         assert(nevents + offset <= p.buffer_size );
633
634         if(likely(p.buffer && p.enabled)) {
635             float *buffer = (float *)(p.buffer);
636             buffer += offset;
637    
638             for (j = 0;j < nevents; j += 4)
639             {
640                 // read the values
641                 tmp_values[0] = *buffer;
642                 buffer++;
643                 tmp_values[1] = *buffer;
644                 buffer++;
645                 tmp_values[2] = *buffer;
646                 buffer++;
647                 tmp_values[3] = *buffer;
648                 buffer++;
649
650                 // now do the SSE based conversion/labeling
651                 __m128 v_float = *((__m128*)tmp_values);
652                 __m128i v_int;
653
654 #if AMDTP_CLIP_FLOATS
655                 // do SSE clipping
656                 v_float = _mm_max_ps(v_float, v_min);
657                 v_float = _mm_min_ps(v_float, v_max);
658 #endif
659                 // multiply
660                 v_float = _mm_mul_ps(v_float, mult);
661                 // convert to signed integer
662                 v_int = _mm_cvttps_epi32( v_float );
663                 // mask
664                 v_int = _mm_and_si128( v_int, mask );
665                 // label it
666                 v_int = _mm_or_si128( v_int, label );
667    
668                 // do endian conversion (SSE is always little endian)
669                 // do first swap
670                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
671                 // do second swap
672                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
673
674                 // store the packed int
675                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
676
677                 // increment the buffer pointers
678                 *target_event = tmp_values_int[0];
679                 target_event += m_dimension;
680                 *target_event = tmp_values_int[1];
681                 target_event += m_dimension;
682                 *target_event = tmp_values_int[2];
683                 target_event += m_dimension;
684                 *target_event = tmp_values_int[3];
685                 target_event += m_dimension;
686             }
687
688             // do the remainder of the events
689             for(;j < nevents; j += 1) {
690                 float *in = (float *)buffer;
691 #if AMDTP_CLIP_FLOATS
692                 // clip directly to the value of a maxed event
693                 if(unlikely(*in > 1.0)) {
694                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
695                 } else if(unlikely(*in < -1.0)) {
696                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
697                 } else {
698                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
699                     unsigned int tmp = ((int) v);
700                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
701                     *target_event = CondSwapToBus32((quadlet_t)tmp);
702                 }
703 #else
704                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
705                 unsigned int tmp = ((int) v);
706                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
707                 *target_event = CondSwapToBus32((quadlet_t)tmp);
708 #endif
709                 buffer++;
710                 target_event += m_dimension;
711             }
712
713         } else {
714             for (j = 0;j < nevents; j += 1)
715             {
716                 // hardcoded byte swapped
717                 *target_event = 0x00000040;
718                 target_event += m_dimension;
719             }
720         }
721     }
722 }
723
724
725 /**
726  * @brief mux all audio ports to events
727  * @param data
728  * @param offset
729  * @param nevents
730  */
731 void
732 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
733                                                     unsigned int offset,
734                                                     unsigned int nevents)
735 {
736     unsigned int j;
737     quadlet_t *target_event;
738     int i;
739
740     uint32_t *client_buffers[4];
741     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
742
743     // prepare the scratch buffer
744     assert(m_scratch_buffer_size_bytes > nevents * 4);
745     memset(m_scratch_buffer, 0, nevents * 4);
746
747     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
748     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
749
750     // this assumes that audio ports are sorted by position,
751     // and that there are no gaps
752     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
753         struct _MBLA_port_cache *p;
754
755         // get the port buffers
756         for (j=0; j<4; j++) {
757             p = &(m_audio_ports.at(i+j));
758             if(likely(p->buffer && p->enabled)) {
759                 client_buffers[j] = (uint32_t *) p->buffer;
760                 client_buffers[j] += offset;
761             } else {
762                 // if a port is disabled or has no valid
763                 // buffer, use the scratch buffer (all zero's)
764                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
765             }
766         }
767
768         // the base event for this position
769         target_event = (quadlet_t *)(data + i);
770
771         // process the events
772         for (j=0;j < nevents; j += 1)
773         {
774             // read the values
775             tmp_values[0] = *(client_buffers[0]);
776             tmp_values[1] = *(client_buffers[1]);
777             tmp_values[2] = *(client_buffers[2]);
778             tmp_values[3] = *(client_buffers[3]);
779
780             // now do the SSE based conversion/labeling
781             __m128i *target = (__m128i*)target_event;
782             __m128i v_int = *((__m128i*)tmp_values);;
783
784             // mask
785             v_int = _mm_and_si128( v_int, mask );
786             // label it
787             v_int = _mm_or_si128( v_int, label );
788
789             // do endian conversion (SSE is always little endian)
790             // do first swap
791             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
792             // do second swap
793             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
794
795             // store the packed int
796             // (target misalignment is assumed since we don't know the m_dimension)
797             _mm_storeu_si128 (target, v_int);
798
799             // increment the buffer pointers
800             client_buffers[0]++;
801             client_buffers[1]++;
802             client_buffers[2]++;
803             client_buffers[3]++;
804
805             // go to next target event position
806             target_event += m_dimension;
807         }
808     }
809
810     // do remaining ports
811     // NOTE: these can be time-SSE'd
812     for (; i < ((int)m_nb_audio_ports); i++) {
813         struct _MBLA_port_cache &p = m_audio_ports.at(i);
814         target_event = (quadlet_t *)(data + i);
815         assert(nevents + offset <= p.buffer_size );
816
817         if(likely(p.buffer && p.enabled)) {
818             uint32_t *buffer = (uint32_t *)(p.buffer);
819             buffer += offset;
820    
821             for (j = 0;j < nevents; j += 4)
822             {
823                 // read the values
824                 tmp_values[0] = *buffer;
825                 buffer++;
826                 tmp_values[1] = *buffer;
827                 buffer++;
828                 tmp_values[2] = *buffer;
829                 buffer++;
830                 tmp_values[3] = *buffer;
831                 buffer++;
832
833                 // now do the SSE based conversion/labeling
834                 __m128i v_int = *((__m128i*)tmp_values);;
835
836                 // mask
837                 v_int = _mm_and_si128( v_int, mask );
838                 // label it
839                 v_int = _mm_or_si128( v_int, label );
840
841                 // do endian conversion (SSE is always little endian)
842                 // do first swap
843                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
844                 // do second swap
845                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
846
847                 // store the packed int
848                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
849
850                 // increment the buffer pointers
851                 *target_event = tmp_values[0];
852                 target_event += m_dimension;
853                 *target_event = tmp_values[1];
854                 target_event += m_dimension;
855                 *target_event = tmp_values[2];
856                 target_event += m_dimension;
857                 *target_event = tmp_values[3];
858                 target_event += m_dimension;
859             }
860
861             // do the remainder of the events
862             for(;j < nevents; j += 1) {
863                 uint32_t in = (uint32_t)(*buffer);
864                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
865                 buffer++;
866                 target_event += m_dimension;
867             }
868
869         } else {
870             for (j = 0;j < nevents; j += 1)
871             {
872                 // hardcoded byte swapped
873                 *target_event = 0x00000040;
874                 target_event += m_dimension;
875             }
876         }
877     }
878 }
879
880 #else
881
882 /**
883  * @brief mux all audio ports to events
884  * @param data
885  * @param offset
886  * @param nevents
887  */
888 void
889 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
890                                                     unsigned int offset,
891                                                     unsigned int nevents)
892 {
893     unsigned int j;
894     quadlet_t *target_event;
895     int i;
896
897     for (i = 0; i < m_nb_audio_ports; i++) {
898         struct _MBLA_port_cache &p = m_audio_ports.at(i);
899         target_event = (quadlet_t *)(data + i);
900         assert(nevents + offset <= p.buffer_size );
901
902         if(likely(p.buffer && p.enabled)) {
903             quadlet_t *buffer = (quadlet_t *)(p.buffer);
904             buffer += offset;
905    
906             for (j = 0;j < nevents; j += 1)
907             {
908                 uint32_t in = (uint32_t)(*buffer);
909                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
910                 buffer++;
911                 target_event += m_dimension;
912             }
913         } else {
914             for (j = 0;j < nevents; j += 1)
915             {
916                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
917                 target_event += m_dimension;
918             }
919         }
920     }
921 }
922
923 /**
924  * @brief mux all audio ports to events
925  * @param data
926  * @param offset
927  * @param nevents
928  */
929 void
930 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
931                                                     unsigned int offset,
932                                                     unsigned int nevents)
933 {
934     unsigned int j;
935     quadlet_t *target_event;
936     int i;
937
938     for (i = 0; i < m_nb_audio_ports; i++) {
939         struct _MBLA_port_cache &p = m_audio_ports.at(i);
940         target_event = (quadlet_t *)(data + i);
941         assert(nevents + offset <= p.buffer_size );
942
943         if(likely(p.buffer && p.enabled)) {
944             quadlet_t *buffer = (quadlet_t *)(p.buffer);
945             buffer += offset;
946    
947             for (j = 0;j < nevents; j += 1)
948             {
949                 float *in = (float *)buffer;
950 #if AMDTP_CLIP_FLOATS
951                 // clip directly to the value of a maxed event
952                 if(unlikely(*in > 1.0)) {
953                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
954                 } else if(unlikely(*in < -1.0)) {
955                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
956                 } else {
957                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
958                     unsigned int tmp = ((int) v);
959                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
960                     *target_event = CondSwapToBus32((quadlet_t)tmp);
961                 }
962 #else
963                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
964                 unsigned int tmp = ((int) v);
965                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
966                 *target_event = CondSwapToBus32((quadlet_t)tmp);
967 #endif
968                 buffer++;
969                 target_event += m_dimension;
970             }
971         } else {
972             for (j = 0;j < nevents; j += 1)
973             {
974                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
975                 target_event += m_dimension;
976             }
977         }
978     }
979 }
980 #endif
981
982 /**
983  * @brief encodes all midi ports in the cache to events (silence)
984  * @param data
985  * @param offset
986  * @param nevents
987  */
988 void
989 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
990                                                      unsigned int offset,
991                                                      unsigned int nevents)
992 {
993     quadlet_t *target_event;
994     int i;
995     unsigned int j;
996
997     for (i = 0; i < m_nb_midi_ports; i++) {
998         struct _MIDI_port_cache &p = m_midi_ports.at(i);
999
1000         for (j = p.location;j < nevents; j += 8) {
1001             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1002             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1003         }
1004     }
1005 }
1006
1007 /**
1008  * @brief encodes all midi ports in the cache to events
1009  * @param data
1010  * @param offset
1011  * @param nevents
1012  */
1013 void
1014 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1015                                               unsigned int offset,
1016                                               unsigned int nevents)
1017 {
1018     quadlet_t *target_event;
1019     int i;
1020     unsigned int j;
1021
1022     for (i = 0; i < m_nb_midi_ports; i++) {
1023         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1024         if (p.buffer && p.enabled) {
1025             uint32_t *buffer = (quadlet_t *)(p.buffer);
1026             buffer += offset;
1027
1028             for (j = p.location;j < nevents; j += 8) {
1029                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1030
1031                 if ( *buffer & 0xFF000000 )   // we can send a byte
1032                 {
1033                     quadlet_t tmpval;
1034                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1035                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1036                     *target_event = CondSwapToBus32(tmpval);
1037
1038 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1039 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1040 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1041 //                                data, target_event, tmpval );
1042                 } else {
1043                     // can't send a byte, either because there is no byte,
1044                     // or because this would exceed the maximum rate
1045                     // FIXME: this can be ifdef optimized since it's a constant
1046                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1047                 }
1048                 buffer+=8;
1049             }
1050         } else {
1051             for (j = p.location;j < nevents; j += 8) {
1052                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1053                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1054                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1055             }
1056         }
1057     }
1058 }
1059
1060 bool
1061 AmdtpTransmitStreamProcessor::initPortCache() {
1062     // make use of the fact that audio ports are the first ports in
1063     // the cluster as per AMDTP. so we can sort the ports by position
1064     // and have very efficient lookups:
1065     // m_float_ports.at(i).buffer -> audio stream i buffer
1066     // for midi ports we simply cache all port info since they are (usually) not
1067     // that numerous
1068     m_nb_audio_ports = 0;
1069     m_audio_ports.clear();
1070    
1071     m_nb_midi_ports = 0;
1072     m_midi_ports.clear();
1073    
1074     for(PortVectorIterator it = m_Ports.begin();
1075         it != m_Ports.end();
1076         ++it )
1077     {
1078         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1079         assert(pinfo); // this should not fail!!
1080
1081         switch( pinfo->getFormat() )
1082         {
1083             case AmdtpPortInfo::E_MBLA:
1084                 m_nb_audio_ports++;
1085                 break;
1086             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1087                 break;
1088             case AmdtpPortInfo::E_Midi:
1089                 m_nb_midi_ports++;
1090                 break;
1091             default: // ignore
1092                 break;
1093         }
1094     }
1095
1096     int idx;
1097     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1098         for(PortVectorIterator it = m_Ports.begin();
1099             it != m_Ports.end();
1100             ++it )
1101         {
1102             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1103             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1104                         "idx %u: looking at port %s at position %u\n",
1105                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1106             if(pinfo->getPosition() == (unsigned int)idx) {
1107                 struct _MBLA_port_cache p;
1108                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1109                 if(p.port == NULL) {
1110                     debugError("Port is not an AmdtpAudioPort!\n");
1111                     return false;
1112                 }
1113                 p.buffer = NULL; // to be filled by updatePortCache
1114                 #ifdef DEBUG
1115                 p.buffer_size = (*it)->getBufferSize();
1116                 #endif
1117
1118                 m_audio_ports.push_back(p);
1119                 debugOutput(DEBUG_LEVEL_VERBOSE,
1120                             "Cached port %s at position %u\n",
1121                             p.port->getName().c_str(), idx);
1122                 goto next_index;
1123             }
1124         }
1125         debugError("No MBLA port found for position %d\n", idx);
1126         return false;
1127 next_index:
1128         continue;
1129     }
1130
1131     for(PortVectorIterator it = m_Ports.begin();
1132         it != m_Ports.end();
1133         ++it )
1134     {
1135         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1136         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1137                     "idx %u: looking at port %s at position %u, location %u\n",
1138                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1139         if ((*it)->getPortType() == Port::E_Midi) {
1140             struct _MIDI_port_cache p;
1141             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1142             if(p.port == NULL) {
1143                 debugError("Port is not an AmdtpMidiPort!\n");
1144                 return false;
1145             }
1146             p.position = pinfo->getPosition();
1147             p.location = pinfo->getLocation();
1148             p.buffer = NULL; // to be filled by updatePortCache
1149             #ifdef DEBUG
1150             p.buffer_size = (*it)->getBufferSize();
1151             #endif
1152
1153             m_midi_ports.push_back(p);
1154             debugOutput(DEBUG_LEVEL_VERBOSE,
1155                         "Cached port %s at position %u, location %u\n",
1156                         p.port->getName().c_str(), p.position, p.location);
1157         }
1158     }
1159
1160     return true;
1161 }
1162
1163 void
1164 AmdtpTransmitStreamProcessor::updatePortCache() {
1165     int idx;
1166     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1167         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1168         AmdtpAudioPort *port = p.port;
1169         p.buffer = port->getBufferAddress();
1170         p.enabled = !port->isDisabled();
1171     }
1172     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1173         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1174         AmdtpMidiPort *port = p.port;
1175         p.buffer = port->getBufferAddress();
1176         p.enabled = !port->isDisabled();
1177     }
1178 }
1179
1180 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.