root/trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 864, 38.6 kB (checked in by ppalmers, 15 years ago)

update license to GPLv2 or GPLv3 instead of GPLv2 or any later version. Update copyrights to reflect the new year

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31
32 #include "libieee1394/ieee1394service.h"
33 #include "libieee1394/IsoHandlerManager.h"
34 #include "libieee1394/cycletimer.h"
35
36 #include <netinet/in.h>
37 #include <assert.h>
38
39 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
40
41 namespace Streaming
42 {
43
44 /* transmit */
45 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
46         : StreamProcessor(parent, ePT_Transmit)
47         , m_dimension( dimension )
48         , m_dbc( 0 )
49         , m_nb_audio_ports( 0 )
50         , m_nb_midi_ports( 0 )
51 {}
52
53 enum StreamProcessor::eChildReturnValue
54 AmdtpTransmitStreamProcessor::generatePacketHeader (
55     unsigned char *data, unsigned int *length,
56     unsigned char *tag, unsigned char *sy,
57     int cycle, unsigned int dropped, unsigned int max_length )
58 {
59     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
60     struct iec61883_packet *packet = (struct iec61883_packet *)data;
61     /* Our node ID can change after a bus reset, so it is best to fetch
62     * our node ID for each packet. */
63     packet->sid = m_local_node_id;
64
65     packet->dbs = m_dimension;
66     packet->fn = 0;
67     packet->qpc = 0;
68     packet->sph = 0;
69     packet->reserved = 0;
70     packet->dbc = m_dbc;
71     packet->eoh1 = 2;
72     packet->fmt = IEC61883_FMT_AMDTP;
73
74     *tag = IEC61883_TAG_WITH_CIP;
75     *sy = 0;
76
77     signed int fc;
78     uint64_t presentation_time;
79     unsigned int presentation_cycle;
80     int cycles_until_presentation;
81
82     uint64_t transmit_at_time;
83     unsigned int transmit_at_cycle;
84     int cycles_until_transmit;
85
86     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "Try for cycle %d\n", cycle );
87     // check whether the packet buffer has packets for us to send.
88     // the base timestamp is the one of the next sample in the buffer
89     ffado_timestamp_t ts_head_tmp;
90     m_data_buffer->getBufferHeadTimestamp ( &ts_head_tmp, &fc ); // thread safe
91
92     // the timestamp gives us the time at which we want the sample block
93     // to be output by the device
94     presentation_time = ( uint64_t ) ts_head_tmp;
95     m_last_timestamp = presentation_time;
96
97     // now we calculate the time when we have to transmit the sample block
98     transmit_at_time = substractTicks ( presentation_time, AMDTP_TRANSMIT_TRANSFER_DELAY );
99
100     // calculate the cycle this block should be presented in
101     // (this is just a virtual calculation since at that time it should
102     //  already be in the device's buffer)
103     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
104
105     // calculate the cycle this block should be transmitted in
106     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
107
108     // we can check whether this cycle is within the 'window' we have
109     // to send this packet.
110     // first calculate the number of cycles left before presentation time
111     cycles_until_presentation = diffCycles ( presentation_cycle, cycle );
112
113     // we can check whether this cycle is within the 'window' we have
114     // to send this packet.
115     // first calculate the number of cycles left before presentation time
116     cycles_until_transmit = diffCycles ( transmit_at_cycle, cycle );
117
118     if (dropped) {
119         debugOutput ( DEBUG_LEVEL_VERBOSE,
120                     "Gen HDR: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
121                     cycle,
122                     transmit_at_cycle, cycles_until_transmit,
123                     transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
124                     presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
125     }
126     // two different options:
127     // 1) there are not enough frames for one packet
128     //      => determine wether this is a problem, since we might still
129     //         have some time to send it
130     // 2) there are enough packets
131     //      => determine whether we have to send them in this packet
132     if ( fc < ( signed int ) m_syt_interval )
133     {
134         // not enough frames in the buffer,
135
136         // we can still postpone the queueing of the packets
137         // if we are far enough ahead of the presentation time
138         if ( cycles_until_presentation <= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
139         {
140             debugOutput ( DEBUG_LEVEL_VERBOSE,
141                         "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
142                         fc, cycle, transmit_at_cycle, cycles_until_transmit );
143             // we are too late
144             return eCRV_XRun;
145         }
146         else
147         {
148             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
149
150             debugOutput ( DEBUG_LEVEL_VERBOSE,
151                         "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
152                         fc, cycle, transmit_at_cycle, cycles_until_transmit, now_cycle );
153             debugWarning("Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
154                          fc, cycle, transmit_at_cycle, cycles_until_transmit, now_cycle );
155
156             // there is still time left to send the packet
157             // we want the system to give this packet another go at a later time instant
158             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
159
160             // we could wait here for a certain time before trying again. However, this
161             // is not going to work since we then block the iterator thread, hence also
162             // the receiving code, meaning that we are not processing received packets,
163             // and hence there is no progression in the number of frames available.
164
165             // for example:
166             // SleepRelativeUsec(125); // one cycle
167             // goto try_block_of_frames;
168
169             // or more advanced, calculate how many cycles we are ahead of 'now' and
170             // base the sleep on that.
171
172             // note that this requires that there is one thread for each IsoHandler,
173             // otherwise we're in the deadlock described above.
174         }
175     }
176     else
177     {
178         // there are enough frames, so check the time they are intended for
179         // all frames have a certain 'time window' in which they can be sent
180         // this corresponds to the range of the timestamp mechanism:
181         // we can send a packet 15 cycles in advance of the 'presentation time'
182         // in theory we can send the packet up till one cycle before the presentation time,
183         // however this is not very smart.
184
185         // There are 3 options:
186         // 1) the frame block is too early
187         //      => send an empty packet
188         // 2) the frame block is within the window
189         //      => send it
190         // 3) the frame block is too late
191         //      => discard (and raise xrun?)
192         //         get next block of frames and repeat
193
194         if(cycles_until_transmit < 0)
195         {
196             // we are too late
197             debugOutput(DEBUG_LEVEL_VERBOSE,
198                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
199                         cycle,
200                         transmit_at_cycle, cycles_until_transmit,
201                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
202             //debugShowBackLogLines(200);
203 //             // however, if we can send this sufficiently before the presentation
204 //             // time, it could be harmless.
205 //             // NOTE: dangerous since the device has no way of reporting that it didn't get
206 //             //       this packet on time.
207 //             if(cycles_until_presentation >= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION)
208 //             {
209 //                 // we are not that late and can still try to transmit the packet
210 //                 m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
211 //                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
212 //             }
213 //             else   // definitely too late
214 //             {
215                 return eCRV_XRun;
216 //             }
217         }
218         else if(cycles_until_transmit <= AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY)
219         {
220             // it's time send the packet
221             m_dbc += fillDataPacketHeader(packet, length, m_last_timestamp);
222             return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
223         }
224         else
225         {
226             debugOutput ( DEBUG_LEVEL_VERY_VERBOSE,
227                         "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
228                         cycle,
229                         transmit_at_cycle, cycles_until_transmit,
230                         transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
231                         presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
232 #ifdef DEBUG
233             if ( cycles_until_transmit > AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY + 1 )
234             {
235                 debugOutput ( DEBUG_LEVEL_VERY_VERBOSE,
236                             "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
237                             cycle,
238                             transmit_at_cycle, cycles_until_transmit,
239                             transmit_at_time, ( unsigned int ) TICKS_TO_CYCLES ( transmit_at_time ),
240                             presentation_time, ( unsigned int ) TICKS_TO_CYCLES ( presentation_time ) );
241             }
242 #endif
243             // we are too early, send only an empty packet
244             return eCRV_EmptyPacket;
245         }
246     }
247     return eCRV_Invalid;
248 }
249
250 enum StreamProcessor::eChildReturnValue
251 AmdtpTransmitStreamProcessor::generatePacketData (
252     unsigned char *data, unsigned int *length,
253     unsigned char *tag, unsigned char *sy,
254     int cycle, unsigned int dropped, unsigned int max_length )
255 {
256     if ( m_data_buffer->readFrames ( m_syt_interval, ( char * ) ( data + 8 ) ) )
257     {
258         debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT DATA (cy %04d): TSP=%011llu (%04u)\n",
259                     cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
260         return eCRV_OK;
261     }
262     else return eCRV_XRun;
263
264 }
265
266 enum StreamProcessor::eChildReturnValue
267 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
268     unsigned char *data, unsigned int *length,
269     unsigned char *tag, unsigned char *sy,
270     int cycle, unsigned int dropped, unsigned int max_length )
271 {
272     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
273     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
274                 cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
275
276     packet->sid = m_local_node_id;
277
278     packet->dbs = m_dimension;
279     packet->fn = 0;
280     packet->qpc = 0;
281     packet->sph = 0;
282     packet->reserved = 0;
283     packet->dbc = m_dbc;
284     packet->eoh1 = 2;
285     packet->fmt = IEC61883_FMT_AMDTP;
286
287     *tag = IEC61883_TAG_WITH_CIP;
288     *sy = 0;
289
290     m_dbc += fillNoDataPacketHeader ( packet, length );
291     return eCRV_Packet;
292 }
293
294 enum StreamProcessor::eChildReturnValue
295 AmdtpTransmitStreamProcessor::generateSilentPacketData (
296     unsigned char *data, unsigned int *length,
297     unsigned char *tag, unsigned char *sy,
298     int cycle, unsigned int dropped, unsigned int max_length )
299 {
300     return eCRV_OK; // no need to do anything
301 }
302
303 enum StreamProcessor::eChildReturnValue
304 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
305     unsigned char *data, unsigned int *length,
306     unsigned char *tag, unsigned char *sy,
307     int cycle, unsigned int dropped, unsigned int max_length )
308 {
309     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
310     debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
311                 cycle, m_last_timestamp, ( unsigned int ) TICKS_TO_CYCLES ( m_last_timestamp ) );
312     packet->sid = m_local_node_id;
313
314     packet->dbs = m_dimension;
315     packet->fn = 0;
316     packet->qpc = 0;
317     packet->sph = 0;
318     packet->reserved = 0;
319     packet->dbc = m_dbc;
320     packet->eoh1 = 2;
321     packet->fmt = IEC61883_FMT_AMDTP;
322
323     *tag = IEC61883_TAG_WITH_CIP;
324     *sy = 0;
325
326     m_dbc += fillNoDataPacketHeader ( packet, length );
327     return eCRV_OK;
328 }
329
330 enum StreamProcessor::eChildReturnValue
331 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
332     unsigned char *data, unsigned int *length,
333     unsigned char *tag, unsigned char *sy,
334     int cycle, unsigned int dropped, unsigned int max_length )
335 {
336     return eCRV_OK; // no need to do anything
337 }
338
339 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
340     struct iec61883_packet *packet, unsigned int* length,
341     uint32_t ts )
342 {
343
344     packet->fdf = m_fdf;
345
346     // convert the timestamp to SYT format
347     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
348     packet->syt = ntohs ( timestamp_SYT );
349
350     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
351
352     return m_syt_interval;
353 }
354
355 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
356     struct iec61883_packet *packet, unsigned int* length )
357 {
358
359     // no-data packets have syt=0xFFFF
360     // and have the usual amount of events as dummy data (?)
361     packet->fdf = IEC61883_FDF_NODATA;
362     packet->syt = 0xffff;
363
364     // FIXME: either make this a setting or choose
365     bool send_payload=true;
366     if ( send_payload )
367     {
368         // this means no-data packets with payload (DICE doesn't like that)
369         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
370         return m_syt_interval;
371     }
372     else
373     {
374         // dbc is not incremented
375         // this means no-data packets without payload
376         *length = 2*sizeof ( quadlet_t );
377         return 0;
378     }
379 }
380
381 unsigned int
382 AmdtpTransmitStreamProcessor::getSytInterval() {
383     switch (m_StreamProcessorManager.getNominalRate()) {
384         case 32000:
385         case 44100:
386         case 48000:
387             return 8;
388         case 88200:
389         case 96000:
390             return 16;
391         case 176400:
392         case 192000:
393             return 32;
394         default:
395             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
396             return 0;
397     }
398 }
399 unsigned int
400 AmdtpTransmitStreamProcessor::getFDF() {
401     switch (m_StreamProcessorManager.getNominalRate()) {
402         case 32000: return IEC61883_FDF_SFC_32KHZ;
403         case 44100: return IEC61883_FDF_SFC_44K1HZ;
404         case 48000: return IEC61883_FDF_SFC_48KHZ;
405         case 88200: return IEC61883_FDF_SFC_88K2HZ;
406         case 96000: return IEC61883_FDF_SFC_96KHZ;
407         case 176400: return IEC61883_FDF_SFC_176K4HZ;
408         case 192000: return IEC61883_FDF_SFC_192KHZ;
409         default:
410             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
411             return 0;
412     }
413 }
414
415 bool AmdtpTransmitStreamProcessor::prepareChild()
416 {
417     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
418     m_syt_interval = getSytInterval();
419     m_fdf = getFDF();
420
421     iec61883_cip_init (
422         &m_cip_status,
423         IEC61883_FMT_AMDTP,
424         m_fdf,
425         m_StreamProcessorManager.getNominalRate(),
426         m_dimension,
427         m_syt_interval );
428
429     if (!initPortCache()) {
430         debugError("Could not init port cache\n");
431         return false;
432     }
433
434     return true;
435 }
436
437 /*
438 * compose the event streams for the packets from the port buffers
439 */
440 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
441         unsigned int nevents, unsigned int offset )
442 {
443     // update the variable parts of the cache
444     updatePortCache();
445
446     // encode audio data
447     switch(m_StreamProcessorManager.getAudioDataType()) {
448         case StreamProcessorManager::eADT_Int24:
449             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
450             break;
451         case StreamProcessorManager::eADT_Float:
452             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
453             break;
454     }
455
456     // do midi ports
457     encodeMidiPorts((quadlet_t *)data, offset, nevents);
458     return true;
459 }
460
461 bool
462 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
463     char *data, unsigned int nevents, unsigned int offset)
464 {
465     // no need to update the port cache when transmitting silence since
466     // no dynamic values are used to do so.
467     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
468     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
469     return true;
470 }
471
472 /**
473  * @brief encodes all audio ports in the cache to events (silent data)
474  * @param data
475  * @param offset
476  * @param nevents
477  */
478 void
479 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
480                                                       unsigned int offset,
481                                                       unsigned int nevents)
482 {
483     unsigned int j;
484     quadlet_t *target_event;
485     unsigned int i;
486
487     for (i = 0; i < m_nb_audio_ports; i++) {
488         target_event = (quadlet_t *)(data + i);
489
490         for (j = 0;j < nevents; j += 1)
491         {
492             *target_event = 0x00000040;
493             target_event += m_dimension;
494         }
495     }
496 }
497
498 #ifdef __SSE2__
499 //#if 0
500 #include <emmintrin.h>
501 #warning SSE2 build
502
503 /**
504  * @brief mux all audio ports to events
505  * @param data
506  * @param offset
507  * @param nevents
508  */
509 void
510 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
511                                                     unsigned int offset,
512                                                     unsigned int nevents)
513 {
514     unsigned int j;
515     quadlet_t *target_event;
516     unsigned int i;
517
518     float * client_buffers[4];
519     float tmp_values[4] __attribute__ ((aligned (16)));
520     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
521
522     // prepare the scratch buffer
523     assert(m_scratch_buffer_size_bytes > nevents * 4);
524     memset(m_scratch_buffer, 0, nevents * 4);
525
526     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
527     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
528
529     // this assumes that audio ports are sorted by position,
530     // and that there are no gaps
531     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
532         struct _MBLA_port_cache *p;
533
534         // get the port buffers
535         for (j=0; j<4; j++) {
536             p = &(m_audio_ports.at(i+j));
537             if(p->buffer && p->enabled) {
538                 client_buffers[j] = (float *) p->buffer;
539                 client_buffers[j] += offset;
540             } else {
541                 // if a port is disabled or has no valid
542                 // buffer, use the scratch buffer (all zero's)
543                 client_buffers[j] = (float *) m_scratch_buffer;
544             }
545         }
546
547         // the base event for this position
548         target_event = (quadlet_t *)(data + i);
549
550         // process the events
551         for (j=0;j < nevents; j += 1)
552         {
553             // read the values
554             tmp_values[0] = *(client_buffers[0]);
555             tmp_values[1] = *(client_buffers[1]);
556             tmp_values[2] = *(client_buffers[2]);
557             tmp_values[3] = *(client_buffers[3]);
558
559             // now do the SSE based conversion/labeling
560             __m128 v_float = *((__m128*)tmp_values);
561             __m128i *target = (__m128i*)target_event;
562             __m128i v_int;
563
564             // multiply
565             v_float = _mm_mul_ps(v_float, mult);
566             // convert to signed integer
567             v_int = _mm_cvttps_epi32( v_float );
568             // shift right 8 bits
569             v_int = _mm_srli_epi32( v_int, 8 );
570             // label it
571             v_int = _mm_or_si128( v_int, label );
572
573             // do endian conversion (SSE is always little endian)
574             // do first swap
575             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
576             // do second swap
577             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
578
579             // store the packed int
580             // (target misalignment is assumed since we don't know the m_dimension)
581             _mm_storeu_si128 (target, v_int);
582
583             // increment the buffer pointers
584             client_buffers[0]++;
585             client_buffers[1]++;
586             client_buffers[2]++;
587             client_buffers[3]++;
588
589             // go to next target event position
590             target_event += m_dimension;
591         }
592     }
593
594     // do remaining ports
595     // NOTE: these can be time-SSE'd
596     for (; i < m_nb_audio_ports; i++) {
597         struct _MBLA_port_cache &p = m_audio_ports.at(i);
598         target_event = (quadlet_t *)(data + i);
599         assert(nevents + offset <= p.buffer_size );
600
601         if(p.buffer && p.enabled) {
602             float *buffer = (float *)(p.buffer);
603             buffer += offset;
604    
605             for (j = 0;j < nevents; j += 4)
606             {
607                 // read the values
608                 tmp_values[0] = *buffer;
609                 buffer++;
610                 tmp_values[1] = *buffer;
611                 buffer++;
612                 tmp_values[2] = *buffer;
613                 buffer++;
614                 tmp_values[3] = *buffer;
615                 buffer++;
616    
617                 // now do the SSE based conversion/labeling
618                 __m128 v_float = *((__m128*)tmp_values);
619                 __m128i v_int;
620    
621                 // multiply
622                 v_float = _mm_mul_ps(v_float, mult);
623                 // convert to signed integer
624                 v_int = _mm_cvttps_epi32( v_float );
625                 // shift right 8 bits
626                 v_int = _mm_srli_epi32( v_int, 8 );
627                 // label it
628                 v_int = _mm_or_si128( v_int, label );
629    
630                 // do endian conversion (SSE is always little endian)
631                 // do first swap
632                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
633                 // do second swap
634                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
635
636                 // store the packed int
637                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
638
639                 // increment the buffer pointers
640                 *target_event = tmp_values_int[0];
641                 target_event += m_dimension;
642                 *target_event = tmp_values_int[1];
643                 target_event += m_dimension;
644                 *target_event = tmp_values_int[2];
645                 target_event += m_dimension;
646                 *target_event = tmp_values_int[3];
647                 target_event += m_dimension;
648             }
649
650             // do the remainder of the events
651             for(;j < nevents; j += 1) {
652                 float *in = (float *)buffer;
653                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
654                 unsigned int tmp = ((int) v);
655                 tmp = ( tmp >> 8 ) | 0x40000000;
656                 *target_event = htonl((quadlet_t)tmp);
657                 buffer++;
658                 target_event += m_dimension;
659             }
660
661         } else {
662             for (j = 0;j < nevents; j += 1)
663             {
664                 // hardcoded byte swapped
665                 *target_event = 0x00000040;
666                 target_event += m_dimension;
667             }
668         }
669     }
670 }
671
672
673 /**
674  * @brief mux all audio ports to events
675  * @param data
676  * @param offset
677  * @param nevents
678  */
679 void
680 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
681                                                     unsigned int offset,
682                                                     unsigned int nevents)
683 {
684     unsigned int j;
685     quadlet_t *target_event;
686     unsigned int i;
687
688     uint32_t *client_buffers[4];
689     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
690
691     // prepare the scratch buffer
692     assert(m_scratch_buffer_size_bytes > nevents * 4);
693     memset(m_scratch_buffer, 0, nevents * 4);
694
695     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
696     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
697
698     // this assumes that audio ports are sorted by position,
699     // and that there are no gaps
700     for (i = 0; i < m_nb_audio_ports-4; i += 4) {
701         struct _MBLA_port_cache *p;
702
703         // get the port buffers
704         for (j=0; j<4; j++) {
705             p = &(m_audio_ports.at(i+j));
706             if(p->buffer && p->enabled) {
707                 client_buffers[j] = (uint32_t *) p->buffer;
708                 client_buffers[j] += offset;
709             } else {
710                 // if a port is disabled or has no valid
711                 // buffer, use the scratch buffer (all zero's)
712                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
713             }
714         }
715
716         // the base event for this position
717         target_event = (quadlet_t *)(data + i);
718
719         // process the events
720         for (j=0;j < nevents; j += 1)
721         {
722             // read the values
723             tmp_values[0] = *(client_buffers[0]);
724             tmp_values[1] = *(client_buffers[1]);
725             tmp_values[2] = *(client_buffers[2]);
726             tmp_values[3] = *(client_buffers[3]);
727
728             // now do the SSE based conversion/labeling
729             __m128i *target = (__m128i*)target_event;
730             __m128i v_int = *((__m128i*)tmp_values);;
731
732             // mask
733             v_int = _mm_and_si128( v_int, mask );
734             // label it
735             v_int = _mm_or_si128( v_int, label );
736
737             // do endian conversion (SSE is always little endian)
738             // do first swap
739             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
740             // do second swap
741             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
742
743             // store the packed int
744             // (target misalignment is assumed since we don't know the m_dimension)
745             _mm_storeu_si128 (target, v_int);
746
747             // increment the buffer pointers
748             client_buffers[0]++;
749             client_buffers[1]++;
750             client_buffers[2]++;
751             client_buffers[3]++;
752
753             // go to next target event position
754             target_event += m_dimension;
755         }
756     }
757
758     // do remaining ports
759     // NOTE: these can be time-SSE'd
760     for (; i < m_nb_audio_ports; i++) {
761         struct _MBLA_port_cache &p = m_audio_ports.at(i);
762         target_event = (quadlet_t *)(data + i);
763         assert(nevents + offset <= p.buffer_size );
764
765         if(p.buffer && p.enabled) {
766             uint32_t *buffer = (uint32_t *)(p.buffer);
767             buffer += offset;
768    
769             for (j = 0;j < nevents; j += 4)
770             {
771                 // read the values
772                 tmp_values[0] = *buffer;
773                 buffer++;
774                 tmp_values[1] = *buffer;
775                 buffer++;
776                 tmp_values[2] = *buffer;
777                 buffer++;
778                 tmp_values[3] = *buffer;
779                 buffer++;
780
781                 // now do the SSE based conversion/labeling
782                 __m128i v_int = *((__m128i*)tmp_values);;
783
784                 // mask
785                 v_int = _mm_and_si128( v_int, mask );
786                 // label it
787                 v_int = _mm_or_si128( v_int, label );
788
789                 // do endian conversion (SSE is always little endian)
790                 // do first swap
791                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
792                 // do second swap
793                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
794
795                 // store the packed int
796                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
797
798                 // increment the buffer pointers
799                 *target_event = tmp_values[0];
800                 target_event += m_dimension;
801                 *target_event = tmp_values[1];
802                 target_event += m_dimension;
803                 *target_event = tmp_values[2];
804                 target_event += m_dimension;
805                 *target_event = tmp_values[3];
806                 target_event += m_dimension;
807             }
808
809             // do the remainder of the events
810             for(;j < nevents; j += 1) {
811                 uint32_t in = (uint32_t)(*buffer);
812                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
813                 buffer++;
814                 target_event += m_dimension;
815             }
816
817         } else {
818             for (j = 0;j < nevents; j += 1)
819             {
820                 // hardcoded byte swapped
821                 *target_event = 0x00000040;
822                 target_event += m_dimension;
823             }
824         }
825     }
826 }
827
828 #else
829
830 /**
831  * @brief mux all audio ports to events
832  * @param data
833  * @param offset
834  * @param nevents
835  */
836 void
837 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
838                                                     unsigned int offset,
839                                                     unsigned int nevents)
840 {
841     unsigned int j;
842     quadlet_t *target_event;
843     unsigned int i;
844
845     for (i = 0; i < m_nb_audio_ports; i++) {
846         struct _MBLA_port_cache &p = m_audio_ports.at(i);
847         target_event = (quadlet_t *)(data + i);
848         assert(nevents + offset <= p.buffer_size );
849
850         if(p.buffer && p.enabled) {
851             quadlet_t *buffer = (quadlet_t *)(p.buffer);
852             buffer += offset;
853    
854             for (j = 0;j < nevents; j += 1)
855             {
856                 uint32_t in = (uint32_t)(*buffer);
857                 *target_event = htonl((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
858                 buffer++;
859                 target_event += m_dimension;
860             }
861         } else {
862             for (j = 0;j < nevents; j += 1)
863             {
864                 *target_event = 0x00000040;
865                 target_event += m_dimension;
866             }
867         }
868     }
869 }
870
871 /**
872  * @brief mux all audio ports to events
873  * @param data
874  * @param offset
875  * @param nevents
876  */
877 void
878 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
879                                                     unsigned int offset,
880                                                     unsigned int nevents)
881 {
882     unsigned int j;
883     quadlet_t *target_event;
884     unsigned int i;
885
886     for (i = 0; i < m_nb_audio_ports; i++) {
887         struct _MBLA_port_cache &p = m_audio_ports.at(i);
888         target_event = (quadlet_t *)(data + i);
889         assert(nevents + offset <= p.buffer_size );
890
891         if(p.buffer && p.enabled) {
892             quadlet_t *buffer = (quadlet_t *)(p.buffer);
893             buffer += offset;
894    
895             for (j = 0;j < nevents; j += 1)
896             {
897                 float *in = (float *)buffer;
898                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
899                 unsigned int tmp = ((int) v);
900                 tmp = ( tmp >> 8 ) | 0x40000000;
901                 *target_event = htonl((quadlet_t)tmp);
902                 buffer++;
903                 target_event += m_dimension;
904             }
905         } else {
906             for (j = 0;j < nevents; j += 1)
907             {
908                 // hardcoded little endian
909                 *target_event = 0x00000040;
910                 target_event += m_dimension;
911             }
912         }
913     }
914 }
915 #endif
916
917 /**
918  * @brief encodes all midi ports in the cache to events (silence)
919  * @param data
920  * @param offset
921  * @param nevents
922  */
923 void
924 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
925                                                      unsigned int offset,
926                                                      unsigned int nevents)
927 {
928     quadlet_t *target_event;
929     unsigned int i,j;
930
931     for (i = 0; i < m_nb_midi_ports; i++) {
932         struct _MIDI_port_cache &p = m_midi_ports.at(i);
933
934         for (j = p.location;j < nevents; j += 8) {
935             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
936             *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
937         }
938     }
939 }
940
941 /**
942  * @brief encodes all midi ports in the cache to events
943  * @param data
944  * @param offset
945  * @param nevents
946  */
947 void
948 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
949                                               unsigned int offset,
950                                               unsigned int nevents)
951 {
952     quadlet_t *target_event;
953     unsigned int i,j;
954
955     for (i = 0; i < m_nb_midi_ports; i++) {
956         struct _MIDI_port_cache &p = m_midi_ports.at(i);
957         if (p.buffer && p.enabled) {
958             uint32_t *buffer = (quadlet_t *)(p.buffer);
959             buffer += offset;
960
961             for (j = p.location;j < nevents; j += 8) {
962                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
963
964                 if ( *buffer & 0xFF000000 )   // we can send a byte
965                 {
966                     quadlet_t tmpval;
967                     tmpval = ((*buffer)<<16) & 0x00FF0000;
968                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
969                     *target_event = htonl(tmpval);
970
971 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
972 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
973 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
974 //                                data, target_event, tmpval );
975                 } else {
976                     // can't send a byte, either because there is no byte,
977                     // or because this would exceed the maximum rate
978                     // FIXME: this can be ifdef optimized since it's a constant
979                     *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
980                 }
981                 buffer+=8;
982             }
983         } else {
984             for (j = p.location;j < nevents; j += 8) {
985                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
986                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
987                 *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
988             }
989         }
990     }
991 }
992
993 bool
994 AmdtpTransmitStreamProcessor::initPortCache() {
995     // make use of the fact that audio ports are the first ports in
996     // the cluster as per AMDTP. so we can sort the ports by position
997     // and have very efficient lookups:
998     // m_float_ports.at(i).buffer -> audio stream i buffer
999     // for midi ports we simply cache all port info since they are (usually) not
1000     // that numerous
1001     m_nb_audio_ports = 0;
1002     m_audio_ports.clear();
1003    
1004     m_nb_midi_ports = 0;
1005     m_midi_ports.clear();
1006    
1007     for(PortVectorIterator it = m_Ports.begin();
1008         it != m_Ports.end();
1009         ++it )
1010     {
1011         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1012         assert(pinfo); // this should not fail!!
1013
1014         switch( pinfo->getFormat() )
1015         {
1016             case AmdtpPortInfo::E_MBLA:
1017                 m_nb_audio_ports++;
1018                 break;
1019             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1020                 break;
1021             case AmdtpPortInfo::E_Midi:
1022                 m_nb_midi_ports++;
1023                 break;
1024             default: // ignore
1025                 break;
1026         }
1027     }
1028
1029     unsigned int idx;
1030     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1031         for(PortVectorIterator it = m_Ports.begin();
1032             it != m_Ports.end();
1033             ++it )
1034         {
1035             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1036             debugOutput(DEBUG_LEVEL_VERY_VERBOSE, "idx %u: looking at port %s at position %u\n",
1037                                               idx, (*it)->getName().c_str(), pinfo->getPosition());
1038             if(pinfo->getPosition() == idx) {
1039                 struct _MBLA_port_cache p;
1040                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1041                 if(p.port == NULL) {
1042                     debugError("Port is not an AmdtpAudioPort!\n");
1043                     return false;
1044                 }
1045                 p.buffer = NULL; // to be filled by updatePortCache
1046                 #ifdef DEBUG
1047                 p.buffer_size = (*it)->getBufferSize();
1048                 #endif
1049
1050                 m_audio_ports.push_back(p);
1051                 debugOutput(DEBUG_LEVEL_VERBOSE, "Cached port %s at position %u\n",
1052                                                  p.port->getName().c_str(), idx);
1053                 goto next_index;
1054             }
1055         }
1056         debugError("No MBLA port found for position %d\n", idx);
1057         return false;
1058 next_index:
1059         continue;
1060     }
1061
1062     for(PortVectorIterator it = m_Ports.begin();
1063         it != m_Ports.end();
1064         ++it )
1065     {
1066         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1067         debugOutput(DEBUG_LEVEL_VERY_VERBOSE, "idx %u: looking at port %s at position %u, location %u\n",
1068                                         idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1069         if ((*it)->getPortType() == Port::E_Midi) {
1070             struct _MIDI_port_cache p;
1071             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1072             if(p.port == NULL) {
1073                 debugError("Port is not an AmdtpMidiPort!\n");
1074                 return false;
1075             }
1076             p.position = pinfo->getPosition();
1077             p.location = pinfo->getLocation();
1078             p.buffer = NULL; // to be filled by updatePortCache
1079             #ifdef DEBUG
1080             p.buffer_size = (*it)->getBufferSize();
1081             #endif
1082
1083             m_midi_ports.push_back(p);
1084             debugOutput(DEBUG_LEVEL_VERBOSE, "Cached port %s at position %u, location %u\n",
1085                                             p.port->getName().c_str(), p.position, p.location);
1086         }
1087     }
1088
1089     return true;
1090 }
1091
1092 void
1093 AmdtpTransmitStreamProcessor::updatePortCache() {
1094     unsigned int idx;
1095     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1096         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1097         AmdtpAudioPort *port = p.port;
1098         p.buffer = port->getBufferAddress();
1099         p.enabled = !port->isDisabled();
1100     }
1101     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1102         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1103         AmdtpMidiPort *port = p.port;
1104         p.buffer = port->getBufferAddress();
1105         p.enabled = !port->isDisabled();
1106     }
1107 }
1108
1109 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.