root/branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1546, 42.2 kB (checked in by ppalmers, 15 years ago)

fix stupid mistake

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31 #include "libutil/float_cast.h"
32
33 #include "libieee1394/ieee1394service.h"
34 #include "libieee1394/IsoHandlerManager.h"
35 #include "libieee1394/cycletimer.h"
36
37 #include "libutil/ByteSwap.h"
38 #include <assert.h>
39 #include <cstring>
40
41 #define likely(x)   __builtin_expect((x),1)
42 #define unlikely(x) __builtin_expect((x),0)
43
44 #define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
45 namespace Streaming
46 {
47
48 /* transmit */
49 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
50         : StreamProcessor(parent, ePT_Transmit)
51         , m_dimension( dimension )
52         , m_dbc( 0 )
53 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
54         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
55 #endif
56         , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
57         , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
58         , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
59         , m_nb_audio_ports( 0 )
60         , m_nb_midi_ports( 0 )
61 {}
62
63 enum StreamProcessor::eChildReturnValue
64 AmdtpTransmitStreamProcessor::generatePacketHeader (
65     unsigned char *data, unsigned int *length,
66     unsigned char *tag, unsigned char *sy,
67     uint32_t pkt_ctr )
68 {
69     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
70     struct iec61883_packet *packet = (struct iec61883_packet *)data;
71     /* Our node ID can change after a bus reset, so it is best to fetch
72     * our node ID for each packet. */
73     packet->sid = m_local_node_id;
74
75     packet->dbs = m_dimension;
76     packet->fn = 0;
77     packet->qpc = 0;
78     packet->sph = 0;
79     packet->reserved = 0;
80     packet->dbc = m_dbc;
81     packet->eoh1 = 2;
82     packet->fmt = IEC61883_FMT_AMDTP;
83
84     *tag = IEC61883_TAG_WITH_CIP;
85     *sy = 0;
86
87     signed int fc;
88     uint64_t presentation_time;
89     unsigned int presentation_cycle;
90     int cycles_until_presentation;
91
92     uint64_t transmit_at_time;
93     unsigned int transmit_at_cycle;
94     int cycles_until_transmit;
95
96     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
97                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
98     // check whether the packet buffer has packets for us to send.
99     // the base timestamp is the one of the next sample in the buffer
100     ffado_timestamp_t ts_head_tmp;
101     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
102
103     // the timestamp gives us the time at which we want the sample block
104     // to be output by the device
105     presentation_time = ( uint64_t ) ts_head_tmp;
106
107     // now we calculate the time when we have to transmit the sample block
108     transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
109
110     // calculate the cycle this block should be presented in
111     // (this is just a virtual calculation since at that time it should
112     //  already be in the device's buffer)
113     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
114
115     // calculate the cycle this block should be transmitted in
116     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
117
118     // we can check whether this cycle is within the 'window' we have
119     // to send this packet.
120     // first calculate the number of cycles left before presentation time
121     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
122
123     // we can check whether this cycle is within the 'window' we have
124     // to send this packet.
125     // first calculate the number of cycles left before presentation time
126     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
127
128     // two different options:
129     // 1) there are not enough frames for one packet
130     //      => determine wether this is a problem, since we might still
131     //         have some time to send it
132     // 2) there are enough packets
133     //      => determine whether we have to send them in this packet
134     if ( fc < ( signed int ) m_syt_interval )
135     {
136         // not enough frames in the buffer,
137
138         // we can still postpone the queueing of the packets
139         // if we are far enough ahead of the presentation time
140         if ( cycles_until_presentation <= m_min_cycles_before_presentation )
141         {
142             debugOutput( DEBUG_LEVEL_NORMAL,
143                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
144                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
145                          transmit_at_cycle, cycles_until_transmit );
146             // we are too late
147             return eCRV_XRun;
148         }
149         else
150         {
151             #if DEBUG_EXTREME
152             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
153
154             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
155                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
156                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
157                                transmit_at_cycle, cycles_until_transmit, now_cycle );
158             #endif
159
160             // there is still time left to send the packet
161             // we want the system to give this packet another go at a later time instant
162             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
163
164             // we could wait here for a certain time before trying again. However, this
165             // is not going to work since we then block the iterator thread, hence also
166             // the receiving code, meaning that we are not processing received packets,
167             // and hence there is no progression in the number of frames available.
168
169             // for example:
170             // SleepRelativeUsec(125); // one cycle
171             // goto try_block_of_frames;
172
173             // or more advanced, calculate how many cycles we are ahead of 'now' and
174             // base the sleep on that.
175
176             // note that this requires that there is one thread for each IsoHandler,
177             // otherwise we're in the deadlock described above.
178         }
179     }
180     else
181     {
182         // there are enough frames, so check the time they are intended for
183         // all frames have a certain 'time window' in which they can be sent
184         // this corresponds to the range of the timestamp mechanism:
185         // we can send a packet 15 cycles in advance of the 'presentation time'
186         // in theory we can send the packet up till one cycle before the presentation time,
187         // however this is not very smart.
188
189         // There are 3 options:
190         // 1) the frame block is too early
191         //      => send an empty packet
192         // 2) the frame block is within the window
193         //      => send it
194         // 3) the frame block is too late
195         //      => discard (and raise xrun?)
196         //         get next block of frames and repeat
197
198         if(cycles_until_transmit < 0)
199         {
200             // we are too late
201             debugOutput(DEBUG_LEVEL_VERBOSE,
202                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
203                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
204                         transmit_at_cycle, cycles_until_transmit,
205                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
206             //debugShowBackLogLines(200);
207             // however, if we can send this sufficiently before the presentation
208             // time, it could be harmless.
209             // NOTE: dangerous since the device has no way of reporting that it didn't get
210             //       this packet on time.
211             if(cycles_until_presentation >= m_min_cycles_before_presentation)
212             {
213                 // we are not that late and can still try to transmit the packet
214                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
215                 m_last_timestamp = presentation_time;
216                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
217             }
218             else   // definitely too late
219             {
220                 return eCRV_XRun;
221             }
222         }
223         else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
224         {
225             // it's time send the packet
226             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
227             m_last_timestamp = presentation_time;
228
229             // for timestamp tracing
230             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
231                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
232                                presentation_time,
233                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
234                                presentation_cycle, transmit_at_cycle);
235
236             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
237         }
238         else
239         {
240             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
241                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
242                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
243                                transmit_at_cycle, cycles_until_transmit,
244                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
245                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
246 #ifdef DEBUG
247             if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
248             {
249                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
250                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
251                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
252                                    transmit_at_cycle, cycles_until_transmit,
253                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
254                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
255             }
256 #endif
257             // we are too early, send only an empty packet
258             return eCRV_EmptyPacket;
259         }
260     }
261     return eCRV_Invalid;
262 }
263
264 enum StreamProcessor::eChildReturnValue
265 AmdtpTransmitStreamProcessor::generatePacketData (
266     unsigned char *data, unsigned int *length )
267 {
268     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
269     {
270         debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
271                            "XMIT DATA: TSP= %011llu (%04u)\n",
272                            m_last_timestamp,
273                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
274         #if 0
275         // debug code to output the packet content
276         char tmpbuff[8192];
277         int cnt=0;
278         quadlet_t *tmp = (quadlet_t *)((char *)(data + 8));
279
280         for(int i=0; i<m_syt_interval; i++) {
281             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "[%02d] ", i);
282             for(int j=0; j<m_dimension; j++) {
283                 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "%08X ", *tmp);
284                 tmp++;
285             }
286             cnt += snprintf(tmpbuff + cnt, 8192-cnt, "\n");
287         }
288         debugOutput(DEBUG_LEVEL_VERBOSE, "\n%s\n", tmpbuff);
289         #endif
290         return eCRV_OK;
291     }
292     else return eCRV_XRun;
293 }
294
295 enum StreamProcessor::eChildReturnValue
296 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
297     unsigned char *data, unsigned int *length,
298     unsigned char *tag, unsigned char *sy,
299     uint32_t pkt_ctr )
300 {
301     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
302     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
303                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
304                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
305                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
306
307     packet->sid = m_local_node_id;
308
309     packet->dbs = m_dimension;
310     packet->fn = 0;
311     packet->qpc = 0;
312     packet->sph = 0;
313     packet->reserved = 0;
314     packet->dbc = m_dbc;
315     packet->eoh1 = 2;
316     packet->fmt = IEC61883_FMT_AMDTP;
317
318     *tag = IEC61883_TAG_WITH_CIP;
319     *sy = 0;
320
321     m_dbc += fillNoDataPacketHeader(packet, length);
322     return eCRV_Packet;
323 }
324
325 enum StreamProcessor::eChildReturnValue
326 AmdtpTransmitStreamProcessor::generateSilentPacketData (
327     unsigned char *data, unsigned int *length )
328 {
329     return eCRV_OK; // no need to do anything
330 }
331
332 enum StreamProcessor::eChildReturnValue
333 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
334     unsigned char *data, unsigned int *length,
335     unsigned char *tag, unsigned char *sy,
336     uint32_t pkt_ctr )
337 {
338     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
339     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
340                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
341                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
342                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
343     packet->sid = m_local_node_id;
344
345     packet->dbs = m_dimension;
346     packet->fn = 0;
347     packet->qpc = 0;
348     packet->sph = 0;
349     packet->reserved = 0;
350     packet->dbc = m_dbc;
351     packet->eoh1 = 2;
352     packet->fmt = IEC61883_FMT_AMDTP;
353
354     *tag = IEC61883_TAG_WITH_CIP;
355     *sy = 0;
356
357     m_dbc += fillNoDataPacketHeader(packet, length);
358     return eCRV_OK;
359 }
360
361 enum StreamProcessor::eChildReturnValue
362 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
363     unsigned char *data, unsigned int *length )
364 {
365     return eCRV_OK; // no need to do anything
366 }
367
368 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
369     struct iec61883_packet *packet, unsigned int* length,
370     uint32_t ts )
371 {
372
373     packet->fdf = m_fdf;
374
375     // convert the timestamp to SYT format
376     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
377     packet->syt = CondSwapToBus16 ( timestamp_SYT );
378
379     // FIXME: use a precomputed value here
380     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
381
382     return m_syt_interval;
383 }
384
385 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
386     struct iec61883_packet *packet, unsigned int* length )
387 {
388     // no-data packets have syt=0xFFFF
389     // and (can) have the usual amount of events as dummy data
390     // DBC is not increased
391     packet->fdf = IEC61883_FDF_NODATA;
392     packet->syt = 0xffff;
393
394 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
395     if ( m_send_nodata_payload )
396     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
397         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
398         return m_syt_interval;
399     } else { // no-data packets without payload
400         *length = 2*sizeof ( quadlet_t );
401         return 0;
402     }
403 #else
404     // no-data packets without payload
405     *length = 2*sizeof ( quadlet_t );
406     return 0;
407 #endif
408 }
409
410 unsigned int
411 AmdtpTransmitStreamProcessor::getSytInterval() {
412     switch (m_StreamProcessorManager.getNominalRate()) {
413         case 32000:
414         case 44100:
415         case 48000:
416             return 8;
417         case 88200:
418         case 96000:
419             return 16;
420         case 176400:
421         case 192000:
422             return 32;
423         default:
424             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
425             return 0;
426     }
427 }
428
429 unsigned int
430 AmdtpTransmitStreamProcessor::getFDF() {
431     switch (m_StreamProcessorManager.getNominalRate()) {
432         case 32000: return IEC61883_FDF_SFC_32KHZ;
433         case 44100: return IEC61883_FDF_SFC_44K1HZ;
434         case 48000: return IEC61883_FDF_SFC_48KHZ;
435         case 88200: return IEC61883_FDF_SFC_88K2HZ;
436         case 96000: return IEC61883_FDF_SFC_96KHZ;
437         case 176400: return IEC61883_FDF_SFC_176K4HZ;
438         case 192000: return IEC61883_FDF_SFC_192KHZ;
439         default:
440             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
441             return 0;
442     }
443 }
444
445 bool AmdtpTransmitStreamProcessor::prepareChild()
446 {
447     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
448     m_syt_interval = getSytInterval();
449     m_fdf = getFDF();
450
451     debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF             : %d / %d\n", m_syt_interval, m_fdf );
452 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
453     debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
454 #endif
455     debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles      : %d\n", m_max_cycles_to_transmit_early );
456     debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay                 : %d\n", m_transmit_transfer_delay );
457     debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
458
459     iec61883_cip_init (
460         &m_cip_status,
461         IEC61883_FMT_AMDTP,
462         m_fdf,
463         m_StreamProcessorManager.getNominalRate(),
464         m_dimension,
465         m_syt_interval );
466
467     if (!initPortCache()) {
468         debugError("Could not init port cache\n");
469         return false;
470     }
471
472     return true;
473 }
474
475 /*
476 * compose the event streams for the packets from the port buffers
477 */
478 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
479         unsigned int nevents, unsigned int offset )
480 {
481     // update the variable parts of the cache
482     updatePortCache();
483
484     // encode audio data
485     switch(m_StreamProcessorManager.getAudioDataType()) {
486         case StreamProcessorManager::eADT_Int24:
487             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
488             break;
489         case StreamProcessorManager::eADT_Float:
490             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
491             break;
492     }
493
494     // do midi ports
495     encodeMidiPorts((quadlet_t *)data, offset, nevents);
496     return true;
497 }
498
499 bool
500 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
501     char *data, unsigned int nevents, unsigned int offset)
502 {
503     // no need to update the port cache when transmitting silence since
504     // no dynamic values are used to do so.
505     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
506     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
507     return true;
508 }
509
510 /**
511  * @brief encodes all audio ports in the cache to events (silent data)
512  * @param data
513  * @param offset
514  * @param nevents
515  */
516 void
517 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
518                                                       unsigned int offset,
519                                                       unsigned int nevents)
520 {
521     unsigned int j;
522     quadlet_t *target_event;
523     int i;
524
525     for (i = 0; i < m_nb_audio_ports; i++) {
526         target_event = (quadlet_t *)(data + i);
527
528         for (j = 0;j < nevents; j += 1)
529         {
530             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
531             target_event += m_dimension;
532         }
533     }
534 }
535
536 #ifdef __SSE2__
537 #include <emmintrin.h>
538 #warning SSE2 build
539
540 /**
541  * @brief mux all audio ports to events
542  * @param data
543  * @param offset
544  * @param nevents
545  */
546 void
547 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
548                                                     unsigned int offset,
549                                                     unsigned int nevents)
550 {
551     unsigned int j;
552     quadlet_t *target_event;
553     int i;
554
555     float * client_buffers[4];
556     float tmp_values[4] __attribute__ ((aligned (16)));
557     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
558
559     // prepare the scratch buffer
560     assert(m_scratch_buffer_size_bytes > nevents * 4);
561     memset(m_scratch_buffer, 0, nevents * 4);
562
563     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
564     const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
565     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
566
567 #if AMDTP_CLIP_FLOATS
568     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
569     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
570 #endif
571
572     // this assumes that audio ports are sorted by position,
573     // and that there are no gaps
574     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
575         struct _MBLA_port_cache *p;
576
577         // get the port buffers
578         for (j=0; j<4; j++) {
579             p = &(m_audio_ports.at(i+j));
580             if(likely(p->buffer && p->enabled)) {
581                 client_buffers[j] = (float *) p->buffer;
582                 client_buffers[j] += offset;
583             } else {
584                 // if a port is disabled or has no valid
585                 // buffer, use the scratch buffer (all zero's)
586                 client_buffers[j] = (float *) m_scratch_buffer;
587             }
588         }
589
590         // the base event for this position
591         target_event = (quadlet_t *)(data + i);
592         // process the events
593         for (j=0;j < nevents; j += 1)
594         {
595             // read the values
596             tmp_values[0] = *(client_buffers[0]);
597             tmp_values[1] = *(client_buffers[1]);
598             tmp_values[2] = *(client_buffers[2]);
599             tmp_values[3] = *(client_buffers[3]);
600
601             // now do the SSE based conversion/labeling
602             __m128 v_float = *((__m128*)tmp_values);
603             __m128i *target = (__m128i*)target_event;
604             __m128i v_int;
605
606             // clip
607 #if AMDTP_CLIP_FLOATS
608             // do SSE clipping
609             v_float = _mm_max_ps(v_float, v_min);
610             v_float = _mm_min_ps(v_float, v_max);
611 #endif
612
613             // multiply
614             v_float = _mm_mul_ps(v_float, mult);
615             // convert to signed integer
616             v_int = _mm_cvttps_epi32( v_float );
617             // mask
618             v_int = _mm_and_si128( v_int, mask );
619             // label it
620             v_int = _mm_or_si128( v_int, label );
621
622             // do endian conversion (SSE is always little endian)
623             // do first swap
624             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
625             // do second swap
626             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
627             // store the packed int
628             // (target misalignment is assumed since we don't know the m_dimension)
629             _mm_storeu_si128 (target, v_int);
630
631             // increment the buffer pointers
632             client_buffers[0]++;
633             client_buffers[1]++;
634             client_buffers[2]++;
635             client_buffers[3]++;
636
637             // go to next target event position
638             target_event += m_dimension;
639         }
640     }
641
642     // do remaining ports
643     // NOTE: these can be time-SSE'd
644     for (; i < (int)m_nb_audio_ports; i++) {
645         struct _MBLA_port_cache &p = m_audio_ports.at(i);
646         target_event = (quadlet_t *)(data + i);
647         assert(nevents + offset <= p.buffer_size );
648
649         if(likely(p.buffer && p.enabled)) {
650             float *buffer = (float *)(p.buffer);
651             buffer += offset;
652    
653             for (j = 0;j < nevents; j += 4)
654             {
655                 // read the values
656                 tmp_values[0] = *buffer;
657                 buffer++;
658                 tmp_values[1] = *buffer;
659                 buffer++;
660                 tmp_values[2] = *buffer;
661                 buffer++;
662                 tmp_values[3] = *buffer;
663                 buffer++;
664
665                 // now do the SSE based conversion/labeling
666                 __m128 v_float = *((__m128*)tmp_values);
667                 __m128i v_int;
668
669 #if AMDTP_CLIP_FLOATS
670                 // do SSE clipping
671                 v_float = _mm_max_ps(v_float, v_min);
672                 v_float = _mm_min_ps(v_float, v_max);
673 #endif
674                 // multiply
675                 v_float = _mm_mul_ps(v_float, mult);
676                 // convert to signed integer
677                 v_int = _mm_cvttps_epi32( v_float );
678                 // mask
679                 v_int = _mm_and_si128( v_int, mask );
680                 // label it
681                 v_int = _mm_or_si128( v_int, label );
682    
683                 // do endian conversion (SSE is always little endian)
684                 // do first swap
685                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
686                 // do second swap
687                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
688
689                 // store the packed int
690                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
691
692                 // increment the buffer pointers
693                 *target_event = tmp_values_int[0];
694                 target_event += m_dimension;
695                 *target_event = tmp_values_int[1];
696                 target_event += m_dimension;
697                 *target_event = tmp_values_int[2];
698                 target_event += m_dimension;
699                 *target_event = tmp_values_int[3];
700                 target_event += m_dimension;
701             }
702
703             // do the remainder of the events
704             for(;j < nevents; j += 1) {
705                 float *in = (float *)buffer;
706 #if AMDTP_CLIP_FLOATS
707                 // clip directly to the value of a maxed event
708                 if(unlikely(*in > 1.0)) {
709                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
710                 } else if(unlikely(*in < -1.0)) {
711                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
712                 } else {
713                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
714                     unsigned int tmp = ((int) v);
715                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
716                     *target_event = CondSwapToBus32((quadlet_t)tmp);
717                 }
718 #else
719                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
720                 unsigned int tmp = ((int) v);
721                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
722                 *target_event = CondSwapToBus32((quadlet_t)tmp);
723 #endif
724                 buffer++;
725                 target_event += m_dimension;
726             }
727
728         } else {
729             for (j = 0;j < nevents; j += 1)
730             {
731                 // hardcoded byte swapped
732                 *target_event = 0x00000040;
733                 target_event += m_dimension;
734             }
735         }
736     }
737 }
738
739
740 /**
741  * @brief mux all audio ports to events
742  * @param data
743  * @param offset
744  * @param nevents
745  */
746 void
747 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
748                                                     unsigned int offset,
749                                                     unsigned int nevents)
750 {
751     unsigned int j;
752     quadlet_t *target_event;
753     int i;
754
755     uint32_t *client_buffers[4];
756     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
757
758     // prepare the scratch buffer
759     assert(m_scratch_buffer_size_bytes > nevents * 4);
760     memset(m_scratch_buffer, 0, nevents * 4);
761
762     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
763     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
764
765     // this assumes that audio ports are sorted by position,
766     // and that there are no gaps
767     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
768         struct _MBLA_port_cache *p;
769
770         // get the port buffers
771         for (j=0; j<4; j++) {
772             p = &(m_audio_ports.at(i+j));
773             if(likely(p->buffer && p->enabled)) {
774                 client_buffers[j] = (uint32_t *) p->buffer;
775                 client_buffers[j] += offset;
776             } else {
777                 // if a port is disabled or has no valid
778                 // buffer, use the scratch buffer (all zero's)
779                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
780             }
781         }
782
783         // the base event for this position
784         target_event = (quadlet_t *)(data + i);
785
786         // process the events
787         for (j=0;j < nevents; j += 1)
788         {
789             // read the values
790             tmp_values[0] = *(client_buffers[0]);
791             tmp_values[1] = *(client_buffers[1]);
792             tmp_values[2] = *(client_buffers[2]);
793             tmp_values[3] = *(client_buffers[3]);
794
795             // now do the SSE based conversion/labeling
796             __m128i *target = (__m128i*)target_event;
797             __m128i v_int = *((__m128i*)tmp_values);;
798
799             // mask
800             v_int = _mm_and_si128( v_int, mask );
801             // label it
802             v_int = _mm_or_si128( v_int, label );
803
804             // do endian conversion (SSE is always little endian)
805             // do first swap
806             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
807             // do second swap
808             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
809
810             // store the packed int
811             // (target misalignment is assumed since we don't know the m_dimension)
812             _mm_storeu_si128 (target, v_int);
813
814             // increment the buffer pointers
815             client_buffers[0]++;
816             client_buffers[1]++;
817             client_buffers[2]++;
818             client_buffers[3]++;
819
820             // go to next target event position
821             target_event += m_dimension;
822         }
823     }
824
825     // do remaining ports
826     // NOTE: these can be time-SSE'd
827     for (; i < ((int)m_nb_audio_ports); i++) {
828         struct _MBLA_port_cache &p = m_audio_ports.at(i);
829         target_event = (quadlet_t *)(data + i);
830         assert(nevents + offset <= p.buffer_size );
831
832         if(likely(p.buffer && p.enabled)) {
833             uint32_t *buffer = (uint32_t *)(p.buffer);
834             buffer += offset;
835    
836             for (j = 0;j < nevents; j += 4)
837             {
838                 // read the values
839                 tmp_values[0] = *buffer;
840                 buffer++;
841                 tmp_values[1] = *buffer;
842                 buffer++;
843                 tmp_values[2] = *buffer;
844                 buffer++;
845                 tmp_values[3] = *buffer;
846                 buffer++;
847
848                 // now do the SSE based conversion/labeling
849                 __m128i v_int = *((__m128i*)tmp_values);;
850
851                 // mask
852                 v_int = _mm_and_si128( v_int, mask );
853                 // label it
854                 v_int = _mm_or_si128( v_int, label );
855
856                 // do endian conversion (SSE is always little endian)
857                 // do first swap
858                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
859                 // do second swap
860                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
861
862                 // store the packed int
863                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
864
865                 // increment the buffer pointers
866                 *target_event = tmp_values[0];
867                 target_event += m_dimension;
868                 *target_event = tmp_values[1];
869                 target_event += m_dimension;
870                 *target_event = tmp_values[2];
871                 target_event += m_dimension;
872                 *target_event = tmp_values[3];
873                 target_event += m_dimension;
874             }
875
876             // do the remainder of the events
877             for(;j < nevents; j += 1) {
878                 uint32_t in = (uint32_t)(*buffer);
879                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
880                 buffer++;
881                 target_event += m_dimension;
882             }
883
884         } else {
885             for (j = 0;j < nevents; j += 1)
886             {
887                 // hardcoded byte swapped
888                 *target_event = 0x00000040;
889                 target_event += m_dimension;
890             }
891         }
892     }
893 }
894
895 #else
896
897 /**
898  * @brief mux all audio ports to events
899  * @param data
900  * @param offset
901  * @param nevents
902  */
903 void
904 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
905                                                     unsigned int offset,
906                                                     unsigned int nevents)
907 {
908     unsigned int j;
909     quadlet_t *target_event;
910     int i;
911
912     for (i = 0; i < m_nb_audio_ports; i++) {
913         struct _MBLA_port_cache &p = m_audio_ports.at(i);
914         target_event = (quadlet_t *)(data + i);
915         assert(nevents + offset <= p.buffer_size );
916
917         if(likely(p.buffer && p.enabled)) {
918             quadlet_t *buffer = (quadlet_t *)(p.buffer);
919             buffer += offset;
920    
921             for (j = 0;j < nevents; j += 1)
922             {
923                 uint32_t in = (uint32_t)(*buffer);
924                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
925                 buffer++;
926                 target_event += m_dimension;
927             }
928         } else {
929             for (j = 0;j < nevents; j += 1)
930             {
931                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
932                 target_event += m_dimension;
933             }
934         }
935     }
936 }
937
938 /**
939  * @brief mux all audio ports to events
940  * @param data
941  * @param offset
942  * @param nevents
943  */
944 void
945 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
946                                                     unsigned int offset,
947                                                     unsigned int nevents)
948 {
949     unsigned int j;
950     quadlet_t *target_event;
951     int i;
952
953     for (i = 0; i < m_nb_audio_ports; i++) {
954         struct _MBLA_port_cache &p = m_audio_ports.at(i);
955         target_event = (quadlet_t *)(data + i);
956         assert(nevents + offset <= p.buffer_size );
957
958         if(likely(p.buffer && p.enabled)) {
959             quadlet_t *buffer = (quadlet_t *)(p.buffer);
960             buffer += offset;
961    
962             for (j = 0;j < nevents; j += 1)
963             {
964                 float *in = (float *)buffer;
965 #if AMDTP_CLIP_FLOATS
966                 // clip directly to the value of a maxed event
967                 if(unlikely(*in > 1.0)) {
968                     *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
969                 } else if(unlikely(*in < -1.0)) {
970                     *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
971                 } else {
972                     float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
973                     unsigned int tmp = ((int) v);
974                     tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
975                     *target_event = CondSwapToBus32((quadlet_t)tmp);
976                 }
977 #else
978                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
979                 unsigned int tmp = ((int) v);
980                 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
981                 *target_event = CondSwapToBus32((quadlet_t)tmp);
982 #endif
983                 buffer++;
984                 target_event += m_dimension;
985             }
986         } else {
987             for (j = 0;j < nevents; j += 1)
988             {
989                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
990                 target_event += m_dimension;
991             }
992         }
993     }
994 }
995 #endif
996
997 /**
998  * @brief encodes all midi ports in the cache to events (silence)
999  * @param data
1000  * @param offset
1001  * @param nevents
1002  */
1003 void
1004 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
1005                                                      unsigned int offset,
1006                                                      unsigned int nevents)
1007 {
1008     quadlet_t *target_event;
1009     int i;
1010     unsigned int j;
1011
1012     for (i = 0; i < m_nb_midi_ports; i++) {
1013         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1014
1015         for (j = p.location;j < nevents; j += 8) {
1016             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1017             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1018         }
1019     }
1020 }
1021
1022 /**
1023  * @brief encodes all midi ports in the cache to events
1024  * @param data
1025  * @param offset
1026  * @param nevents
1027  */
1028 void
1029 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1030                                               unsigned int offset,
1031                                               unsigned int nevents)
1032 {
1033     quadlet_t *target_event;
1034     int i;
1035     unsigned int j;
1036
1037     for (i = 0; i < m_nb_midi_ports; i++) {
1038         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1039         if (p.buffer && p.enabled) {
1040             uint32_t *buffer = (quadlet_t *)(p.buffer);
1041             buffer += offset;
1042
1043             for (j = p.location;j < nevents; j += 8) {
1044                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1045
1046                 if ( *buffer & 0xFF000000 )   // we can send a byte
1047                 {
1048                     quadlet_t tmpval;
1049                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1050                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1051                     *target_event = CondSwapToBus32(tmpval);
1052
1053                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1054                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1055                     debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1056                                data, target_event, tmpval );
1057                 } else {
1058                     // can't send a byte, either because there is no byte,
1059                     // or because this would exceed the maximum rate
1060                     // FIXME: this can be ifdef optimized since it's a constant
1061                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1062                 }
1063                 buffer+=8;
1064             }
1065         } else {
1066             for (j = p.location;j < nevents; j += 8) {
1067                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1068                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1069                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1070             }
1071         }
1072     }
1073 }
1074
1075 bool
1076 AmdtpTransmitStreamProcessor::initPortCache() {
1077     // make use of the fact that audio ports are the first ports in
1078     // the cluster as per AMDTP. so we can sort the ports by position
1079     // and have very efficient lookups:
1080     // m_float_ports.at(i).buffer -> audio stream i buffer
1081     // for midi ports we simply cache all port info since they are (usually) not
1082     // that numerous
1083     m_nb_audio_ports = 0;
1084     m_audio_ports.clear();
1085    
1086     m_nb_midi_ports = 0;
1087     m_midi_ports.clear();
1088    
1089     for(PortVectorIterator it = m_Ports.begin();
1090         it != m_Ports.end();
1091         ++it )
1092     {
1093         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1094         assert(pinfo); // this should not fail!!
1095
1096         switch( pinfo->getFormat() )
1097         {
1098             case AmdtpPortInfo::E_MBLA:
1099                 m_nb_audio_ports++;
1100                 break;
1101             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1102                 break;
1103             case AmdtpPortInfo::E_Midi:
1104                 m_nb_midi_ports++;
1105                 break;
1106             default: // ignore
1107                 break;
1108         }
1109     }
1110
1111     int idx;
1112     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1113         for(PortVectorIterator it = m_Ports.begin();
1114             it != m_Ports.end();
1115             ++it )
1116         {
1117             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1118             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1119                         "idx %u: looking at port %s at position %u\n",
1120                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1121             if(pinfo->getPosition() == (unsigned int)idx) {
1122                 struct _MBLA_port_cache p;
1123                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1124                 if(p.port == NULL) {
1125                     debugError("Port is not an AmdtpAudioPort!\n");
1126                     return false;
1127                 }
1128                 p.buffer = NULL; // to be filled by updatePortCache
1129                 #ifdef DEBUG
1130                 p.buffer_size = (*it)->getBufferSize();
1131                 #endif
1132
1133                 m_audio_ports.push_back(p);
1134                 debugOutput(DEBUG_LEVEL_VERBOSE,
1135                             "Cached port %s at position %u\n",
1136                             p.port->getName().c_str(), idx);
1137                 goto next_index;
1138             }
1139         }
1140         debugError("No MBLA port found for position %d\n", idx);
1141         return false;
1142 next_index:
1143         continue;
1144     }
1145
1146     for(PortVectorIterator it = m_Ports.begin();
1147         it != m_Ports.end();
1148         ++it )
1149     {
1150         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1151         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1152                     "idx %u: looking at port %s at position %u, location %u\n",
1153                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1154         if ((*it)->getPortType() == Port::E_Midi) {
1155             struct _MIDI_port_cache p;
1156             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1157             if(p.port == NULL) {
1158                 debugError("Port is not an AmdtpMidiPort!\n");
1159                 return false;
1160             }
1161             p.position = pinfo->getPosition();
1162             p.location = pinfo->getLocation();
1163             p.buffer = NULL; // to be filled by updatePortCache
1164             #ifdef DEBUG
1165             p.buffer_size = (*it)->getBufferSize();
1166             #endif
1167
1168             m_midi_ports.push_back(p);
1169             debugOutput(DEBUG_LEVEL_VERBOSE,
1170                         "Cached port %s at position %u, location %u\n",
1171                         p.port->getName().c_str(), p.position, p.location);
1172         }
1173     }
1174
1175     return true;
1176 }
1177
1178 void
1179 AmdtpTransmitStreamProcessor::updatePortCache() {
1180     int idx;
1181     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1182         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1183         AmdtpAudioPort *port = p.port;
1184         p.buffer = port->getBufferAddress();
1185         p.enabled = !port->isDisabled();
1186     }
1187     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1188         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1189         AmdtpMidiPort *port = p.port;
1190         p.buffer = port->getBufferAddress();
1191         p.enabled = !port->isDisabled();
1192     }
1193 }
1194
1195 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.