root/branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

Revision 1344, 40.0 kB (checked in by ppalmers, 12 years ago)

switch back to a sleep based period signalling scheme to ensure proper wakeup timing

Line 
1 /*
2  * Copyright (C) 2005-2008 by Pieter Palmers
3  *
4  * This file is part of FFADO
5  * FFADO = Free Firewire (pro-)audio drivers for linux
6  *
7  * FFADO is based upon FreeBoB.
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 2 of the License, or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23
24 #include "config.h"
25 #include "AmdtpTransmitStreamProcessor.h"
26 #include "AmdtpPort.h"
27 #include "../StreamProcessorManager.h"
28 #include "devicemanager.h"
29
30 #include "libutil/Time.h"
31 #include "libutil/float_cast.h"
32
33 #include "libieee1394/ieee1394service.h"
34 #include "libieee1394/IsoHandlerManager.h"
35 #include "libieee1394/cycletimer.h"
36
37 #include "libutil/ByteSwap.h"
38 #include <assert.h>
39 #include <cstring>
40
41 #define AMDTP_FLOAT_MULTIPLIER 2147483392.0
42
43 namespace Streaming
44 {
45
46 /* transmit */
47 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
48         : StreamProcessor(parent, ePT_Transmit)
49         , m_dimension( dimension )
50         , m_dbc( 0 )
51 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
52         , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
53 #endif
54         , m_nb_audio_ports( 0 )
55         , m_nb_midi_ports( 0 )
56 {}
57
58 enum StreamProcessor::eChildReturnValue
59 AmdtpTransmitStreamProcessor::generatePacketHeader (
60     unsigned char *data, unsigned int *length,
61     unsigned char *tag, unsigned char *sy,
62     uint32_t pkt_ctr )
63 {
64     __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
65     struct iec61883_packet *packet = (struct iec61883_packet *)data;
66     /* Our node ID can change after a bus reset, so it is best to fetch
67     * our node ID for each packet. */
68     packet->sid = m_local_node_id;
69
70     packet->dbs = m_dimension;
71     packet->fn = 0;
72     packet->qpc = 0;
73     packet->sph = 0;
74     packet->reserved = 0;
75     packet->dbc = m_dbc;
76     packet->eoh1 = 2;
77     packet->fmt = IEC61883_FMT_AMDTP;
78
79     *tag = IEC61883_TAG_WITH_CIP;
80     *sy = 0;
81
82     signed int fc;
83     uint64_t presentation_time;
84     unsigned int presentation_cycle;
85     int cycles_until_presentation;
86
87     uint64_t transmit_at_time;
88     unsigned int transmit_at_cycle;
89     int cycles_until_transmit;
90
91     debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
92                         "Try for cycle %d\n", CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
93     // check whether the packet buffer has packets for us to send.
94     // the base timestamp is the one of the next sample in the buffer
95     ffado_timestamp_t ts_head_tmp;
96     m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
97
98     // the timestamp gives us the time at which we want the sample block
99     // to be output by the device
100     presentation_time = ( uint64_t ) ts_head_tmp;
101
102     // now we calculate the time when we have to transmit the sample block
103     transmit_at_time = substractTicks( presentation_time, AMDTP_TRANSMIT_TRANSFER_DELAY );
104
105     // calculate the cycle this block should be presented in
106     // (this is just a virtual calculation since at that time it should
107     //  already be in the device's buffer)
108     presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
109
110     // calculate the cycle this block should be transmitted in
111     transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
112
113     // we can check whether this cycle is within the 'window' we have
114     // to send this packet.
115     // first calculate the number of cycles left before presentation time
116     cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
117
118     // we can check whether this cycle is within the 'window' we have
119     // to send this packet.
120     // first calculate the number of cycles left before presentation time
121     cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
122
123     // two different options:
124     // 1) there are not enough frames for one packet
125     //      => determine wether this is a problem, since we might still
126     //         have some time to send it
127     // 2) there are enough packets
128     //      => determine whether we have to send them in this packet
129     if ( fc < ( signed int ) m_syt_interval )
130     {
131         // not enough frames in the buffer,
132
133         // we can still postpone the queueing of the packets
134         // if we are far enough ahead of the presentation time
135         if ( cycles_until_presentation <= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
136         {
137             debugOutput( DEBUG_LEVEL_NORMAL,
138                          "Insufficient frames (P): N=%02d, CY=%04u, TC=%04u, CUT=%04d\n",
139                          fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
140                          transmit_at_cycle, cycles_until_transmit );
141             // we are too late
142             return eCRV_XRun;
143         }
144         else
145         {
146             #if DEBUG_EXTREME
147             unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
148
149             debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
150                                "Insufficient frames (NP): N=%02d, CY=%04u, TC=%04u, CUT=%04d, NOW=%04d\n",
151                                fc, CYCLE_TIMER_GET_CYCLES(pkt_ctr),
152                                transmit_at_cycle, cycles_until_transmit, now_cycle );
153             #endif
154
155             // there is still time left to send the packet
156             // we want the system to give this packet another go at a later time instant
157             return eCRV_Again; // note that the raw1394 again system doesn't work as expected
158
159             // we could wait here for a certain time before trying again. However, this
160             // is not going to work since we then block the iterator thread, hence also
161             // the receiving code, meaning that we are not processing received packets,
162             // and hence there is no progression in the number of frames available.
163
164             // for example:
165             // SleepRelativeUsec(125); // one cycle
166             // goto try_block_of_frames;
167
168             // or more advanced, calculate how many cycles we are ahead of 'now' and
169             // base the sleep on that.
170
171             // note that this requires that there is one thread for each IsoHandler,
172             // otherwise we're in the deadlock described above.
173         }
174     }
175     else
176     {
177         // there are enough frames, so check the time they are intended for
178         // all frames have a certain 'time window' in which they can be sent
179         // this corresponds to the range of the timestamp mechanism:
180         // we can send a packet 15 cycles in advance of the 'presentation time'
181         // in theory we can send the packet up till one cycle before the presentation time,
182         // however this is not very smart.
183
184         // There are 3 options:
185         // 1) the frame block is too early
186         //      => send an empty packet
187         // 2) the frame block is within the window
188         //      => send it
189         // 3) the frame block is too late
190         //      => discard (and raise xrun?)
191         //         get next block of frames and repeat
192
193         if(cycles_until_transmit < 0)
194         {
195             // we are too late
196             debugOutput(DEBUG_LEVEL_VERBOSE,
197                         "Too late: CY=%04u, TC=%04u, CUT=%04d, TSP=%011llu (%04u)\n",
198                         CYCLE_TIMER_GET_CYCLES(pkt_ctr),
199                         transmit_at_cycle, cycles_until_transmit,
200                         presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
201             //debugShowBackLogLines(200);
202             // however, if we can send this sufficiently before the presentation
203             // time, it could be harmless.
204             // NOTE: dangerous since the device has no way of reporting that it didn't get
205             //       this packet on time.
206             if(cycles_until_presentation >= AMDTP_MIN_CYCLES_BEFORE_PRESENTATION)
207             {
208                 // we are not that late and can still try to transmit the packet
209                 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
210                 m_last_timestamp = presentation_time;
211                 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
212             }
213             else   // definitely too late
214             {
215                 return eCRV_XRun;
216             }
217         }
218         else if(cycles_until_transmit <= AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY)
219         {
220             // it's time send the packet
221             m_dbc += fillDataPacketHeader(packet, length, presentation_time);
222             m_last_timestamp = presentation_time;
223
224             // for timestamp tracing
225             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
226                                "XMIT PKT: TSP= %011llu (%04u) (%04u) (%04u)\n",
227                                presentation_time,
228                                (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
229                                presentation_cycle, transmit_at_cycle);
230
231             return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
232         }
233         else
234         {
235             debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
236                                "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
237                                CYCLE_TIMER_GET_CYCLES(pkt_ctr),
238                                transmit_at_cycle, cycles_until_transmit,
239                                transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
240                                presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
241 #ifdef DEBUG
242             if ( cycles_until_transmit > AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY + 1 )
243             {
244                 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
245                                    "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011llu (%04u), TSP=%011llu (%04u)\n",
246                                    CYCLE_TIMER_GET_CYCLES(pkt_ctr),
247                                    transmit_at_cycle, cycles_until_transmit,
248                                    transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
249                                    presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
250             }
251 #endif
252             // we are too early, send only an empty packet
253             return eCRV_EmptyPacket;
254         }
255     }
256     return eCRV_Invalid;
257 }
258
259 enum StreamProcessor::eChildReturnValue
260 AmdtpTransmitStreamProcessor::generatePacketData (
261     unsigned char *data, unsigned int *length )
262 {
263     if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
264     {
265         debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
266                            "XMIT DATA: TSP= %011llu (%04u)\n",
267                            m_last_timestamp,
268                            (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
269         return eCRV_OK;
270     }
271     else return eCRV_XRun;
272 }
273
274 enum StreamProcessor::eChildReturnValue
275 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
276     unsigned char *data, unsigned int *length,
277     unsigned char *tag, unsigned char *sy,
278     uint32_t pkt_ctr )
279 {
280     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
281     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
282                        "XMIT SILENT (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
283                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
284                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
285
286     packet->sid = m_local_node_id;
287
288     packet->dbs = m_dimension;
289     packet->fn = 0;
290     packet->qpc = 0;
291     packet->sph = 0;
292     packet->reserved = 0;
293     packet->dbc = m_dbc;
294     packet->eoh1 = 2;
295     packet->fmt = IEC61883_FMT_AMDTP;
296
297     *tag = IEC61883_TAG_WITH_CIP;
298     *sy = 0;
299
300     m_dbc += fillNoDataPacketHeader(packet, length);
301     return eCRV_Packet;
302 }
303
304 enum StreamProcessor::eChildReturnValue
305 AmdtpTransmitStreamProcessor::generateSilentPacketData (
306     unsigned char *data, unsigned int *length )
307 {
308     return eCRV_OK; // no need to do anything
309 }
310
311 enum StreamProcessor::eChildReturnValue
312 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
313     unsigned char *data, unsigned int *length,
314     unsigned char *tag, unsigned char *sy,
315     uint32_t pkt_ctr )
316 {
317     struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
318     debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
319                        "XMIT EMPTY (cy %04d): CY=%04u, TSP=%011llu (%04u)\n",
320                        CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
321                        (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
322     packet->sid = m_local_node_id;
323
324     packet->dbs = m_dimension;
325     packet->fn = 0;
326     packet->qpc = 0;
327     packet->sph = 0;
328     packet->reserved = 0;
329     packet->dbc = m_dbc;
330     packet->eoh1 = 2;
331     packet->fmt = IEC61883_FMT_AMDTP;
332
333     *tag = IEC61883_TAG_WITH_CIP;
334     *sy = 0;
335
336     m_dbc += fillNoDataPacketHeader(packet, length);
337     return eCRV_OK;
338 }
339
340 enum StreamProcessor::eChildReturnValue
341 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
342     unsigned char *data, unsigned int *length )
343 {
344     return eCRV_OK; // no need to do anything
345 }
346
347 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
348     struct iec61883_packet *packet, unsigned int* length,
349     uint32_t ts )
350 {
351
352     packet->fdf = m_fdf;
353
354     // convert the timestamp to SYT format
355     uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
356     packet->syt = CondSwapToBus16 ( timestamp_SYT );
357
358     // FIXME: use a precomputed value here
359     *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
360
361     return m_syt_interval;
362 }
363
364 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
365     struct iec61883_packet *packet, unsigned int* length )
366 {
367     // no-data packets have syt=0xFFFF
368     // and (can) have the usual amount of events as dummy data
369     // DBC is not increased
370     packet->fdf = IEC61883_FDF_NODATA;
371     packet->syt = 0xffff;
372
373 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
374     if ( m_send_nodata_payload )
375     { // no-data packets with payload (NOTE: DICE-II doesn't like that)
376         *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
377         return m_syt_interval;
378     } else { // no-data packets without payload
379         *length = 2*sizeof ( quadlet_t );
380         return 0;
381     }
382 #else
383     // no-data packets without payload
384     *length = 2*sizeof ( quadlet_t );
385     return 0;
386 #endif
387 }
388
389 unsigned int
390 AmdtpTransmitStreamProcessor::getSytInterval() {
391     switch (m_StreamProcessorManager.getNominalRate()) {
392         case 32000:
393         case 44100:
394         case 48000:
395             return 8;
396         case 88200:
397         case 96000:
398             return 16;
399         case 176400:
400         case 192000:
401             return 32;
402         default:
403             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
404             return 0;
405     }
406 }
407
408 unsigned int
409 AmdtpTransmitStreamProcessor::getAveragePacketSize()
410 {
411     // in one second we have 8000 packets
412     // containing FRAMERATE frames of m_dimension quadlets
413     // so 8000 packet headers + FRAMERATE*m_dimension quadlets
414     unsigned int one_second = 8000 * 2 * sizeof(quadlet_t) + m_StreamProcessorManager.getNominalRate() * m_dimension * sizeof(quadlet_t);
415     return one_second / 8000;
416 }
417
418 unsigned int
419 AmdtpTransmitStreamProcessor::getFDF() {
420     switch (m_StreamProcessorManager.getNominalRate()) {
421         case 32000: return IEC61883_FDF_SFC_32KHZ;
422         case 44100: return IEC61883_FDF_SFC_44K1HZ;
423         case 48000: return IEC61883_FDF_SFC_48KHZ;
424         case 88200: return IEC61883_FDF_SFC_88K2HZ;
425         case 96000: return IEC61883_FDF_SFC_96KHZ;
426         case 176400: return IEC61883_FDF_SFC_176K4HZ;
427         case 192000: return IEC61883_FDF_SFC_192KHZ;
428         default:
429             debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
430             return 0;
431     }
432 }
433
434 bool AmdtpTransmitStreamProcessor::prepareChild()
435 {
436     debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
437     m_syt_interval = getSytInterval();
438     m_fdf = getFDF();
439
440     iec61883_cip_init (
441         &m_cip_status,
442         IEC61883_FMT_AMDTP,
443         m_fdf,
444         m_StreamProcessorManager.getNominalRate(),
445         m_dimension,
446         m_syt_interval );
447
448     if (!initPortCache()) {
449         debugError("Could not init port cache\n");
450         return false;
451     }
452
453     return true;
454 }
455
456 /*
457 * compose the event streams for the packets from the port buffers
458 */
459 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
460         unsigned int nevents, unsigned int offset )
461 {
462     // update the variable parts of the cache
463     updatePortCache();
464
465     // encode audio data
466     switch(m_StreamProcessorManager.getAudioDataType()) {
467         case StreamProcessorManager::eADT_Int24:
468             encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
469             break;
470         case StreamProcessorManager::eADT_Float:
471             encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
472             break;
473     }
474
475     // do midi ports
476     encodeMidiPorts((quadlet_t *)data, offset, nevents);
477     return true;
478 }
479
480 bool
481 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
482     char *data, unsigned int nevents, unsigned int offset)
483 {
484     // no need to update the port cache when transmitting silence since
485     // no dynamic values are used to do so.
486     encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
487     encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
488     return true;
489 }
490
491 /**
492  * @brief encodes all audio ports in the cache to events (silent data)
493  * @param data
494  * @param offset
495  * @param nevents
496  */
497 void
498 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
499                                                       unsigned int offset,
500                                                       unsigned int nevents)
501 {
502     unsigned int j;
503     quadlet_t *target_event;
504     int i;
505
506     for (i = 0; i < m_nb_audio_ports; i++) {
507         target_event = (quadlet_t *)(data + i);
508
509         for (j = 0;j < nevents; j += 1)
510         {
511             *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
512             target_event += m_dimension;
513         }
514     }
515 }
516
517 #ifdef __SSE2__
518 //#if 0
519 #include <emmintrin.h>
520 #warning SSE2 build
521
522 /**
523  * @brief mux all audio ports to events
524  * @param data
525  * @param offset
526  * @param nevents
527  */
528 void
529 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
530                                                     unsigned int offset,
531                                                     unsigned int nevents)
532 {
533     unsigned int j;
534     quadlet_t *target_event;
535     int i;
536
537     float * client_buffers[4];
538     float tmp_values[4] __attribute__ ((aligned (16)));
539     uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
540
541     // prepare the scratch buffer
542     assert(m_scratch_buffer_size_bytes > nevents * 4);
543     memset(m_scratch_buffer, 0, nevents * 4);
544
545     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
546     const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
547
548 #if AMDTP_CLIP_FLOATS
549     const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
550     const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
551 #endif
552
553     // this assumes that audio ports are sorted by position,
554     // and that there are no gaps
555     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
556         struct _MBLA_port_cache *p;
557
558         // get the port buffers
559         for (j=0; j<4; j++) {
560             p = &(m_audio_ports.at(i+j));
561             if(p->buffer && p->enabled) {
562                 client_buffers[j] = (float *) p->buffer;
563                 client_buffers[j] += offset;
564             } else {
565                 // if a port is disabled or has no valid
566                 // buffer, use the scratch buffer (all zero's)
567                 client_buffers[j] = (float *) m_scratch_buffer;
568             }
569         }
570
571         // the base event for this position
572         target_event = (quadlet_t *)(data + i);
573
574         // process the events
575         for (j=0;j < nevents; j += 1)
576         {
577             // read the values
578             tmp_values[0] = *(client_buffers[0]);
579             tmp_values[1] = *(client_buffers[1]);
580             tmp_values[2] = *(client_buffers[2]);
581             tmp_values[3] = *(client_buffers[3]);
582
583             // now do the SSE based conversion/labeling
584             __m128 v_float = *((__m128*)tmp_values);
585             __m128i *target = (__m128i*)target_event;
586             __m128i v_int;
587
588             // clip
589 #if AMDTP_CLIP_FLOATS
590             // do SSE clipping
591             v_float = _mm_max_ps(v_float, v_min);
592             v_float = _mm_min_ps(v_float, v_max);
593 #endif
594
595             // multiply
596             v_float = _mm_mul_ps(v_float, mult);
597             // convert to signed integer
598             v_int = _mm_cvttps_epi32( v_float );
599             // shift right 8 bits
600             v_int = _mm_srli_epi32( v_int, 8 );
601             // label it
602             v_int = _mm_or_si128( v_int, label );
603
604             // do endian conversion (SSE is always little endian)
605             // do first swap
606             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
607             // do second swap
608             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
609
610             // store the packed int
611             // (target misalignment is assumed since we don't know the m_dimension)
612             _mm_storeu_si128 (target, v_int);
613
614             // increment the buffer pointers
615             client_buffers[0]++;
616             client_buffers[1]++;
617             client_buffers[2]++;
618             client_buffers[3]++;
619
620             // go to next target event position
621             target_event += m_dimension;
622         }
623     }
624
625     // do remaining ports
626     // NOTE: these can be time-SSE'd
627     for (; i < (int)m_nb_audio_ports; i++) {
628         struct _MBLA_port_cache &p = m_audio_ports.at(i);
629         target_event = (quadlet_t *)(data + i);
630         assert(nevents + offset <= p.buffer_size );
631
632         if(p.buffer && p.enabled) {
633             float *buffer = (float *)(p.buffer);
634             buffer += offset;
635    
636             for (j = 0;j < nevents; j += 4)
637             {
638                 // read the values
639                 tmp_values[0] = *buffer;
640                 buffer++;
641                 tmp_values[1] = *buffer;
642                 buffer++;
643                 tmp_values[2] = *buffer;
644                 buffer++;
645                 tmp_values[3] = *buffer;
646                 buffer++;
647
648                 // now do the SSE based conversion/labeling
649                 __m128 v_float = *((__m128*)tmp_values);
650                 __m128i v_int;
651
652 #if AMDTP_CLIP_FLOATS
653                 // do SSE clipping
654                 v_float = _mm_max_ps(v_float, v_min);
655                 v_float = _mm_min_ps(v_float, v_max);
656 #endif
657
658                 // multiply
659                 v_float = _mm_mul_ps(v_float, mult);
660                 // convert to signed integer
661                 v_int = _mm_cvttps_epi32( v_float );
662                 // shift right 8 bits
663                 v_int = _mm_srli_epi32( v_int, 8 );
664                 // label it
665                 v_int = _mm_or_si128( v_int, label );
666    
667                 // do endian conversion (SSE is always little endian)
668                 // do first swap
669                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
670                 // do second swap
671                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
672
673                 // store the packed int
674                 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
675
676                 // increment the buffer pointers
677                 *target_event = tmp_values_int[0];
678                 target_event += m_dimension;
679                 *target_event = tmp_values_int[1];
680                 target_event += m_dimension;
681                 *target_event = tmp_values_int[2];
682                 target_event += m_dimension;
683                 *target_event = tmp_values_int[3];
684                 target_event += m_dimension;
685             }
686
687             // do the remainder of the events
688             for(;j < nevents; j += 1) {
689                 float *in = (float *)buffer;
690 #if AMDTP_CLIP_FLOATS
691                 if(*in > 1.0) *in=1.0;
692                 if(*in < -1.0) *in=-1.0;
693 #endif
694                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
695                 unsigned int tmp = ((int) v);
696                 tmp = ( tmp >> 8 ) | 0x40000000;
697                 *target_event = CondSwapToBus32((quadlet_t)tmp);
698                 buffer++;
699                 target_event += m_dimension;
700             }
701
702         } else {
703             for (j = 0;j < nevents; j += 1)
704             {
705                 // hardcoded byte swapped
706                 *target_event = 0x00000040;
707                 target_event += m_dimension;
708             }
709         }
710     }
711 }
712
713
714 /**
715  * @brief mux all audio ports to events
716  * @param data
717  * @param offset
718  * @param nevents
719  */
720 void
721 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
722                                                     unsigned int offset,
723                                                     unsigned int nevents)
724 {
725     unsigned int j;
726     quadlet_t *target_event;
727     int i;
728
729     uint32_t *client_buffers[4];
730     uint32_t tmp_values[4] __attribute__ ((aligned (16)));
731
732     // prepare the scratch buffer
733     assert(m_scratch_buffer_size_bytes > nevents * 4);
734     memset(m_scratch_buffer, 0, nevents * 4);
735
736     const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
737     const __m128i mask  = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
738
739     // this assumes that audio ports are sorted by position,
740     // and that there are no gaps
741     for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
742         struct _MBLA_port_cache *p;
743
744         // get the port buffers
745         for (j=0; j<4; j++) {
746             p = &(m_audio_ports.at(i+j));
747             if(p->buffer && p->enabled) {
748                 client_buffers[j] = (uint32_t *) p->buffer;
749                 client_buffers[j] += offset;
750             } else {
751                 // if a port is disabled or has no valid
752                 // buffer, use the scratch buffer (all zero's)
753                 client_buffers[j] = (uint32_t *) m_scratch_buffer;
754             }
755         }
756
757         // the base event for this position
758         target_event = (quadlet_t *)(data + i);
759
760         // process the events
761         for (j=0;j < nevents; j += 1)
762         {
763             // read the values
764             tmp_values[0] = *(client_buffers[0]);
765             tmp_values[1] = *(client_buffers[1]);
766             tmp_values[2] = *(client_buffers[2]);
767             tmp_values[3] = *(client_buffers[3]);
768
769             // now do the SSE based conversion/labeling
770             __m128i *target = (__m128i*)target_event;
771             __m128i v_int = *((__m128i*)tmp_values);;
772
773             // mask
774             v_int = _mm_and_si128( v_int, mask );
775             // label it
776             v_int = _mm_or_si128( v_int, label );
777
778             // do endian conversion (SSE is always little endian)
779             // do first swap
780             v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
781             // do second swap
782             v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
783
784             // store the packed int
785             // (target misalignment is assumed since we don't know the m_dimension)
786             _mm_storeu_si128 (target, v_int);
787
788             // increment the buffer pointers
789             client_buffers[0]++;
790             client_buffers[1]++;
791             client_buffers[2]++;
792             client_buffers[3]++;
793
794             // go to next target event position
795             target_event += m_dimension;
796         }
797     }
798
799     // do remaining ports
800     // NOTE: these can be time-SSE'd
801     for (; i < ((int)m_nb_audio_ports); i++) {
802         struct _MBLA_port_cache &p = m_audio_ports.at(i);
803         target_event = (quadlet_t *)(data + i);
804         assert(nevents + offset <= p.buffer_size );
805
806         if(p.buffer && p.enabled) {
807             uint32_t *buffer = (uint32_t *)(p.buffer);
808             buffer += offset;
809    
810             for (j = 0;j < nevents; j += 4)
811             {
812                 // read the values
813                 tmp_values[0] = *buffer;
814                 buffer++;
815                 tmp_values[1] = *buffer;
816                 buffer++;
817                 tmp_values[2] = *buffer;
818                 buffer++;
819                 tmp_values[3] = *buffer;
820                 buffer++;
821
822                 // now do the SSE based conversion/labeling
823                 __m128i v_int = *((__m128i*)tmp_values);;
824
825                 // mask
826                 v_int = _mm_and_si128( v_int, mask );
827                 // label it
828                 v_int = _mm_or_si128( v_int, label );
829
830                 // do endian conversion (SSE is always little endian)
831                 // do first swap
832                 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
833                 // do second swap
834                 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
835
836                 // store the packed int
837                 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
838
839                 // increment the buffer pointers
840                 *target_event = tmp_values[0];
841                 target_event += m_dimension;
842                 *target_event = tmp_values[1];
843                 target_event += m_dimension;
844                 *target_event = tmp_values[2];
845                 target_event += m_dimension;
846                 *target_event = tmp_values[3];
847                 target_event += m_dimension;
848             }
849
850             // do the remainder of the events
851             for(;j < nevents; j += 1) {
852                 uint32_t in = (uint32_t)(*buffer);
853                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
854                 buffer++;
855                 target_event += m_dimension;
856             }
857
858         } else {
859             for (j = 0;j < nevents; j += 1)
860             {
861                 // hardcoded byte swapped
862                 *target_event = 0x00000040;
863                 target_event += m_dimension;
864             }
865         }
866     }
867 }
868
869 #else
870
871 /**
872  * @brief mux all audio ports to events
873  * @param data
874  * @param offset
875  * @param nevents
876  */
877 void
878 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
879                                                     unsigned int offset,
880                                                     unsigned int nevents)
881 {
882     unsigned int j;
883     quadlet_t *target_event;
884     int i;
885
886     for (i = 0; i < m_nb_audio_ports; i++) {
887         struct _MBLA_port_cache &p = m_audio_ports.at(i);
888         target_event = (quadlet_t *)(data + i);
889         assert(nevents + offset <= p.buffer_size );
890
891         if(p.buffer && p.enabled) {
892             quadlet_t *buffer = (quadlet_t *)(p.buffer);
893             buffer += offset;
894    
895             for (j = 0;j < nevents; j += 1)
896             {
897                 uint32_t in = (uint32_t)(*buffer);
898                 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
899                 buffer++;
900                 target_event += m_dimension;
901             }
902         } else {
903             for (j = 0;j < nevents; j += 1)
904             {
905                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
906                 target_event += m_dimension;
907             }
908         }
909     }
910 }
911
912 /**
913  * @brief mux all audio ports to events
914  * @param data
915  * @param offset
916  * @param nevents
917  */
918 void
919 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
920                                                     unsigned int offset,
921                                                     unsigned int nevents)
922 {
923     unsigned int j;
924     quadlet_t *target_event;
925     int i;
926
927     for (i = 0; i < m_nb_audio_ports; i++) {
928         struct _MBLA_port_cache &p = m_audio_ports.at(i);
929         target_event = (quadlet_t *)(data + i);
930         assert(nevents + offset <= p.buffer_size );
931
932         if(p.buffer && p.enabled) {
933             quadlet_t *buffer = (quadlet_t *)(p.buffer);
934             buffer += offset;
935    
936             for (j = 0;j < nevents; j += 1)
937             {
938                 float *in = (float *)buffer;
939 #if AMDTP_CLIP_FLOATS
940                 if(*in > 1.0) *in=1.0;
941                 if(*in < -1.0) *in=-1.0;
942 #endif
943                 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
944                 unsigned int tmp = ((int) lrintf(v));
945
946                 tmp = ( tmp >> 8 ) | 0x40000000;
947                 *target_event = CondSwapToBus32((quadlet_t)tmp);
948                 buffer++;
949                 target_event += m_dimension;
950             }
951         } else {
952             for (j = 0;j < nevents; j += 1)
953             {
954                 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
955                 target_event += m_dimension;
956             }
957         }
958     }
959 }
960 #endif
961
962 /**
963  * @brief encodes all midi ports in the cache to events (silence)
964  * @param data
965  * @param offset
966  * @param nevents
967  */
968 void
969 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
970                                                      unsigned int offset,
971                                                      unsigned int nevents)
972 {
973     quadlet_t *target_event;
974     int i;
975     unsigned int j;
976
977     for (i = 0; i < m_nb_midi_ports; i++) {
978         struct _MIDI_port_cache &p = m_midi_ports.at(i);
979
980         for (j = p.location;j < nevents; j += 8) {
981             target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
982             *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
983         }
984     }
985 }
986
987 /**
988  * @brief encodes all midi ports in the cache to events
989  * @param data
990  * @param offset
991  * @param nevents
992  */
993 void
994 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
995                                               unsigned int offset,
996                                               unsigned int nevents)
997 {
998     quadlet_t *target_event;
999     int i;
1000     unsigned int j;
1001
1002     for (i = 0; i < m_nb_midi_ports; i++) {
1003         struct _MIDI_port_cache &p = m_midi_ports.at(i);
1004         if (p.buffer && p.enabled) {
1005             uint32_t *buffer = (quadlet_t *)(p.buffer);
1006             buffer += offset;
1007
1008             for (j = p.location;j < nevents; j += 8) {
1009                 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1010
1011                 if ( *buffer & 0xFF000000 )   // we can send a byte
1012                 {
1013                     quadlet_t tmpval;
1014                     tmpval = ((*buffer)<<16) & 0x00FF0000;
1015                     tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1016                     *target_event = CondSwapToBus32(tmpval);
1017
1018 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1019 //                                p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1020 //                     debugOutput ( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1021 //                                data, target_event, tmpval );
1022                 } else {
1023                     // can't send a byte, either because there is no byte,
1024                     // or because this would exceed the maximum rate
1025                     // FIXME: this can be ifdef optimized since it's a constant
1026                     *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1027                 }
1028                 buffer+=8;
1029             }
1030         } else {
1031             for (j = p.location;j < nevents; j += 8) {
1032                 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1033                 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1034                 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1035             }
1036         }
1037     }
1038 }
1039
1040 bool
1041 AmdtpTransmitStreamProcessor::initPortCache() {
1042     // make use of the fact that audio ports are the first ports in
1043     // the cluster as per AMDTP. so we can sort the ports by position
1044     // and have very efficient lookups:
1045     // m_float_ports.at(i).buffer -> audio stream i buffer
1046     // for midi ports we simply cache all port info since they are (usually) not
1047     // that numerous
1048     m_nb_audio_ports = 0;
1049     m_audio_ports.clear();
1050    
1051     m_nb_midi_ports = 0;
1052     m_midi_ports.clear();
1053    
1054     for(PortVectorIterator it = m_Ports.begin();
1055         it != m_Ports.end();
1056         ++it )
1057     {
1058         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1059         assert(pinfo); // this should not fail!!
1060
1061         switch( pinfo->getFormat() )
1062         {
1063             case AmdtpPortInfo::E_MBLA:
1064                 m_nb_audio_ports++;
1065                 break;
1066             case AmdtpPortInfo::E_SPDIF: // still unimplemented
1067                 break;
1068             case AmdtpPortInfo::E_Midi:
1069                 m_nb_midi_ports++;
1070                 break;
1071             default: // ignore
1072                 break;
1073         }
1074     }
1075
1076     int idx;
1077     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1078         for(PortVectorIterator it = m_Ports.begin();
1079             it != m_Ports.end();
1080             ++it )
1081         {
1082             AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1083             debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1084                         "idx %u: looking at port %s at position %u\n",
1085                         idx, (*it)->getName().c_str(), pinfo->getPosition());
1086             if(pinfo->getPosition() == (unsigned int)idx) {
1087                 struct _MBLA_port_cache p;
1088                 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1089                 if(p.port == NULL) {
1090                     debugError("Port is not an AmdtpAudioPort!\n");
1091                     return false;
1092                 }
1093                 p.buffer = NULL; // to be filled by updatePortCache
1094                 #ifdef DEBUG
1095                 p.buffer_size = (*it)->getBufferSize();
1096                 #endif
1097
1098                 m_audio_ports.push_back(p);
1099                 debugOutput(DEBUG_LEVEL_VERBOSE,
1100                             "Cached port %s at position %u\n",
1101                             p.port->getName().c_str(), idx);
1102                 goto next_index;
1103             }
1104         }
1105         debugError("No MBLA port found for position %d\n", idx);
1106         return false;
1107 next_index:
1108         continue;
1109     }
1110
1111     for(PortVectorIterator it = m_Ports.begin();
1112         it != m_Ports.end();
1113         ++it )
1114     {
1115         AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1116         debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1117                     "idx %u: looking at port %s at position %u, location %u\n",
1118                     idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1119         if ((*it)->getPortType() == Port::E_Midi) {
1120             struct _MIDI_port_cache p;
1121             p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1122             if(p.port == NULL) {
1123                 debugError("Port is not an AmdtpMidiPort!\n");
1124                 return false;
1125             }
1126             p.position = pinfo->getPosition();
1127             p.location = pinfo->getLocation();
1128             p.buffer = NULL; // to be filled by updatePortCache
1129             #ifdef DEBUG
1130             p.buffer_size = (*it)->getBufferSize();
1131             #endif
1132
1133             m_midi_ports.push_back(p);
1134             debugOutput(DEBUG_LEVEL_VERBOSE,
1135                         "Cached port %s at position %u, location %u\n",
1136                         p.port->getName().c_str(), p.position, p.location);
1137         }
1138     }
1139
1140     return true;
1141 }
1142
1143 void
1144 AmdtpTransmitStreamProcessor::updatePortCache() {
1145     int idx;
1146     for (idx = 0; idx < m_nb_audio_ports; idx++) {
1147         struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1148         AmdtpAudioPort *port = p.port;
1149         p.buffer = port->getBufferAddress();
1150         p.enabled = !port->isDisabled();
1151     }
1152     for (idx = 0; idx < m_nb_midi_ports; idx++) {
1153         struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1154         AmdtpMidiPort *port = p.port;
1155         p.buffer = port->getBufferAddress();
1156         p.enabled = !port->isDisabled();
1157     }
1158 }
1159
1160 } // end of namespace Streaming
Note: See TracBrowser for help on using the browser.