Changeset 847

Show
Ignore:
Timestamp:
01/14/08 10:59:06 (13 years ago)
Author:
ppalmers
Message:

use SSE2 for byteswaps (20% faster than ntohl())

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/libffado/config.h.in

    r833 r847  
    3131//#define USE_SSE $USE_SSE 
    3232#define USE_SSE 0 
     33//#define USE_SSE2 $USE_SSE2 
     34#define USE_SSE2 0 
    3335 
    3436#define CACHEDIR "~/.ffado" 
  • trunk/libffado/SConstruct

    r836 r847  
    290290     
    291291    build_host_supports_sse = 0 
     292    build_host_supports_sse2 = 0 
    292293 
    293294    if config[config_kernel] == 'linux' : 
     
    302303            if "sse" in x86_flags: 
    303304                build_host_supports_sse = 1 
     305            if "sse2" in x86_flags: 
     306                build_host_supports_sse2 = 1 
    304307            if "3dnow" in x86_flags: 
    305308                opt_flags.append ("-m3dnow") 
     
    315318        env['USE_SSE'] = 1 
    316319 
     320    if ((env['DIST_TARGET'] == 'i686') or (env['DIST_TARGET'] == 'x86_64')) \ 
     321       and build_host_supports_sse2 and env['ENABLE_OPTIMIZATIONS']: 
     322        opt_flags.extend (["-msse2"]) 
     323        env['USE_SSE2'] = 1 
     324 
    317325# end of processor-specific section 
    318326if env['ENABLE_OPTIMIZATIONS']: 
  • trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp

    r833 r847  
    3636#include <netinet/in.h> 
    3737#include <assert.h> 
     38 
     39#include "libutil/ByteSwap.h" 
    3840 
    3941#define AMDTP_FLOAT_MULTIPLIER 2147483392.0 
     
    417419    } 
    418420    encodeMidiPorts((quadlet_t *)data, offset, nevents); 
     421    byteSwapToBus(((quadlet_t *)data), nevents * m_dimension); 
    419422    return true; 
    420423} 
     
    429432    encodeAudioPortsSilence((quadlet_t *)data, offset, nevents); 
    430433    encodeMidiPortsSilence((quadlet_t *)data, offset, nevents); 
     434    byteSwapToBus(((quadlet_t *)data), nevents * m_dimension); 
    431435    return true; 
    432436} 
     
    453457        for (j = 0;j < nevents; j += 1) 
    454458        { 
    455             *target_event = htonl( 0x40000000 );
     459            *target_event = 0x40000000;
    456460            target_event += m_dimension; 
    457461            __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 
     
    491495                float v = (*buffer) * AMDTP_FLOAT_MULTIPLIER; 
    492496                unsigned int tmp = ((int) v); 
    493                 *target_event = htonl ( ( tmp >> 8 ) | 0x40000000 );
     497                *target_event = ( tmp >> 8 ) | 0x40000000;
    494498                buffer++; 
    495499                __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality 
     
    500504            for (j = 0;j < nevents; j += 1) 
    501505            { 
    502                 *target_event = htonl( 0x40000000 );
     506                *target_event = 0x40000000;
    503507                target_event += m_dimension; 
    504508                __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 
     
    536540            for (j = 0; j < nevents; j += 1) 
    537541            { 
    538                 *target_event = htonl(((*buffer) & 0x00FFFFFF) | 0x40000000);
     542                *target_event = ((*buffer) & 0x00FFFFFF) | 0x40000000;
    539543                buffer++; 
    540544                __builtin_prefetch(buffer, 0, 0); // prefetch events for read, no temporal locality 
     
    546550            for (j = 0;j < nevents; j += 1) 
    547551            { 
    548                 *target_event = htonl( 0x40000000 );
     552                *target_event = 0x40000000;
    549553                target_event += m_dimension; 
    550554                __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 
     
    574578            target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position)); 
    575579            __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 
    576             *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA)); 
     580            *target_event = IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA); 
    577581        } 
    578582    } 
     
    609613                    tmpval = ((*buffer)<<16) & 0x00FF0000; 
    610614                    tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X); 
    611                     *target_event = htonl(tmpval);
     615                    *target_event = tmpval;
    612616 
    613617//                     debugOutput ( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n", 
     
    619623                    // or because this would exceed the maximum rate 
    620624                    // FIXME: this can be ifdef optimized since it's a constant 
    621                     *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA)); 
     625                    *target_event = IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA); 
    622626                } 
    623627                buffer+=8; 
     
    627631                target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position)); 
    628632                __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality 
    629                 *target_event = htonl(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA)); 
     633                *target_event = IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA); 
    630634            } 
    631635        } 
  • trunk/libffado/tests/SConscript

    r843 r847  
    5151        "test-ieee1394service" : "test-ieee1394service.cpp", 
    5252        "test-streamdump" : "test-streamdump.cpp", 
     53        "test-bufferops" : "test-bufferops.cpp", 
    5354} 
    5455