diff --git a/bridges/source/cpp_uno/gcc3_linux_arm/armhelper.S b/bridges/source/cpp_uno/gcc3_linux_arm/armhelper.S
index d5faf15..27148dc 100644
--- a/bridges/source/cpp_uno/gcc3_linux_arm/armhelper.S
+++ b/bridges/source/cpp_uno/gcc3_linux_arm/armhelper.S
@@ -10,6 +10,17 @@
 #  define UNWIND @
 #endif
 
+@ If the VFP ABI variant (armhf in Debian/Ubuntu) is used, an additional 64 bytes
+@ are taken up on the stack (the size of the 8 double precision VFP registers d0-d7)
+
+#ifdef __ARM_PCS_VFP
+#  define PAD 80
+#  define DISCARDED 84
+#else
+#  define PAD 16
+#  define DISCARDED 20
+#endif
+
 	.file	"armhelper.s"
 	.text
 	.align	4
@@ -19,9 +30,12 @@ privateSnippetExecutor:
 	UNWIND .fnstart            @ start of unwinder entry
 
 	stmfd sp!, {r0-r3}         @ follow other parameters on stack
-	UNWIND .pad  #16           @ throw this data away on exception
 	mov   r0, ip               @ r0 points to functionoffset/vtable
 	mov   r1, sp               @ r1 points to this and params
+#ifdef __ARM_PCS_VFP
+	vpush {d0-d7}              @ floating point parameter on stack
+#endif
+	UNWIND .pad  #PAD          @ throw this data away on exception
 	                           @ (see cppuno.cxx:codeSnippet())
 	stmfd sp!, {r4,lr}         @ save return address 
 	                           @ (r4 pushed to preserve stack alignment)
@@ -30,7 +44,7 @@ privateSnippetExecutor:
 	bl    cpp_vtable_call(PLT)
 
 	add   sp, sp, #4           @ no need to restore r4 (we didn't touch it)
-	ldr   pc, [sp], #20        @ return, discarding function arguments
+	ldr   pc, [sp], #DISCARDED @ return, discarding function arguments
 
 	UNWIND .fnend              @ end of unwinder entry
 
diff --git a/bridges/source/cpp_uno/gcc3_linux_arm/cpp2uno.cxx b/bridges/source/cpp_uno/gcc3_linux_arm/cpp2uno.cxx
index d347aa0..07bdea1 100644
--- a/bridges/source/cpp_uno/gcc3_linux_arm/cpp2uno.cxx
+++ b/bridges/source/cpp_uno/gcc3_linux_arm/cpp2uno.cxx
@@ -69,6 +69,9 @@ namespace
         char * pTopStack = (char *)(pCallStack + 0);
         char * pCppStack = pTopStack;
 
+#ifdef __ARM_PCS_VFP
+        char * pFloatArgs = (char *)(pCppStack - 64);
+#endif
         // return
         typelib_TypeDescription * pReturnTypeDescr = 0;
         if (pReturnTypeRef)
@@ -125,7 +128,9 @@ namespace
                 {
                     case typelib_TypeClass_HYPER:
                     case typelib_TypeClass_UNSIGNED_HYPER:
+#ifndef __ARM_PCS_VFP
                     case typelib_TypeClass_DOUBLE:
+#endif
             if ((pCppStack - pTopStack) % 8) pCppStack+=sizeof(sal_Int32); //align to 8
                         break;
                     default:
@@ -133,13 +138,31 @@ namespace
                 }
 #endif
 
-                pCppArgs[nPos] = pCppStack;
-                pUnoArgs[nPos] = pCppStack;
+// For armhf the floating point arguments are read from a separate area of the stack.
+// TODO: handle functions with more floating point arguments than fit into the VFP
+// registers, which overflow to the regular stack. Find a UNO API like that to test with.
+#ifdef __ARM_PCS_VFP
+                if (pParamTypeDescr->eTypeClass == typelib_TypeClass_FLOAT)
+                {
+                    pCppArgs[nPos] =  pUnoArgs[nPos] = pFloatArgs;
+                    pFloatArgs += sizeof(float);
+                } else
+                if (pParamTypeDescr->eTypeClass == typelib_TypeClass_DOUBLE)
+                {
+                    if ((pFloatArgs - pTopStack) % 8) pFloatArgs+=sizeof(float); //align to 8
+                    pCppArgs[nPos] = pUnoArgs[nPos] = pFloatArgs;
+                    pFloatArgs += sizeof(double);
+                } else
+#endif
+                    pCppArgs[nPos] = pUnoArgs[nPos] = pCppStack;
+
                 switch (pParamTypeDescr->eTypeClass)
                 {
                     case typelib_TypeClass_HYPER:
                     case typelib_TypeClass_UNSIGNED_HYPER:
+#ifndef __ARM_PCS_VFP
                     case typelib_TypeClass_DOUBLE:
+#endif
                         pCppStack += sizeof(sal_Int32); // extra long
                         break;
                     default:
@@ -179,6 +202,13 @@ namespace
                     TYPELIB_DANGER_RELEASE( pParamTypeDescr );
                 }
             }
+#ifdef __ARM_PCS_VFP
+            // use the stack for output parameters or non floating point values
+                if (rParam.bOut ||
+                        ((pParamTypeDescr->eTypeClass != typelib_TypeClass_DOUBLE)
+                         && (pParamTypeDescr->eTypeClass != typelib_TypeClass_FLOAT))
+                    )
+#endif
             pCppStack += sizeof(sal_Int32); // standard parameter length
         }
 
diff --git a/bridges/source/cpp_uno/gcc3_linux_arm/share.hxx b/bridges/source/cpp_uno/gcc3_linux_arm/share.hxx
index f7a85ba..da36c75 100644
--- a/bridges/source/cpp_uno/gcc3_linux_arm/share.hxx
+++ b/bridges/source/cpp_uno/gcc3_linux_arm/share.hxx
@@ -93,7 +93,7 @@ namespace CPPU_CURRENT_NAMESPACE
 
 namespace arm
 {
-    enum armlimits { MAX_GPR_REGS = 4 };
+    enum armlimits { MAX_GPR_REGS = 4, MAX_FPR_REGS = 8 };
     bool return_in_hidden_param( typelib_TypeDescriptionReference *pTypeRef );
 }
 
diff --git a/bridges/source/cpp_uno/gcc3_linux_arm/uno2cpp.cxx b/bridges/source/cpp_uno/gcc3_linux_arm/uno2cpp.cxx
index 9502b87..86a33d5 100644
--- a/bridges/source/cpp_uno/gcc3_linux_arm/uno2cpp.cxx
+++ b/bridges/source/cpp_uno/gcc3_linux_arm/uno2cpp.cxx
@@ -131,6 +131,20 @@ namespace arm
         return false;
     }
 
+#ifdef __ARM_PCS_VFP
+    // Tells whether every member type listed in ppTypeRefs is FLOAT or DOUBLE.
+    bool is_float_only_struct(const typelib_TypeDescription * type)
+    {
+        const typelib_CompoundTypeDescription * pCompound
+            = reinterpret_cast< const typelib_CompoundTypeDescription * >(type);
+        for (sal_Int32 nMember = 0; nMember < pCompound->nMembers; ++nMember)
+        {
+            const int eClass = pCompound->ppTypeRefs[nMember]->eTypeClass;
+            if (!(eClass == typelib_TypeClass_FLOAT || eClass == typelib_TypeClass_DOUBLE)) return false;
+        }
+        return true;
+    }
+#endif
     bool return_in_hidden_param( typelib_TypeDescriptionReference *pTypeRef )
     {
         if (bridges::cpp_uno::shared::isSimpleType(pTypeRef))
@@ -143,6 +157,13 @@ namespace arm
             //A Composite Type not larger than 4 bytes is returned in r0
             bool bRet = pTypeDescr->nSize > 4 || is_complex_struct(pTypeDescr);
 
+#ifdef __ARM_PCS_VFP
+            // In the VFP ABI, structs with only float/double values that fit in
+            // 16 bytes are returned in registers
+            if( pTypeDescr->nSize <= 16 && is_float_only_struct(pTypeDescr))
+                bRet = false;
+#endif
+
             TYPELIB_DANGER_RELEASE( pTypeDescr );
             return bRet;
         }
@@ -152,11 +173,6 @@ namespace arm
 
 void MapReturn(sal_uInt32 r0, sal_uInt32 r1, typelib_TypeDescriptionReference * pReturnType, sal_uInt32* pRegisterReturn)
 {
-#if !defined(__ARM_EABI__) && !defined(__SOFTFP__)
-    register float fret asm("f0");
-    register double dret asm("f0");
-#endif
-
     switch( pReturnType->eTypeClass )
     {
         case typelib_TypeClass_HYPER:
@@ -176,6 +192,7 @@ void MapReturn(sal_uInt32 r0, sal_uInt32 r1, typelib_TypeDescriptionReference *
 #if !defined(__ARM_PCS_VFP) && (defined(__ARM_EABI__) || defined(__SOFTFP__))
             pRegisterReturn[0] = r0;
 #else
+            register float fret asm("s0");
             *(float*)pRegisterReturn = fret;
 #endif
         break;
@@ -184,6 +201,7 @@ void MapReturn(sal_uInt32 r0, sal_uInt32 r1, typelib_TypeDescriptionReference *
             pRegisterReturn[1] = r1;
             pRegisterReturn[0] = r0;
 #else
+            register double dret asm("d0");
             *(double*)pRegisterReturn = dret;
 #endif
             break;
@@ -211,7 +229,9 @@ void callVirtualMethod(
     sal_uInt32 *pStack,
     sal_uInt32 nStack,
     sal_uInt32 *pGPR,
-    sal_uInt32 nGPR) __attribute__((noinline));
+    sal_uInt32 nGPR,
+    double *pFPR,
+    sal_uInt32 nFPR) __attribute__((noinline));
 
 void callVirtualMethod(
     void * pThis,
@@ -221,7 +241,9 @@ void callVirtualMethod(
     sal_uInt32 *pStack,
     sal_uInt32 nStack,
     sal_uInt32 *pGPR,
-    sal_uInt32 nGPR)
+    sal_uInt32 nGPR,
+    double *pFPR,
+    sal_uInt32 nFPR)
 {
     // never called
     if (! pThis)
@@ -238,39 +260,50 @@ void callVirtualMethod(
     // Should not happen, but...
     if ( nGPR > arm::MAX_GPR_REGS )
         nGPR = arm::MAX_GPR_REGS;
+    if ( nFPR > arm::MAX_FPR_REGS )
+        nFPR = arm::MAX_FPR_REGS;
 
     sal_uInt32 pMethod = *((sal_uInt32*)pThis);
     pMethod += 4 * nVtableIndex;
     pMethod = *((sal_uInt32 *)pMethod);
 
-    typedef void (*FunctionCall )( sal_uInt32, sal_uInt32, sal_uInt32, sal_uInt32);
-    FunctionCall pFunc = (FunctionCall)pMethod;
-
-    (*pFunc)(pGPR[0], pGPR[1], pGPR[2], pGPR[3]);
-
+    //Return registers
     sal_uInt32 r0;
     sal_uInt32 r1;
 
-    // get return value
     __asm__ __volatile__ (
-        "mov %0, r0\n\t"
-        "mov %1, r1\n\t"
-        : "=r" (r0), "=r" (r1) : );
+        //Load r0-r3 from the pGPR array (general purpose register arguments)
+        "ldr r4, %[pgpr]\n\t"
+        "ldmia r4, {r0-r3}\n\t"
+
+#ifdef __ARM_PCS_VFP
+        //Load d0-d7 from the pFPR array (VFP register arguments)
+        "ldr r4, %[pfpr]\n\t"
+        "vldmia r4, {d0-d7}\n\t"
+#endif
+        //Call the method fetched from the vtable
+        "ldr r5, %[pmethod]\n\t"
+        "blx r5\n\t"
+        //Copy the integer return registers into the output operands
+        "mov %[r0], r0\n\t"
+        "mov %[r1], r1\n\t"
+        : [r0]"=r" (r0), [r1]"=r" (r1)
+        : [pmethod]"m" (pMethod), [pgpr]"m" (pGPR), [pfpr]"m" (pFPR)
+        //r0-r3, ip and lr are changed by the loads or the call; the callee may write memory
+        : "r0", "r1", "r2", "r3", "r4", "r5", "ip", "lr", "memory");
 
     MapReturn(r0, r1, pReturnType, (sal_uInt32*)pRegisterReturn);
 }
 }
 
-#define INSERT_INT32( pSV, nr, pGPR, pDS, bOverflow ) \
+#define INSERT_INT32( pSV, nr, pGPR, pDS ) \
         if ( nr < arm::MAX_GPR_REGS ) \
                 pGPR[nr++] = *reinterpret_cast<sal_uInt32 *>( pSV ); \
         else \
-                bOverFlow = true; \
-        if (bOverFlow) \
                 *pDS++ = *reinterpret_cast<sal_uInt32 *>( pSV );
 
 #ifdef __ARM_EABI__
-#define INSERT_INT64( pSV, nr, pGPR, pDS, pStart, bOverflow ) \
+#define INSERT_INT64( pSV, nr, pGPR, pDS, pStart ) \
         if ( (nr < arm::MAX_GPR_REGS) && (nr % 2) ) \
         { \
                 ++nr; \
@@ -281,8 +314,6 @@ void callVirtualMethod(
                 pGPR[nr++] = *(reinterpret_cast<sal_uInt32 *>( pSV ) + 1); \
         } \
         else \
-                bOverFlow = true; \
-        if (bOverFlow) \
     { \
         if ( (pDS - pStart) % 2) \
                 { \
@@ -292,31 +323,65 @@ void callVirtualMethod(
                 *pDS++ = reinterpret_cast<sal_uInt32 *>( pSV )[1]; \
     }
 #else
-#define INSERT_INT64( pSV, nr, pGPR, pDS, pStart, bOverflow ) \
-        INSERT_INT32( pSV, nr, pGPR, pDS, bOverflow) \
-        INSERT_INT32( ((sal_uInt32*)pSV)+1, nr, pGPR, pDS, bOverflow)
+#define INSERT_INT64( pSV, nr, pGPR, pDS, pStart ) \
+        INSERT_INT32( pSV, nr, pGPR, pDS ) \
+        INSERT_INT32( ((sal_uInt32*)pSV)+1, nr, pGPR, pDS )
 #endif
 
-#define INSERT_FLOAT( pSV, nr, pFPR, pDS, bOverflow ) \
-        INSERT_INT32( pSV, nr, pGPR, pDS, bOverflow)
+#ifdef __ARM_PCS_VFP
+// Single and double arguments share one VFP register bank, so the registers are not
+// always filled linearly. A float goes to the first available single register, while a
+// double needs 8 byte alignment and goes to the next free double register. For example
+// a float, double, float sequence fills s0, d1 and s1 (in linear order s0, s1, d1).
+//
+// Both macros use pFPR/pSPR and the nSR/nDR counters of the expanding scope and ignore
+// their nr and pGPR arguments. nSR is the next single register slot to fill, nDR the
+// number of double registers that are no longer available.
+// Arguments that do not fit are copied bit for bit into the sal_uInt32 stack slots at
+// pDS; a double first skips a slot if needed to start at an even offset from pStart.
+#define INSERT_FLOAT( pSV, nr, pGPR, pDS ) \
+        if (nSR % 2 == 0) { /* no half-used double register to back-fill */ \
+            nSR = 2*nDR; \
+        }\
+        if ( nSR < arm::MAX_FPR_REGS*2 ) {\
+                pSPR[nSR++] = *reinterpret_cast<float *>( pSV ); \
+                if ((nSR % 2 == 1) && (nSR > 2*nDR)) { /* first half of a fresh double register */ \
+                    nDR++; \
+                }\
+        }\
+        else \
+        {\
+                *pDS++ = *reinterpret_cast<sal_uInt32 *>( pSV ); /* raw bits, no float->int conversion */ \
+        }
+#define INSERT_DOUBLE( pSV, nr, pGPR, pDS, pStart ) \
+        if ( nDR < arm::MAX_FPR_REGS ) { \
+                pFPR[nDR++] = *reinterpret_cast<double *>( pSV ); \
+        }\
+        else\
+        {\
+            if ( (pDS - pStart) % 2) \
+                    ++pDS; \
+            *pDS++ = reinterpret_cast<sal_uInt32 *>( pSV )[0]; /* both 32-bit halves, in memory order */ \
+            *pDS++ = reinterpret_cast<sal_uInt32 *>( pSV )[1]; \
+        }
+#else
+#define INSERT_FLOAT( pSV, nr, pFPR, pDS ) \
+        INSERT_INT32( pSV, nr, pGPR, pDS )
 
-#define INSERT_DOUBLE( pSV, nr, pFPR, pDS, pStart, bOverflow ) \
-        INSERT_INT64( pSV, nr, pGPR, pDS, pStart, bOverflow )
+#define INSERT_DOUBLE( pSV, nr, pFPR, pDS, pStart ) \
+        INSERT_INT64( pSV, nr, pGPR, pDS, pStart )
+#endif
 
-#define INSERT_INT16( pSV, nr, pGPR, pDS, bOverflow ) \
+#define INSERT_INT16( pSV, nr, pGPR, pDS ) \
         if ( nr < arm::MAX_GPR_REGS ) \
                 pGPR[nr++] = *reinterpret_cast<sal_uInt16 *>( pSV ); \
         else \
-                bOverFlow = true; \
-        if (bOverFlow) \
                 *pDS++ = *reinterpret_cast<sal_uInt16 *>( pSV );
 
-#define INSERT_INT8( pSV, nr, pGPR, pDS, bOverflow ) \
+#define INSERT_INT8( pSV, nr, pGPR, pDS ) \
         if ( nr < arm::MAX_GPR_REGS ) \
                 pGPR[nr++] = *reinterpret_cast<sal_uInt8 *>( pSV ); \
         else \
-                bOverFlow = true; \
-        if (bOverFlow) \
                 *pDS++ = *reinterpret_cast<sal_uInt8 *>( pSV );
 
 namespace {
@@ -336,6 +401,11 @@ static void cpp_call(
     sal_uInt32 pGPR[arm::MAX_GPR_REGS];
     sal_uInt32 nGPR = 0;
 
+    double pFPR[arm::MAX_FPR_REGS];
+    float *pSPR = reinterpret_cast< float *>(&pFPR);
+    sal_uInt32 nSR = 0;
+    sal_uInt32 nDR = 0;
+
     // return
     typelib_TypeDescription * pReturnTypeDescr = 0;
     TYPELIB_DANGER_GET( &pReturnTypeDescr, pReturnTypeRef );
@@ -343,7 +413,6 @@ static void cpp_call(
 
     void * pCppReturn = 0; // if != 0 && != pUnoReturn, needs reconversion
 
-    bool bOverFlow = false;
     bool bSimpleReturn = true;
     if (pReturnTypeDescr)
     {
@@ -359,13 +428,13 @@ static void cpp_call(
                     ? __builtin_alloca( pReturnTypeDescr->nSize )
                     : pUnoReturn); // direct way
 
-            INSERT_INT32( &pCppReturn, nGPR, pGPR, pStack, bOverFlow );
+            INSERT_INT32( &pCppReturn, nGPR, pGPR, pStack );
         }
     }
     // push this
     void * pAdjustedThisPtr = reinterpret_cast< void ** >(pThis->getCppI())
         + aVtableSlot.offset;
-    INSERT_INT32( &pAdjustedThisPtr, nGPR, pGPR, pStack, bOverFlow );
+    INSERT_INT32( &pAdjustedThisPtr, nGPR, pGPR, pStack );
 
     // stack space
     OSL_ENSURE( sizeof(void *) == sizeof(sal_Int32), "### unexpected size!" );
@@ -397,7 +466,7 @@ static void cpp_call(
 #if OSL_DEBUG_LEVEL > 2
                 fprintf(stderr, "hyper is %lx\n", pCppArgs[nPos]);
 #endif
-                INSERT_INT64( pCppArgs[nPos], nGPR, pGPR, pStack, pStackStart, bOverFlow );
+                INSERT_INT64( pCppArgs[nPos], nGPR, pGPR, pStack, pStackStart );
                 break;
             case typelib_TypeClass_LONG:
             case typelib_TypeClass_UNSIGNED_LONG:
@@ -405,22 +474,22 @@ static void cpp_call(
 #if OSL_DEBUG_LEVEL > 2
                 fprintf(stderr, "long is %x\n", pCppArgs[nPos]);
 #endif
-                INSERT_INT32( pCppArgs[nPos], nGPR, pGPR, pStack, bOverFlow );
+                INSERT_INT32( pCppArgs[nPos], nGPR, pGPR, pStack );
                 break;
             case typelib_TypeClass_SHORT:
             case typelib_TypeClass_CHAR:
             case typelib_TypeClass_UNSIGNED_SHORT:
-                INSERT_INT16( pCppArgs[nPos], nGPR, pGPR, pStack, bOverFlow );
+                INSERT_INT16( pCppArgs[nPos], nGPR, pGPR, pStack );
                 break;
             case typelib_TypeClass_BOOLEAN:
             case typelib_TypeClass_BYTE:
-                INSERT_INT8( pCppArgs[nPos], nGPR, pGPR, pStack, bOverFlow );
+                INSERT_INT8( pCppArgs[nPos], nGPR, pGPR, pStack );
                 break;
             case typelib_TypeClass_FLOAT:
-                INSERT_FLOAT( pCppArgs[nPos], nGPR, pGPR, pStack, bOverFlow );
+                INSERT_FLOAT( pCppArgs[nPos], nGPR, pGPR, pStack );
                 break;
             case typelib_TypeClass_DOUBLE:
-                INSERT_DOUBLE( pCppArgs[nPos], nGPR, pGPR, pStack, pStackStart, bOverFlow );
+                INSERT_DOUBLE( pCppArgs[nPos], nGPR, pGPR, pStack, pStackStart );
                 break;
             default:
                 break;
@@ -457,7 +526,7 @@ static void cpp_call(
                 // no longer needed
                 TYPELIB_DANGER_RELEASE( pParamTypeDescr );
             }
-            INSERT_INT32( &(pCppArgs[nPos]), nGPR, pGPR, pStack, bOverFlow );
+            INSERT_INT32( &(pCppArgs[nPos]), nGPR, pGPR, pStack );
         }
     }
 
@@ -468,7 +537,8 @@ static void cpp_call(
             pCppReturn, pReturnTypeRef,
             pStackStart,
             (pStack - pStackStart),
-            pGPR, nGPR);
+            pGPR, nGPR,
+            pFPR, nDR);
 
         // NO exception occurred...
         *ppUnoExc = 0;