diff --git a/trunk/fcl/CMakeLists.txt b/trunk/fcl/CMakeLists.txt
index ed92ad62440ce26cf541a1261f931b9b7f1bdf69..2a862dbe6bfdeb813ba3eb98908e44a917e78f4f 100644
--- a/trunk/fcl/CMakeLists.txt
+++ b/trunk/fcl/CMakeLists.txt
@@ -14,17 +14,15 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
 
 include(FCLVersion)
 
-# Check for SSE flags
+# Whether to enable SSE
+option(FCL_USE_SSE "Whether FCL should SSE instructions" ON)
 set(FCL_HAVE_SSE 0)
-include(FCLCheckSSE)
-fcl_check_for_sse()
-if (SSE_FLAGS)
-  message(STATUS "FCL uses SSE")
-  add_definitions(${SSE_FLAGS})
-  set(FCL_HAVE_SSE 0)
-  # waiting for Jia to fix this; not using sse anyway
-else()
-  message(STATUS "FCL does not use SSE")
+if(FCL_USE_SSE)
+  if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(FCL_HAVE_SSE 1)
+    add_definitions(-march=native)
+  endif()
+  # TODO: do something similar for other compilers
 endif()
 
 # Find Octomap (optional)
diff --git a/trunk/fcl/include/fcl/broadphase/broadphase_SaP.h b/trunk/fcl/include/fcl/broadphase/broadphase_SaP.h
index 165fbd57ce956519de841b18209e30cb600ab23e..8fe62b35d0230d38dfa5ea62936f6782993bdd37 100644
--- a/trunk/fcl/include/fcl/broadphase/broadphase_SaP.h
+++ b/trunk/fcl/include/fcl/broadphase/broadphase_SaP.h
@@ -163,13 +163,13 @@ protected:
       else return aabb->cached.min_;
     }
 
-    inline FCL_REAL getVal(size_t i) const
+    inline Vec3f::U getVal(size_t i) const
     {
       if(minmax) return aabb->cached.max_[i];
       else return aabb->cached.min_[i];
     }
 
-    inline FCL_REAL& getVal(size_t i)
+    inline Vec3f::U& getVal(size_t i)
     {
       if(minmax) return aabb->cached.max_[i];
       else return aabb->cached.min_[i];
diff --git a/trunk/fcl/include/fcl/math/matrix_3f.h b/trunk/fcl/include/fcl/math/matrix_3f.h
index 36fc4352e77682c4ee4e775f1714a20a09ed1cae..f711924b74effb6aafcab806ac6eb60deb13193f 100644
--- a/trunk/fcl/include/fcl/math/matrix_3f.h
+++ b/trunk/fcl/include/fcl/math/matrix_3f.h
@@ -407,8 +407,11 @@ typename T::meta_type quadraticForm(const Matrix3fX<T>& R, const Vec3fX<typename
 }
 
 
-typedef Matrix3fX<details::Matrix3Data<FCL_REAL> > Matrix3f;
-//typedef Matrix3fX<details::sse_meta_f12> Matrix3f;
+#if FCL_HAVE_SSE
+  typedef Matrix3fX<details::sse_meta_f12> Matrix3f;
+#else
+  typedef Matrix3fX<details::Vec3Data<FCL_REAL> > Matrix3f;
+#endif
 
 static inline std::ostream& operator << (std::ostream& o, const Matrix3f& m)
 {
@@ -428,7 +431,7 @@ public:
   Matrix3f Sigma;
 
   /// @brief Variations along the eign axes
-  FCL_REAL sigma[3];
+  Matrix3f::U sigma[3];
 
   /// @brief Eigen axes of the variation matrix
   Vec3f axis[3];
diff --git a/trunk/fcl/include/fcl/traversal/traversal_node_shapes.h b/trunk/fcl/include/fcl/traversal/traversal_node_shapes.h
index f015079da9ed8ac1c4868bf00320691d6d14c924..3bb3cc3d54185a33c9aff8c8b457cb198e45f95f 100644
--- a/trunk/fcl/include/fcl/traversal/traversal_node_shapes.h
+++ b/trunk/fcl/include/fcl/traversal/traversal_node_shapes.h
@@ -148,7 +148,7 @@ public:
   void leafTesting(int, int) const
   {
     FCL_REAL distance;
-    !nsolver->shapeDistance(*model1, tf1, *model2, tf2, &distance);
+    nsolver->shapeDistance(*model1, tf1, *model2, tf2, &distance);
     result->update(distance, model1, model2, DistanceResult::NONE, DistanceResult::NONE);
   }
 
diff --git a/trunk/fcl/src/BV/OBB.cpp b/trunk/fcl/src/BV/OBB.cpp
index d83f8533b94a0b7e3c6dd3dd16cf0a2b188cff88..6b1246a030a7141218fa2da4d3b6eb203142ab9e 100644
--- a/trunk/fcl/src/BV/OBB.cpp
+++ b/trunk/fcl/src/BV/OBB.cpp
@@ -74,7 +74,7 @@ inline OBB merge_largedist(const OBB& b1, const OBB& b2)
   computeVertices(b2, vertex + 8);
   Matrix3f M;
   Vec3f E[3];
-  FCL_REAL s[3] = {0, 0, 0};
+  Matrix3f::U s[3] = {0, 0, 0};
 
   // obb axes
   Vec3f& R0 = b.axis[0];
diff --git a/trunk/fcl/src/BV/RSS.cpp b/trunk/fcl/src/BV/RSS.cpp
index 363e752d590d8dca0177ea5a2d2b799facb056fa..4c29a127e2b57d5ff403c3fd5ebb6fafec5430eb 100644
--- a/trunk/fcl/src/BV/RSS.cpp
+++ b/trunk/fcl/src/BV/RSS.cpp
@@ -1076,7 +1076,7 @@ RSS RSS::operator + (const RSS& other) const
 
   Matrix3f M; // row first matrix
   Vec3f E[3]; // row first eigen-vectors
-  FCL_REAL s[3] = {0, 0, 0};
+  Matrix3f::U s[3] = {0, 0, 0};
 
   getCovariance(v, NULL, NULL, NULL, 16, M);
   eigen(M, s, E);
diff --git a/trunk/fcl/src/BVH/BV_fitter.cpp b/trunk/fcl/src/BVH/BV_fitter.cpp
index 7455f53ba925cfcc32f35cdddac22708966d6145..e01e3643241e89494d2492fe6fa87727f2f7443c 100644
--- a/trunk/fcl/src/BVH/BV_fitter.cpp
+++ b/trunk/fcl/src/BVH/BV_fitter.cpp
@@ -48,7 +48,7 @@ static const double invCosA = 2.0 / sqrt(3.0);
 static const double sinA = 0.5;
 static const double cosA = sqrt(3.0) / 2.0;
 
-static inline void axisFromEigen(Vec3f eigenV[3], FCL_REAL eigenS[3], Vec3f axis[3])
+static inline void axisFromEigen(Vec3f eigenV[3], Matrix3f::U eigenS[3], Vec3f axis[3])
 {
   int min, mid, max;
   if(eigenS[0] > eigenS[1]) { max = 0; min = 1; }
@@ -137,7 +137,7 @@ void fitn(Vec3f* ps, int n, OBB& bv)
 {
   Matrix3f M;
   Vec3f E[3];
-  FCL_REAL s[3] = {0, 0, 0}; // three eigen values
+  Matrix3f::U s[3] = {0, 0, 0}; // three eigen values
 
   getCovariance(ps, NULL, NULL, NULL, n, M);
   eigen(M, s, E);
@@ -223,7 +223,7 @@ void fitn(Vec3f* ps, int n, RSS& bv)
 {
   Matrix3f M; // row first matrix
   Vec3f E[3]; // row first eigen-vectors
-  FCL_REAL s[3] = {0, 0, 0};
+  Matrix3f::U s[3] = {0, 0, 0};
 
   getCovariance(ps, NULL, NULL, NULL, n, M);
   eigen(M, s, E);
@@ -340,7 +340,7 @@ void fitn(Vec3f* ps, int n, kIOS& bv)
 {
   Matrix3f M;
   Vec3f E[3];
-  FCL_REAL s[3] = {0, 0, 0}; // three eigen values;
+  Matrix3f::U s[3] = {0, 0, 0}; // three eigen values;
 
   getCovariance(ps, NULL, NULL, NULL, n, M);
   eigen(M, s, E);
@@ -522,7 +522,7 @@ OBB BVFitter<OBB>::fit(unsigned int* primitive_indices, int num_primitives)
 
   Matrix3f M; // row first matrix
   Vec3f E[3]; // row first eigen-vectors
-  FCL_REAL s[3]; // three eigen values
+  Matrix3f::U s[3]; // three eigen values
 
   getCovariance(vertices, prev_vertices, tri_indices, primitive_indices, num_primitives, M);
   eigen(M, s, E);
@@ -540,7 +540,7 @@ OBBRSS BVFitter<OBBRSS>::fit(unsigned int* primitive_indices, int num_primitives
   OBBRSS bv;
   Matrix3f M;
   Vec3f E[3];
-  FCL_REAL s[3];
+  Matrix3f::U s[3];
 
   getCovariance(vertices, prev_vertices, tri_indices, primitive_indices, num_primitives, M);
   eigen(M, s, E);
@@ -571,7 +571,7 @@ RSS BVFitter<RSS>::fit(unsigned int* primitive_indices, int num_primitives)
 
   Matrix3f M; // row first matrix
   Vec3f E[3]; // row first eigen-vectors
-  FCL_REAL s[3]; // three eigen values
+  Matrix3f::U s[3]; // three eigen values
   getCovariance(vertices, prev_vertices, tri_indices, primitive_indices, num_primitives, M);
   eigen(M, s, E);
   axisFromEigen(E, s, bv.axis);
@@ -599,7 +599,7 @@ kIOS BVFitter<kIOS>::fit(unsigned int* primitive_indices, int num_primitives)
 
   Matrix3f M; // row first matrix
   Vec3f E[3]; // row first eigen-vectors
-  FCL_REAL s[3];
+  Matrix3f::U s[3];
   
   getCovariance(vertices, prev_vertices, tri_indices, primitive_indices, num_primitives, M);
   eigen(M, s, E);
diff --git a/trunk/fcl/src/broadphase/broadphase_SaP.cpp b/trunk/fcl/src/broadphase/broadphase_SaP.cpp
index 432a94b333827f11c72deccb0f4ff2d2d0b23238..c22af618ade49cfec6d890cbb9eddb543d9f4423 100644
--- a/trunk/fcl/src/broadphase/broadphase_SaP.cpp
+++ b/trunk/fcl/src/broadphase/broadphase_SaP.cpp
@@ -117,9 +117,9 @@ void SaPCollisionManager::registerObjects(const std::vector<CollisionObject*>& o
     for(size_t coord = 0; coord < 3; ++coord)
     { 
       std::sort(endpoints.begin(), endpoints.end(), 
-                boost::bind(std::less<FCL_REAL>(),
-                            boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const >(&EndPoint::getVal), _1, coord),
-                            boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const >(&EndPoint::getVal), _2, coord)));
+                boost::bind(std::less<Vec3f::U>(),
+                            boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const >(&EndPoint::getVal), _1, coord),
+                            boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const >(&EndPoint::getVal), _2, coord)));
 
       endpoints[0]->prev[coord] = NULL;
       endpoints[0]->next[coord] = endpoints[1];
@@ -508,9 +508,9 @@ bool SaPCollisionManager::collide_(CollisionObject* obj, void* cdata, CollisionC
   
   // compute stop_pos by binary search, this is cheaper than check it in while iteration linearly
   std::vector<EndPoint*>::const_iterator res_it = std::upper_bound(velist[axis].begin(), velist[axis].end(), &dummy,
-                                                                   boost::bind(std::less<FCL_REAL>(),
-                                                                               boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const>(&EndPoint::getVal), _1, axis),
-                                                                               boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const>(&EndPoint::getVal), _2, axis)));
+                                                                   boost::bind(std::less<Vec3f::U>(),
+                                                                               boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const>(&EndPoint::getVal), _1, axis),
+                                                                               boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const>(&EndPoint::getVal), _2, axis)));
   
   EndPoint* end_pos = NULL;
   if(res_it != velist[axis].end())
@@ -574,9 +574,9 @@ bool SaPCollisionManager::distance_(CollisionObject* obj, void* cdata, DistanceC
     
  
     std::vector<EndPoint*>::const_iterator res_it = std::upper_bound(velist[axis].begin(), velist[axis].end(), &dummy,
-                                                                     boost::bind(std::less<FCL_REAL>(),
-                                                                                 boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const>(&EndPoint::getVal), _1, axis),
-                                                                                 boost::bind(static_cast<FCL_REAL (EndPoint::*)(size_t) const>(&EndPoint::getVal), _2, axis)));
+                                                                     boost::bind(std::less<Vec3f::U>(),
+                                                                                 boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const>(&EndPoint::getVal), _1, axis),
+                                                                                 boost::bind(static_cast<Vec3f::U (EndPoint::*)(size_t) const>(&EndPoint::getVal), _2, axis)));
 
     EndPoint* end_pos = NULL;
     if(res_it != velist[axis].end())
diff --git a/trunk/fcl/src/ccd/taylor_model.cpp b/trunk/fcl/src/ccd/taylor_model.cpp
index 769985ab024aa70c56d20b079b7472e9e6923239..0851ed88950e5b0a368d6569931e88d1719bef92 100644
--- a/trunk/fcl/src/ccd/taylor_model.cpp
+++ b/trunk/fcl/src/ccd/taylor_model.cpp
@@ -98,6 +98,7 @@ TaylorModel TaylorModel::operator + (FCL_REAL d) const
 TaylorModel& TaylorModel::operator += (FCL_REAL d)
 {
   coeffs_[0] += d;
+  return *this;
 }
 
 TaylorModel TaylorModel::operator + (const TaylorModel& other) const
diff --git a/trunk/fcl/src/narrowphase/gjk.cpp b/trunk/fcl/src/narrowphase/gjk.cpp
index 2813b2f14e170c4c0b81af99ff6d828402f7e4da..8419d0103951c7b485aca20a478ff25a01e99dff 100644
--- a/trunk/fcl/src/narrowphase/gjk.cpp
+++ b/trunk/fcl/src/narrowphase/gjk.cpp
@@ -158,6 +158,8 @@ Vec3f getSupport(const ShapeBase* shape, const Vec3f& dir)
       return Vec3f(0, 0, 0);
     }
     break;
+  default:
+    ; // nothing
   }
 
   return Vec3f(0, 0, 0);