diff --git a/libtcc.c b/libtcc.c
index 76ee11ca..58dddb9e 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -1303,6 +1303,16 @@ static int tcc_set_flag(TCCState *s, const char *flag_name, int value)
                     flag_name, value);
 }
 
+static const FlagDef m_defs[] = { /* table of -m<name> machine flags; handed to set_flag() by tcc_set_m_flag() */
+#ifdef TCC_TARGET_X86_64
+    { offsetof(TCCState, nosse), FD_INVERT, "sse" }, /* "sse" entry bound to TCCState.nosse and marked FD_INVERT; presumably -mno-sse ends up setting nosse -- confirm against set_flag() */
+#endif /* TCC_TARGET_X86_64 */
+    { 0, 0, " no flag" }, /* always-present entry; presumably a placeholder so the table is never empty on other targets. NOTE(review): it names offset 0 -- verify set_flag() can never match this string from user input */
+};
+static int tcc_set_m_flag(TCCState *s, const char *flag_name, int value) /* apply one -m<flag_name> option to state s by looking it up in m_defs */
+{
+    return set_flag(s, m_defs, countof(m_defs), flag_name, value); /* set_flag()'s result is returned unchanged; the -m option handler treats a value < 0 as an unsupported option */
+}
 
 static int strstart(const char *val, const char **str)
 {
@@ -1778,9 +1788,10 @@ reparse:
             s->soname = tcc_strdup(optarg);
             break;
         case TCC_OPTION_m:
-	    if (strcmp(optarg, "32") && strcmp(optarg, "64"))
+	    if (!strcmp(optarg, "32") || !strcmp(optarg, "64"))
+	        s->option_m = tcc_strdup(optarg);
+	    else if (tcc_set_m_flag(s, optarg, 1) < 0)
 	        goto unsupported_option;
-            s->option_m = tcc_strdup(optarg);
             break;
         case TCC_OPTION_o:
             if (s->outfile) {
diff --git a/tcc.h b/tcc.h
index a2e3d0d3..2434b944 100644
--- a/tcc.h
+++ b/tcc.h
@@ -643,6 +643,9 @@ struct TCCState {
 #ifdef TCC_TARGET_I386
     int seg_size; /* 32. Can be 16 with i386 assembler (.code16) */
 #endif
+#ifdef TCC_TARGET_X86_64
+    int nosse; /* For -mno-sse support. */
+#endif
 
     /* array of all loaded dlls (including those referenced by loaded dlls) */
     DLLReference **loaded_dlls;
diff --git a/x86_64-gen.c b/x86_64-gen.c
index f5a2cddd..35ec1ab7 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -851,6 +851,8 @@ void gfunc_call(int nb_args)
             struct_size += size;
         } else {
             if (is_sse_float(vtop->type.t)) {
+		if (tcc_state->nosse)
+		  tcc_error("SSE disabled");
                 gv(RC_XMM0); /* only use one float register */
                 if (arg >= REGN) {
                     /* movq %xmm0, j*8(%rsp) */
@@ -961,6 +963,8 @@ void gfunc_prolog(CType *func_type)
             if (reg_param_index < REGN) {
                 /* save arguments passed by register */
                 if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
+		    if (tcc_state->nosse)
+		      tcc_error("SSE disabled");
                     o(0xd60f66); /* movq */
                     gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                 } else {
@@ -1208,6 +1212,9 @@ void gfunc_call(int nb_args)
             nb_reg_args += reg_count;
     }
 
+    if (nb_sse_args && tcc_state->nosse)
+      tcc_error("SSE disabled but floating point arguments passed");
+
     /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
        and ended by a 16-byte aligned argument. This is because, from the point of view of
        the callee, argument alignment is computed from the bottom up. */
@@ -1541,8 +1548,10 @@ void gfunc_prolog(CType *func_type)
         /* save all register passing arguments */
         for (i = 0; i < 8; i++) {
             loc -= 16;
-            o(0xd60f66); /* movq */
-            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
+	    if (!tcc_state->nosse) {
+		o(0xd60f66); /* movq */
+		gen_modrm(7 - i, VT_LOCAL, NULL, loc);
+	    }
             /* movq $0, loc+8(%rbp) */
             o(0x85c748);
             gen_le32(loc + 8);
@@ -1572,6 +1581,8 @@ void gfunc_prolog(CType *func_type)
         mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
         switch (mode) {
         case x86_64_mode_sse:
+	    if (tcc_state->nosse)
+	        tcc_error("SSE disabled but floating point arguments used");
             if (sse_param_index + reg_count <= 8) {
                 /* save arguments passed by register */
                 loc -= reg_count * 8;