From 7ad400d5a72d87233cce6d327218fedad44fa6e2 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Mon, 29 Dec 2014 00:49:10 -0500
Subject: [PATCH 1/2] armemu: Implement QADD8/QSUB8

---
 src/core/arm/interpreter/armemu.cpp  | 68 ++++++++++++++--------------
 src/core/arm/interpreter/armsupp.cpp | 60 ++++++++++++++++++++++++
 src/core/arm/skyeye_common/armdefs.h |  5 ++
 3 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index f0d349de7..b9c2aa6c2 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -5948,56 +5948,58 @@ L_stm_s_takeabort:
                 printf("Unhandled v6 insn: %08x", instr);
             }
             break;
-        case 0x62: // QADD16, QASX, QSAX, and QSUB16
-            if ((instr & 0xFF0) == 0xf10 || (instr & 0xFF0) == 0xf30 ||
-                (instr & 0xFF0) == 0xf50 || (instr & 0xFF0) == 0xf70)
+        case 0x62: // QADD16, QASX, QSAX, QSUB16, QADD8, and QSUB8
             {
+                const u8 op2 = BITS(5, 7);
+
                 const u8 rd_idx = BITS(12, 15);
                 const u8 rn_idx = BITS(16, 19);
                 const u8 rm_idx = BITS(0, 3);
-                const s16 rm_lo = (state->Reg[rm_idx] & 0xFFFF);
-                const s16 rm_hi = ((state->Reg[rm_idx] >> 0x10) & 0xFFFF);
-                const s16 rn_lo = (state->Reg[rn_idx] & 0xFFFF);
-                const s16 rn_hi = ((state->Reg[rn_idx] >> 0x10) & 0xFFFF);
+                const u16 rm_lo = (state->Reg[rm_idx] & 0xFFFF);
+                const u16 rm_hi = ((state->Reg[rm_idx] >> 0x10) & 0xFFFF);
+                const u16 rn_lo = (state->Reg[rn_idx] & 0xFFFF);
+                const u16 rn_hi = ((state->Reg[rn_idx] >> 0x10) & 0xFFFF);
 
-                s32 lo_result;
-                s32 hi_result;
+                u16 lo_result = 0;
+                u16 hi_result = 0;
 
                 // QADD16
-                if ((instr & 0xFF0) == 0xf10) {
-                    lo_result = (rn_lo + rm_lo);
-                    hi_result = (rn_hi + rm_hi);
+                if (op2 == 0x00) {
+                    lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_lo);
+                    hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_hi);
                 }
                 // QASX
-                else if ((instr & 0xFF0) == 0xf30) {
-                    lo_result = (rn_lo - rm_hi);
-                    hi_result = (rn_hi + rm_lo);
+                else if (op2 == 0x01) {
+                    lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_hi);
+                    hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_lo);
                 }
                 // QSAX
-                else if ((instr & 0xFF0) == 0xf50) {
-                    lo_result = (rn_lo + rm_hi);
-                    hi_result = (rn_hi - rm_lo);
+                else if (op2 == 0x02) {
+                    lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_hi);
+                    hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_lo);
                 }
                 // QSUB16
-                else {
-                    lo_result = (rn_lo - rm_lo);
-                    hi_result = (rn_hi - rm_hi);
+                else if (op2 == 0x03) {
+                    lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_lo);
+                    hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_hi);
+                }
+                // QADD8
+                else if (op2 == 0x04) {
+                    lo_result = ARMul_SignedSaturatedAdd8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                                ARMul_SignedSaturatedAdd8(rn_lo >> 8, rm_lo >> 8) << 8;
+                    hi_result = ARMul_SignedSaturatedAdd8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                                ARMul_SignedSaturatedAdd8(rn_hi >> 8, rm_hi >> 8) << 8;
+                }
+                // QSUB8
+                else if (op2 == 0x07) {
+                    lo_result = ARMul_SignedSaturatedSub8(rn_lo & 0xFF, rm_lo & 0xFF) |
+                                ARMul_SignedSaturatedSub8(rn_lo >> 8, rm_lo >> 8) << 8;
+                    hi_result = ARMul_SignedSaturatedSub8(rn_hi & 0xFF, rm_hi & 0xFF) |
+                                ARMul_SignedSaturatedSub8(rn_hi >> 8, rm_hi >> 8) << 8;
                 }
-
-                if (lo_result > 0x7FFF)
-                    lo_result = 0x7FFF;
-                else if (lo_result < -0x8000)
-                    lo_result = -0x8000;
-
-                if (hi_result > 0x7FFF)
-                    hi_result = 0x7FFF;
-                else if (hi_result < -0x8000)
-                    hi_result = -0x8000;
 
                 state->Reg[rd_idx] = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
                 return 1;
-            } else {
-                printf("Unhandled v6 insn: %08x", BITS(20, 27));
             }
             break;
         case 0x63:
diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp
index 8f158e2c8..8b3661c8f 100644
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -478,6 +478,66 @@ ARMul_SubOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result)
     ASSIGNV (SubOverflow (a, b, result));
 }
 
+/* 8-bit signed saturated addition: returns left + right, clamped to 0x7F or 0x80 when the signed sum overflows */
+u8 ARMul_SignedSaturatedAdd8(u8 left, u8 right)
+{
+    u8 result = left + right; // wrapped sum; replaced below if it overflowed
+
+    if (((result ^ left) & 0x80) && ((left ^ right) & 0x80) == 0) { // same-sign operands, result sign flipped: overflow
+        if (left & 0x80)
+            result = 0x80; // sign bit of left set (negative operands): clamp to the minimum
+        else
+            result = 0x7F; // sign bit of left clear (non-negative operands): clamp to the maximum
+    }
+
+    return result;
+}
+
+/* 8-bit signed saturated subtraction: returns left - right, clamped to 0x7F or 0x80 when the signed difference overflows */
+u8 ARMul_SignedSaturatedSub8(u8 left, u8 right)
+{
+    u8 result = left - right; // wrapped difference; replaced below if it overflowed
+
+    if (((result ^ left) & 0x80) && ((left ^ right) & 0x80) != 0) { // differing-sign operands, result sign differs from left: overflow
+        if (left & 0x80)
+            result = 0x80; // negative minus non-negative went too low: clamp to the minimum
+        else
+            result = 0x7F; // non-negative minus negative went too high: clamp to the maximum
+    }
+
+    return result;
+}
+
+/* 16-bit signed saturated addition: returns left + right, clamped to 0x7FFF or 0x8000 when the signed sum overflows */
+u16 ARMul_SignedSaturatedAdd16(u16 left, u16 right)
+{
+    u16 result = left + right; // wrapped sum; replaced below if it overflowed
+
+    if (((result ^ left) & 0x8000) && ((left ^ right) & 0x8000) == 0) { // same-sign operands, result sign flipped: overflow
+        if (left & 0x8000)
+            result = 0x8000; // sign bit of left set (negative operands): clamp to the minimum
+        else
+            result = 0x7FFF; // sign bit of left clear (non-negative operands): clamp to the maximum
+    }
+
+    return result;
+}
+
+/* 16-bit signed saturated subtraction: returns left - right, clamped to 0x7FFF or 0x8000 when the signed difference overflows */
+u16 ARMul_SignedSaturatedSub16(u16 left, u16 right)
+{
+    u16 result = left - right; // wrapped difference; replaced below if it overflowed
+
+    if (((result ^ left) & 0x8000) && ((left ^ right) & 0x8000) != 0) { // differing-sign operands, result sign differs from left: overflow
+        if (left & 0x8000)
+            result = 0x8000; // negative minus non-negative went too low: clamp to the minimum
+        else
+            result = 0x7FFF; // non-negative minus negative went too high: clamp to the maximum
+    }
+
+    return result;
+}
+
 /* 8-bit unsigned saturated addition */
 u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right)
 {
diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h
index c7509fcb2..0f2bcbdb1 100644
--- a/src/core/arm/skyeye_common/armdefs.h
+++ b/src/core/arm/skyeye_common/armdefs.h
@@ -790,6 +790,11 @@ extern void ARMul_FixSPSR(ARMul_State*, ARMword, ARMword);
 extern void ARMul_ConsolePrint(ARMul_State*, const char*, ...);
 extern void ARMul_SelectProcessor(ARMul_State*, unsigned);
 
+extern u8 ARMul_SignedSaturatedAdd8(u8, u8);    // (left, right): left + right, saturated to 0x80/0x7F on signed overflow
+extern u8 ARMul_SignedSaturatedSub8(u8, u8);    // (left, right): left - right, saturated to 0x80/0x7F on signed overflow
+extern u16 ARMul_SignedSaturatedAdd16(u16, u16); // (left, right): left + right, saturated to 0x8000/0x7FFF on signed overflow
+extern u16 ARMul_SignedSaturatedSub16(u16, u16); // (left, right): left - right, saturated to 0x8000/0x7FFF on signed overflow
+
 extern u8 ARMul_UnsignedSaturatedAdd8(u8, u8);
 extern u16 ARMul_UnsignedSaturatedAdd16(u16, u16);
 extern u8 ARMul_UnsignedSaturatedSub8(u8, u8);

From e412c0fc46ee151412f9d83d9bd3549a0a90e955 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Mon, 29 Dec 2014 00:54:48 -0500
Subject: [PATCH 2/2] dyncom: Implement QADD8/QSUB8

---
 .../arm/dyncom/arm_dyncom_interpreter.cpp     | 74 +++++++++++--------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 45e7b441f..808e2085a 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -2419,8 +2419,7 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index)
 	return inst_base;
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QADD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QADD8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)
+ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index)
 {
 	arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
 	generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
@@ -2438,21 +2437,28 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)
 
 	return inst_base;
 }
+ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index)
+{
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // QADD16 is translated by the same routine as QADD8
+}
 ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // forwards straight to the qadd8 translation routine
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDADD"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDSUB"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QSUB"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QSUB8"); }
+ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index)
+{
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // QSUB8 is translated by the same routine as QADD8
+}
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsub16)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // forwards straight to the qadd8 translation routine
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index)
 {
-	return INTERPRETER_TRANSLATE(qadd16)(inst, index);
+	return INTERPRETER_TRANSLATE(qadd8)(inst, index); // forwards straight to the qadd8 translation routine
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index)
 {
@@ -5777,55 +5783,60 @@ unsigned InterpreterMainLoop(ARMul_State* state)
 		GOTO_NEXT_INST;
 	}
 	QADD_INST:
-	QADD8_INST:
 
+	QADD8_INST:
 	QADD16_INST:
 	QADDSUBX_INST:
+	QSUB8_INST:
 	QSUB16_INST:
 	QSUBADDX_INST:
 	{
 		INC_ICOUNTER;
 		if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
 			generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
-			const s16 rm_lo = (RM & 0xFFFF);
-			const s16 rm_hi = ((RM >> 16) & 0xFFFF);
-			const s16 rn_lo = (RN & 0xFFFF);
-			const s16 rn_hi = ((RN >> 16) & 0xFFFF);
+			const u16 rm_lo = (RM & 0xFFFF);
+			const u16 rm_hi = ((RM >> 16) & 0xFFFF);
+			const u16 rn_lo = (RN & 0xFFFF);
+			const u16 rn_hi = ((RN >> 16) & 0xFFFF);
 			const u8 op2    = inst_cream->op2;
 
-			s32 lo_result = 0;
-			s32 hi_result = 0;
+			u16 lo_result = 0;
+			u16 hi_result = 0;
 
 			// QADD16
 			if (op2 == 0x00) {
-				lo_result = (rn_lo + rm_lo);
-				hi_result = (rn_hi + rm_hi);
+				lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_lo);
+				hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_hi);
 			}
 			// QASX
 			else if (op2 == 0x01) {
-				lo_result = (rn_lo - rm_hi);
-				hi_result = (rn_hi + rm_lo);
+				lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_hi);
+				hi_result = ARMul_SignedSaturatedAdd16(rn_hi, rm_lo);
 			}
 			// QSAX
 			else if (op2 == 0x02) {
-				lo_result = (rn_lo + rm_hi);
-				hi_result = (rn_hi - rm_lo);
+				lo_result = ARMul_SignedSaturatedAdd16(rn_lo, rm_hi);
+				hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_lo);
 			}
 			// QSUB16
 			else if (op2 == 0x03) {
-				lo_result = (rn_lo - rm_lo);
-				hi_result = (rn_hi - rm_hi);
+				lo_result = ARMul_SignedSaturatedSub16(rn_lo, rm_lo);
+				hi_result = ARMul_SignedSaturatedSub16(rn_hi, rm_hi);
+			}
+			// QADD8
+			else if (op2 == 0x04) {
+				lo_result = ARMul_SignedSaturatedAdd8(rn_lo & 0xFF, rm_lo & 0xFF) |
+				            ARMul_SignedSaturatedAdd8(rn_lo >> 8, rm_lo >> 8) << 8;
+				hi_result = ARMul_SignedSaturatedAdd8(rn_hi & 0xFF, rm_hi & 0xFF) |
+				            ARMul_SignedSaturatedAdd8(rn_hi >> 8, rm_hi >> 8) << 8;
+			}
+			// QSUB8
+			else if (op2 == 0x07) {
+				lo_result = ARMul_SignedSaturatedSub8(rn_lo & 0xFF, rm_lo & 0xFF) |
+				            ARMul_SignedSaturatedSub8(rn_lo >> 8, rm_lo >> 8) << 8;
+				hi_result = ARMul_SignedSaturatedSub8(rn_hi & 0xFF, rm_hi & 0xFF) |
+				            ARMul_SignedSaturatedSub8(rn_hi >> 8, rm_hi >> 8) << 8;
 			}
-
-			if (lo_result > 0x7FFF)
-				lo_result = 0x7FFF;
-			else if (lo_result < -0x8000)
-				lo_result = -0x8000;
-
-			if (hi_result > 0x7FFF)
-				hi_result = 0x7FFF;
-			else if (hi_result < -0x8000)
-				hi_result = -0x8000;
 
 			RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
 		}
@@ -5839,7 +5850,6 @@ unsigned InterpreterMainLoop(ARMul_State* state)
 	QDADD_INST:
 	QDSUB_INST:
 	QSUB_INST:
-	QSUB8_INST:
 	REV_INST:
 	{
 		INC_ICOUNTER;