diff --git a/src/target/arm.h b/src/target/arm.h
index 30e2c76eabb2fcea9e040461277a12be91dda13f..916b321eb95bfa26a1b1684557882d332feba15c 100644
--- a/src/target/arm.h
+++ b/src/target/arm.h
@@ -237,8 +237,6 @@ int arm_blank_check_memory(struct target *target,
 void arm_set_cpsr(struct arm *arm, uint32_t cpsr);
 struct reg *arm_reg_current(struct arm *arm, unsigned regnum);
 
-void arm_endianness(uint8_t *tmp, void *in, int size, int be, int flip);
-
 extern struct reg arm_gdb_dummy_fp_reg;
 extern struct reg arm_gdb_dummy_fps_reg;
 
diff --git a/src/target/arm7_9_common.c b/src/target/arm7_9_common.c
index 3461da4c48a3689f93ac5156ce9c600d10197333..faeed0d24de157cd07df3e282a037b6808d0f4ae 100644
--- a/src/target/arm7_9_common.c
+++ b/src/target/arm7_9_common.c
@@ -2659,6 +2659,64 @@ int arm7_9_check_reset(struct target *target)
 	return ERROR_OK;
 }
 
+/**
+ * JTAG callback that rewrites a freshly captured scan value, in place, in
+ * the byte order the caller asked for.
+ *
+ * The buffer is decoded with the le_to_h_*() helper matching the value width,
+ * optionally run through flip_u32() over that same width, and then stored
+ * back into the same buffer with the h_*_to_be() or h_*_to_le() helper.
+ *
+ * @param pu8_in Buffer (uint8_t *) holding the captured value; overwritten
+ *        with the converted value.
+ * @param i_size Width of the value in bytes. Only 1, 2 and 4 are handled;
+ *        any other width leaves the buffer untouched.
+ * @param i_be Non-zero stores the result big-endian, zero little-endian.
+ *        Ignored for single-byte values.
+ * @param i_flip Non-zero passes the value through flip_u32() before it is
+ *        stored (NOTE(review): presumably a bit-order reversal - confirm).
+ * @returns ERROR_OK in every case.
+ */
+int arm7_9_endianness_callback(jtag_callback_data_t pu8_in,
+		jtag_callback_data_t i_size, jtag_callback_data_t i_be,
+		jtag_callback_data_t i_flip)
+{
+	uint8_t *buf = (uint8_t *)pu8_in;
+	const int width = (int)i_size;
+	const int big_endian = (int)i_be;
+	const int reverse = (int)i_flip;
+	uint32_t value;
+
+	if (width == 4) {
+		value = le_to_h_u32(buf);
+		if (reverse)
+			value = flip_u32(value, 32);
+
+		if (big_endian)
+			h_u32_to_be(buf, value);
+		else
+			h_u32_to_le(buf, value);
+	} else if (width == 2) {
+		value = le_to_h_u16(buf);
+		if (reverse)
+			value = flip_u32(value, 16);
+
+		if (big_endian)
+			h_u16_to_be(buf, value & 0xffff);
+		else
+			h_u16_to_le(buf, value & 0xffff);
+	} else if (width == 1) {
+		value = *buf;
+		if (reverse)
+			value = flip_u32(value, 8);
+
+		*buf = value & 0xff;
+	}
+	/* Widths other than 1, 2 and 4 fall through with the buffer unchanged. */
+
+	return ERROR_OK;
+}
+
 COMMAND_HANDLER(handle_arm7_9_dbgrq_command)
 {
 	struct target *target = get_current_target(CMD_CTX);
diff --git a/src/target/arm7_9_common.h b/src/target/arm7_9_common.h
index 0e706a75a2a899fac7cf1fd8ffda0ba945eb3910..5b79a0a5a4791ef45e7b20d1ff96d0d491da7a5b 100644
--- a/src/target/arm7_9_common.h
+++ b/src/target/arm7_9_common.h
@@ -172,4 +172,8 @@ int arm7_9_init_arch_info(struct target *target, struct arm7_9_common *arm7_9);
 int arm7_9_examine(struct target *target);
 int arm7_9_check_reset(struct target *target);
 
+int arm7_9_endianness_callback(jtag_callback_data_t pu8_in,
+		jtag_callback_data_t i_size, jtag_callback_data_t i_be,
+		jtag_callback_data_t i_flip);
+
 #endif /* ARM7_9_COMMON_H */
diff --git a/src/target/arm7tdmi.c b/src/target/arm7tdmi.c
index 2721502f3914e856dbba84258d94cad456fd06c5..634aa2997f42700bab3d85f050690e82df970d04 100644
--- a/src/target/arm7tdmi.c
+++ b/src/target/arm7tdmi.c
@@ -168,40 +168,6 @@ static int arm7tdmi_clock_data_in(struct arm_jtag *jtag_info, uint32_t *in)
 	return ERROR_OK;
 }
 
-void arm_endianness(uint8_t *tmp, void *in, int size, int be, int flip)
-{
-	uint32_t readback = le_to_h_u32(tmp);
-	if (flip)
-		readback = flip_u32(readback, 32);
-	switch (size) {
-		case 4:
-			if (be)
-				h_u32_to_be(((uint8_t *)in), readback);
-			else
-				 h_u32_to_le(((uint8_t *)in), readback);
-			break;
-		case 2:
-			if (be)
-				h_u16_to_be(((uint8_t *)in), readback & 0xffff);
-			else
-				h_u16_to_le(((uint8_t *)in), readback & 0xffff);
-			break;
-		case 1:
-			*((uint8_t *)in) = readback & 0xff;
-			break;
-	}
-}
-
-static int arm7endianness(jtag_callback_data_t arg,
-	jtag_callback_data_t size, jtag_callback_data_t be,
-	jtag_callback_data_t captured)
-{
-	uint8_t *in = (uint8_t *)arg;
-
-	arm_endianness((uint8_t *)captured, in, (int)size, (int)be, 1);
-	return ERROR_OK;
-}
-
 /* clock the target, and read the databus
  * the *in pointer points to a buffer where elements of 'size' bytes
  * are stored in big (be == 1) or little (be == 0) endianness
@@ -210,7 +176,7 @@ static int arm7tdmi_clock_data_in_endianness(struct arm_jtag *jtag_info,
 		void *in, int size, int be)
 {
 	int retval = ERROR_OK;
-	struct scan_field fields[2];
+	struct scan_field fields[3];
 
 	retval = arm_jtag_scann(jtag_info, 0x1, TAP_DRPAUSE);
 	if (retval != ERROR_OK)
@@ -223,17 +189,29 @@ static int arm7tdmi_clock_data_in_endianness(struct arm_jtag *jtag_info,
 	fields[0].out_value = NULL;
 	fields[0].in_value = NULL;
 
-	fields[1].num_bits = size * 8;
-	fields[1].out_value = NULL;
-	fields[1].in_value = in;
+	if (size == 4) {
+		fields[1].num_bits = 32;
+		fields[1].out_value = NULL;
+		fields[1].in_value = in;
+	} else {
+		/* Discard irrelevant bits of the scan, making sure we don't write more
+		 * than size bytes to in */
+		fields[1].num_bits = 32 - size * 8;
+		fields[1].out_value = NULL;
+		fields[1].in_value = NULL;
 
-	jtag_add_dr_scan(jtag_info->tap, 2, fields, TAP_DRPAUSE);
+		fields[2].num_bits = size * 8;
+		fields[2].out_value = NULL;
+		fields[2].in_value = in;
+	}
+
+	jtag_add_dr_scan(jtag_info->tap, size == 4 ? 2 : 3, fields, TAP_DRPAUSE);
 
-	jtag_add_callback4(arm7endianness,
+	jtag_add_callback4(arm7_9_endianness_callback,
 		(jtag_callback_data_t)in,
 		(jtag_callback_data_t)size,
 		(jtag_callback_data_t)be,
-		(jtag_callback_data_t)in);
+		(jtag_callback_data_t)1);
 
 	jtag_add_runtest(0, TAP_DRPAUSE);
 
diff --git a/src/target/arm9tdmi.c b/src/target/arm9tdmi.c
index e8ad93289cc18eead404bf14c460f83fee50bff4..1cb9fbdb2be2326f221535d4b60ff6b8758b2bca 100644
--- a/src/target/arm9tdmi.c
+++ b/src/target/arm9tdmi.c
@@ -244,16 +244,6 @@ int arm9tdmi_clock_data_in(struct arm_jtag *jtag_info, uint32_t *in)
 	return ERROR_OK;
 }
 
-static int arm9endianness(jtag_callback_data_t arg,
-	jtag_callback_data_t size, jtag_callback_data_t be,
-	jtag_callback_data_t captured)
-{
-	uint8_t *in = (uint8_t *)arg;
-
-	arm_endianness((uint8_t *)captured, in, (int)size, (int)be, 0);
-	return ERROR_OK;
-}
-
 /* clock the target, and read the databus
  * the *in pointer points to a buffer where elements of 'size' bytes
  * are stored in big (be == 1) or little (be == 0) endianness
@@ -262,7 +252,7 @@ int arm9tdmi_clock_data_in_endianness(struct arm_jtag *jtag_info,
 		void *in, int size, int be)
 {
 	int retval = ERROR_OK;
-	struct scan_field fields[3];
+	struct scan_field fields[2];
 
 	retval = arm_jtag_scann(jtag_info, 0x1, TAP_DRPAUSE);
 	if (retval != ERROR_OK)
@@ -272,25 +262,33 @@ int arm9tdmi_clock_data_in_endianness(struct arm_jtag *jtag_info,
 	if (retval != ERROR_OK)
 		return retval;
 
-	fields[0].num_bits = size * 8;
-	fields[0].out_value = NULL;
-	fields[0].in_value = in;
+	if (size == 4) {
+		fields[0].num_bits = 32;
+		fields[0].out_value = NULL;
+		fields[0].in_value = in;
 
-	fields[1].num_bits = 3;
-	fields[1].out_value = NULL;
-	fields[1].in_value = NULL;
+		fields[1].num_bits = 3 + 32;
+		fields[1].out_value = NULL;
+		fields[1].in_value = NULL;
+	} else {
+		/* Discard irrelevant bits of the scan, making sure we don't write more
+		 * than size bytes to in */
+		fields[0].num_bits = size * 8;
+		fields[0].out_value = NULL;
+		fields[0].in_value = in;
 
-	fields[2].num_bits = 32;
-	fields[2].out_value = NULL;
-	fields[2].in_value = NULL;
+		fields[1].num_bits = 3 + 32 + 32 - size * 8;
+		fields[1].out_value = NULL;
+		fields[1].in_value = NULL;
+	}
 
-	jtag_add_dr_scan(jtag_info->tap, 3, fields, TAP_DRPAUSE);
+	jtag_add_dr_scan(jtag_info->tap, 2, fields, TAP_DRPAUSE);
 
-	jtag_add_callback4(arm9endianness,
+	jtag_add_callback4(arm7_9_endianness_callback,
 		(jtag_callback_data_t)in,
 		(jtag_callback_data_t)size,
 		(jtag_callback_data_t)be,
-		(jtag_callback_data_t)in);
+		(jtag_callback_data_t)0);
 
 	jtag_add_runtest(0, TAP_DRPAUSE);