Keystone, Capstone, and Unicorn Engines

From JaxHax
Revision as of 01:08, 12 February 2019 by Travis (Talk | contribs) ((>^_^)>c|_| First)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

Overview

Keystone, Capstone, and Unicorn Engines presentation presented by Travis Phillips @ the PyJax group on 02/12/2019.

The Keystone Engine is an Assembler. The Capstone Engine is a Disassembler. The Unicorn Engine is an Emulator.

Slides and Code

Keystone, Capstone, and Unicorn - Code and Slides (Gzip Tarball 1.42)

Slides Only

Keystone, Capstone, and Unicorn (pdf - 1.58 MB)

Code Only

Code for the presentation only (Gzip Tarball 7 KB)

Code Example: keystone_example.py

This example will provide you with a GUI tool that wraps some of the Keystone functionality.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Program: keystone_example.py
#
# Author: Travis Phillips
#
# Date: 02/12/2019
#
# Purpose: This application is to provide an example of how to use the
#          Keystone assembler engine. This code is an example for the
#          PyJax presentation presented on 02/12/2019.
#
# Website: https://wiki.jaxhax.org/
#
########################################################################
import pygtk
pygtk.require('2.0')
import gtk
import pango
 
from keystone import *
 
class Assembler(object):
    """PyGTK window that assembles source text with the Keystone engine.

    The user types assembly into a text view, picks a target from the
    architecture combo box and presses "Assemble"; the encoded bytes are
    shown in the output pane as a ``\\xNN`` escaped string followed by a
    short summary.  Constructing an instance builds and shows the window;
    the caller is expected to run the GTK main loop afterwards.
    """
    NAME = 'Keystone Assembler'
    VERSION = 'v1.0'

    def declareArchs(self):
        """Populate ``self.ARCH``: display label -> [KS_ARCH_*, KS_MODE_*]."""
        self.ARCH = {
            "x86 - 16 bit": [KS_ARCH_X86, KS_MODE_16],
            "x86 - 32 bit": [KS_ARCH_X86, KS_MODE_32],
            "x64": [KS_ARCH_X86, KS_MODE_64],
            "ARM": [KS_ARCH_ARM, KS_MODE_ARM],
            "ARM - Thumbs": [KS_ARCH_ARM, KS_MODE_THUMB],
            "ARM64": [KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN],
            "MIPS": [KS_ARCH_MIPS, KS_MODE_MIPS32],
            "MIPS - 64 bit": [KS_ARCH_MIPS, KS_MODE_MIPS64],
        }

    ####################################################################
    # Support Functions
    ####################################################################
    @staticmethod
    def _formatEncoding(encoding):
        """Render an iterable of byte values as a ``\\xNN`` escaped string.

        ``None`` (what Keystone hands back when the input contained no
        statements) is rendered as the empty string.
        """
        return "".join("\\x{0:02x}".format(x) for x in encoding or ())

    def Assemble(self, value, archDetails):
        """Assemble *value* for the target described by *archDetails*.

        Args:
            value: assembly source text.
            archDetails: ``[arch, mode]`` pair as stored in ``self.ARCH``.

        Returns:
            The machine code as a ``\\xNN`` escaped string, or the
            ``KsError`` instance if Keystone rejected the input (callers
            distinguish the two with ``isinstance``).
        """
        try:
            # Engine construction is inside the try as well, so a failure
            # to open the engine is reported the same way as a syntax error.
            ks = Ks(archDetails[0], archDetails[1])
            encoding, count = ks.asm(value)
        except KsError as e:
            return e
        return self._formatEncoding(encoding)

    ####################################################################
    # Callback Functions
    ####################################################################
    def delete_event(self, widget, event, data=None):
        """Window close handler: leave the GTK main loop."""
        gtk.main_quit()
        return False

    def btnAssembleClicked(self, widget):
        """Assemble the contents of the input pane and show the result."""
        asmCode = self.txtAsmCode.get_text(self.txtAsmCode.get_start_iter(),
                                           self.txtAsmCode.get_end_iter(),
                                           True)
        Arch = self.cmbArchSelector.get_active_text()
        if Arch is None:
            self.txtOutput.set_text(" [*] ERROR: You Must select a architecture.")
            return

        # The 'string_escape' codec (Python 2) expands escapes typed by the
        # user; a malformed escape raises ValueError, which would otherwise
        # surface only as a traceback on the console.
        try:
            source = asmCode.decode('string_escape')
        except ValueError as e:
            self.txtOutput.set_text(" [*] ERROR: Invalid escape sequence in input: {0}".format(e))
            return

        binCode = self.Assemble(source, self.ARCH[Arch])
        if isinstance(binCode, KsError):
            buf = str(binCode)
        else:
            # Every encoded byte occupies four characters ("\xNN").
            byteCount = len(binCode) // 4
            buf = "".join([
                "{0:s}".format(binCode),
                "\n\n###############################################\n",
                "Arch: {0:s}\n".format(Arch),
                "Length: {0:d}\n".format(byteCount),
                "\n    ---===[ Code ]===--- \n{0:s}\n".format(asmCode),
                "\n    ---===[ End Code ]===--- \n\n",
                "###############################################\n",
            ])
        self.txtOutput.set_text(buf)

    ####################################################################
    # Window Building Functions
    ####################################################################
    def CreateWindow(self, data=None):
        """Create the top-level window and the shared tooltip group."""
        self.tooltips = gtk.Tooltips()
        self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        self.window.set_title("{0:s} {1:s}".format(self.NAME, self.VERSION))
        self.window.connect("delete_event", self.delete_event)
        self.window.set_border_width(10)
        self.window.set_default_size(500, 650)
        self.window.set_position(gtk.WIN_POS_CENTER)

    def AddMainVBox(self, data=None):
        """Add the vertical box that holds the input and output frames."""
        self.VboxMain = gtk.VBox(False, 5)
        self.window.add(self.VboxMain)

    def AddInputFrame(self, data=None):
        """Add the "Assembly Code" frame and its inner vertical box."""
        frame = gtk.Frame("Assembly Code")
        self.VboxMain.pack_start(frame, True, True, 10)
        self.VBoxInput = gtk.VBox(False, 10)
        frame.add(self.VBoxInput)

    def AddInputs(self, data=None):
        """Add the source editor, the architecture selector and the button."""
        hbox = gtk.HBox(False, 0)
        self.VBoxInput.pack_start(hbox, True, True, 0)

        # The editor lives inside the scrolled window so that long listings
        # scroll instead of growing the window (same layout as AddOutput).
        sw = gtk.ScrolledWindow()
        sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        self.txtviewAsmCode = gtk.TextView()
        self.txtAsmCode = self.txtviewAsmCode.get_buffer()
        sw.add(self.txtviewAsmCode)
        hbox.pack_start(sw, True, True, 0)

        hbox = gtk.HBox(False, 0)
        self.VBoxInput.pack_start(hbox, False, False, 3)
        self.cmbArchSelector = gtk.combo_box_new_text()
        for key in sorted(self.ARCH):
            self.cmbArchSelector.append_text(key)
        label = gtk.Label("<span fgcolor='#0000FF'><u>Architecture</u>:</span>")
        label.set_alignment(0.1, 0.5)
        label.set_use_markup(True)
        self.tooltips.set_tip(label, "The architecture the payload is for.")
        hbox.pack_start(label, False, False, 5)
        hbox.pack_start(self.cmbArchSelector, True, True, 0)

        button = gtk.Button("\nAssemble\n")
        button.connect("clicked", self.btnAssembleClicked)
        self.VBoxInput.pack_start(button, False, False, 3)

    def AddOutput(self, data=None):
        """Add the scrollable, monospaced "Output" pane."""
        sw = gtk.ScrolledWindow()
        sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        frame = gtk.Frame("Output")
        self.txtviewOutput = gtk.TextView()
        fontdesc = pango.FontDescription("monospace 10")
        self.txtviewOutput.modify_font(fontdesc)
        self.txtviewOutput.set_wrap_mode(gtk.WRAP_CHAR)
        self.txtOutput = self.txtviewOutput.get_buffer()
        self.VboxMain.pack_start(frame, True, True, 0)
        frame.add(sw)
        sw.add(self.txtviewOutput)

    def __init__(self):
        """Build every widget and show the window."""
        self.declareArchs()
        self.CreateWindow()
        self.AddMainVBox()
        self.AddInputFrame()
        self.AddInputs()
        self.AddOutput()
        self.window.show_all()
 
def main():
    """Enter the GTK main loop (gtk.main())."""
    gtk.main()
 
# Script entry point: build the window (the constructor shows it), then
# hand control to the GTK main loop.
if __name__ == "__main__":
	UI = Assembler()
	main()

Code Example: capstone_example.py

This example will provide you with a GUI tool that wraps some of the Capstone functionality.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Program: capstone_example.py
#
# Author: Travis Phillips
#
# Date: 02/12/2019
#
# Purpose: This application is to provide an example of how to use the
#          Capstone disassembler engine. This code is an example for the
#          PyJax presentation presented on 02/12/2019.
#
# Website: https://wiki.jaxhax.org
#
########################################################################
import pygtk
pygtk.require('2.0')
import gtk
import pango
 
from capstone import *
 
class Disassembler(object):
    """PyGTK window that disassembles a payload with the Capstone engine.

    The user pastes a ``\\xNN`` escaped payload, picks a target from the
    architecture combo box and presses "Disassemble"; the listing is shown
    in the output pane.  Constructing an instance builds and shows the
    window; the caller is expected to run the GTK main loop afterwards.
    """
    NAME = 'Capstone Disassembler'
    VERSION = 'v1.0'

    def declareArchs(self):
        """Populate ``self.ARCH``: display label -> [CS_ARCH_*, CS_MODE_*]."""
        self.ARCH = {
            "x86 - 16 bit": [CS_ARCH_X86, CS_MODE_16],
            "x86 - 32 bit": [CS_ARCH_X86, CS_MODE_32],
            "x64": [CS_ARCH_X86, CS_MODE_64],
            "ARM": [CS_ARCH_ARM, CS_MODE_ARM],
            "ARM - Thumbs": [CS_ARCH_ARM, CS_MODE_THUMB],
            "ARM64": [CS_ARCH_ARM64, CS_MODE_ARM],
            "MIPS": [CS_ARCH_MIPS, CS_MODE_MIPS32],
            "MIPS - 64 bit": [CS_ARCH_MIPS, CS_MODE_MIPS64],
        }

    ####################################################################
    # Support Functions
    ####################################################################
    @staticmethod
    def _formatRows(rows):
        """Format ``[address, hex_bytes, mnemonic, op_str]`` rows as a listing.

        The hex column is left-justified to the widest entry so that the
        mnemonics line up.  Returns "" when *rows* is empty.
        """
        if not rows:
            return ""
        longest = max(len(row[1]) for row in rows)
        return "".join(
            "0x{0:x}:\t{1:s}\t{2:s}\t{3:s}\n".format(
                row[0], row[1].ljust(longest), row[2], row[3])
            for row in rows)

    def Disassemble(self, value, archDetails):
        """Disassemble the raw bytes *value*, loaded at address 0x1000.

        Args:
            value: raw machine code (already unescaped).
            archDetails: ``[arch, mode]`` pair as stored in ``self.ARCH``.

        Returns:
            One line per decoded instruction: address, hex bytes, mnemonic
            and operands, separated by tabs.
        """
        md = Cs(archDetails[0], archDetails[1])
        rows = []
        for insn in md.disasm(value, 0x1000):
            # Joining (rather than appending "NN " and stripping) leaves no
            # trailing blank in the hex column.
            hexBytes = " ".join("{0:02X}".format(b) for b in insn.bytes)
            rows.append([insn.address, hexBytes, insn.mnemonic, insn.op_str])
        return self._formatRows(rows)

    ####################################################################
    # Callback Functions
    ####################################################################
    def delete_event(self, widget, event, data=None):
        """Window close handler: leave the GTK main loop."""
        gtk.main_quit()
        return False

    def btnDisassembleClicked(self, widget):
        """Disassemble the payload in the entry box and show the listing."""
        value = self.txtInput.get_text()
        Arch = self.cmbArchSelector.get_active_text()
        if Arch is None:
            self.txtOutput.set_text(" [*] ERROR: You Must select a architecture.")
            return
        # Each byte is written as four characters ("\xNN").
        if len(value) % 4 != 0:
            self.txtOutput.set_text(" [*] ERROR: Length is wrong for hex encoded string.\n\nExample: \\x41\\x41\\x41\\x41")
            return

        # The length test cannot catch malformed escapes such as "\xZZ";
        # the 'string_escape' codec (Python 2) raises ValueError for those.
        try:
            payload = value.decode('string_escape')
        except ValueError as e:
            self.txtOutput.set_text(" [*] ERROR: Payload is not a valid hex encoded string: {0}\n\nExample: \\x41\\x41\\x41\\x41".format(e))
            return

        try:
            listing = self.Disassemble(payload, self.ARCH[Arch])
        except CsError as e:
            self.txtOutput.set_text(" [*] ERROR: {0}".format(e))
            return

        buf = "".join([
            "###############################################\n",
            "# Arch: {0:s}\n".format(Arch),
            "# Payload: {0:s}\n".format(value),
            "###############################################\n\n",
            "{0:s}".format(listing),
        ])
        self.txtOutput.set_text(buf)

    ####################################################################
    # Window Building Functions
    ####################################################################
    def CreateWindow(self, data=None):
        """Create the top-level window and the shared tooltip group."""
        self.tooltips = gtk.Tooltips()
        self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        self.window.set_title("{0:s} {1:s}".format(self.NAME, self.VERSION))
        self.window.connect("delete_event", self.delete_event)
        self.window.set_border_width(10)
        self.window.set_default_size(500, 650)
        self.window.set_position(gtk.WIN_POS_CENTER)

    def AddMainVBox(self, data=None):
        """Add the vertical box that holds the input and output frames."""
        self.VboxMain = gtk.VBox(False, 5)
        self.window.add(self.VboxMain)

    def AddInputFrame(self, data=None):
        """Add the "Payload" frame and its inner vertical box."""
        frame = gtk.Frame("Payload")
        self.VboxMain.pack_start(frame, False, False, 10)
        self.VBoxInput = gtk.VBox(False, 10)
        frame.add(self.VBoxInput)

    def AddInputs(self, data=None):
        """Add the payload entry, the architecture selector and the button."""
        hbox = gtk.HBox(False, 0)
        self.VBoxInput.pack_start(hbox, False, False, 0)
        self.entryInput = gtk.Entry()
        self.txtInput = self.entryInput.get_buffer()
        label = gtk.Label("<span fgcolor='#0000FF'><u>Value</u>:</span>")
        label.set_alignment(0.1, 0.5)
        label.set_use_markup(True)
        self.tooltips.set_tip(label, "The hex encoded payload you wish to disassemble.")
        hbox.pack_start(label, False, False, 5)
        hbox.pack_start(self.entryInput, True, True, 0)

        hbox = gtk.HBox(False, 0)
        self.VBoxInput.pack_start(hbox, False, False, 3)
        self.cmbArchSelector = gtk.combo_box_new_text()
        for key in sorted(self.ARCH):
            self.cmbArchSelector.append_text(key)
        label = gtk.Label("<span fgcolor='#0000FF'><u>Architecture</u>:</span>")
        label.set_alignment(0.1, 0.5)
        label.set_use_markup(True)
        self.tooltips.set_tip(label, "The architecture the payload was built for.")
        hbox.pack_start(label, False, False, 5)
        hbox.pack_start(self.cmbArchSelector, True, True, 0)

        button = gtk.Button("\nDisassemble\n")
        button.connect("clicked", self.btnDisassembleClicked)
        self.VBoxInput.pack_start(button, False, False, 3)

    def AddOutput(self, data=None):
        """Add the scrollable, monospaced "Output" pane."""
        sw = gtk.ScrolledWindow()
        sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        frame = gtk.Frame("Output")
        self.txtviewOutput = gtk.TextView()
        fontdesc = pango.FontDescription("monospace 10")
        self.txtviewOutput.modify_font(fontdesc)
        self.txtviewOutput.set_wrap_mode(gtk.WRAP_CHAR)
        self.txtOutput = self.txtviewOutput.get_buffer()
        self.VboxMain.pack_start(frame, True, True, 0)
        frame.add(sw)
        sw.add(self.txtviewOutput)

    def __init__(self):
        """Build every widget and show the window."""
        self.declareArchs()
        self.CreateWindow()
        self.AddMainVBox()
        self.AddInputFrame()
        self.AddInputs()
        self.AddOutput()
        self.window.show_all()
 
def main():
    """Enter the GTK main loop (gtk.main())."""
    gtk.main()
 
# Script entry point: build the window (the constructor shows it), then
# hand control to the GTK main loop.
if __name__ == "__main__":
	UI = Disassembler()
	main()

Code Example: raytheon_dv.bin_emulator_v1.py

This shows how to use Unicorn to simply run code. The code ends in a branch-to-self instruction that would otherwise loop forever; this example lets it spin and relies on the emulator's timeout to stop the run.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Program: raytheon_dv.bin_emulator_v1.py
#
# Author: Travis Phillips
#
# Date: 02/12/2019
#
# Purpose: This application is to provide an example of how to use the
#          Unicorn emulator engine to solve an old Raytheon CTF challenge.
#          This code is an example for the PyJax presentation presented
#          on 02/12/2019.
#
# Website: https://wiki.jaxhax.org
#
########################################################################
from unicorn import *
from unicorn.arm_const import *
from capstone import *
 
# This is the binary code from the Raytheon
# CTF binary 'dv.bin', kept as one bytes literal built up line by line.
# It is written to emulator memory unchanged; the listing that follows
# shows how it disassembles in ARM mode.
ARM_CODE  = b"\x00\x00\x20\xe0\x24\x10\x8f\xe2\x36\x20\x8f\xe2\x18\x00"
ARM_CODE += b"\x91\xe8\x60\x00\xb2\xe8\x05\x30\x23\xe0\x06\x40\x24\xe0"
ARM_CODE += b"\x18\x00\xa1\xe8\x08\x00\x80\xe2\x14\x00\x50\xe3\xf7\xff"
ARM_CODE += b"\xff\xba\xfe\xff\xff\xea\x41\x2d\x21\xd6\x92\xaf\x49\x15"
ARM_CODE += b"\x4b\x34\x22\x14\x1a\x34\x2a\xab\x0f\x60\x51\x2f\x52\x47"
ARM_CODE += b"\x08\x43\x01\xa2\xfa\xca\x69\x40\x00\x1a\x0c\x3a\x73\x40"
ARM_CODE += b"\x0a\xc2\x7c\x40\x14\x60\x00\x47\x7b\x53\x07\x28\x2a\xa3"
ARM_CODE += b"\x12\x60\x4b\x25\x5f\x47"
 
# ARM Disassembly:
# 0x1000:	00 00 20 E0 	eor	r0, r0, r0
# 0x1004:	24 10 8F E2 	add	r1, pc, #0x24
# 0x1008:	36 20 8F E2 	add	r2, pc, #0x36
# 0x100c:	18 00 91 E8 	ldm	r1, {r3, r4}
# 0x1010:	60 00 B2 E8 	ldm	r2!, {r5, r6}
# 0x1014:	05 30 23 E0 	eor	r3, r3, r5
# 0x1018:	06 40 24 E0 	eor	r4, r4, r6
# 0x101c:	18 00 A1 E8 	stm	r1!, {r3, r4}
# 0x1020:	08 00 80 E2 	add	r0, r0, #8
# 0x1024:	14 00 50 E3 	cmp	r0, #0x14
# 0x1028:	F7 FF FF BA 	blt	#0x100c
# 0x102c:	FE FF FF EA 	b	#0x102c  ; <== Infinite Branch-to-self loop.
# 0x1030:	41 2D 21 D6 	strtle	r2, [r1], -r1, asr #26
# 0x1034:	92 AF 49 15 	strbne	sl, [sb, #-0xf92]
# 0x1038:	4B 34 22 14 	strtne	r3, [r2], #-0x44b
# 0x103c:	1A 34 2A AB 	blge	#0xa8e0ac
# 0x1040:	0F 60 51 2F 	svchs	#0x51600f
# 0x1044:	52 47 08 43 	movwmi	r4, #0x8752
# 0x1048:	01 A2 FA CA 	bgt	#0xffea9854
# 0x104c:	69 40 00 1A 	bne	#0x111f8
# 0x1050:	0C 3A 73 40 	rsbsmi	r3, r3, ip, lsl #20
# 0x1054:	0A C2 7C 40 	rsbsmi	ip, ip, sl, lsl #4
# 0x1058:	14 60 00 47 	smladmi	r0, r4, r0, r6
# 0x105c:	7B 53 07 28 	stmdahs	r7, {r0, r1, r3, r4, r5, r6, r8, sb, ip, lr}
# 0x1060:	2A A3 12 60 	andsvs	sl, r2, sl, lsr #6
# 0x1064:	4B 25 5F 47 	ldrbmi	r2, [pc, -fp, asr #10]
 
# Memory address where emulation starts. The same address is used as the
# base of the mapped region and as the load address of ARM_CODE.
ADDRESS = 0x1000
 
########################################################################
#                       SUPPORT FUNCTIONS
########################################################################
# Convert bytes into a hex string for printing in a
# disassembler dump.
def HexStr(bytes, size):
    """Return *bytes* as space-separated upper-case hex pairs.

    Args:
        bytes: the raw bytes to render (bytearray, bytes/str, or a sequence
            of ints).  The name shadows the builtin but is kept so existing
            callers keep working.
        size: minimum number of byte columns; when *bytes* is shorter the
            output is padded with "00" entries up to this count.

    Returns:
        e.g. ``"00 00 20 E0"``; the empty string for empty input and size 0.
    """
    # bytearray() yields integer elements on both Python 2 and Python 3,
    # whatever byte container was passed in.
    pairs = ["{0:02X}".format(b) for b in bytearray(bytes)]
    # A negative repeat count gives an empty list, so over-long input is
    # simply left unpadded.
    pairs.extend(["00"] * (size - len(pairs)))
    return " ".join(pairs)
 
########################################################################
#                       CODE HOOK FUNCTIONS
########################################################################
# callback for tracing instructions
def hook_code(uc, address, size, user_data):
    """UC_HOOK_CODE callback: print the instruction that is about to run.

    Args:
        uc: the emulator instance that invoked the hook.
        address: address of the instruction.
        size: size of the instruction in bytes.
        user_data: unused.
    """
    # Get bytes to be executed.  bytes() gives the raw byte string on both
    # Python 2 and Python 3; str() on Python 3 would instead produce the
    # "bytearray(b'...')" repr and Capstone would decode the wrong bytes.
    data = bytes(uc.mem_read(address, size))

    # Use Capstone to disassemble the bytes.
    md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    for i in md.disasm(data, address):
        print(" [*] \033[32m0x{:08x}\033[33;1m:\033[0m   \033[90;1m{}\033[0m    \033[31;1m{}\033[0m{}".format(i.address, HexStr(i.bytes, i.size).ljust(20), i.mnemonic.ljust(6), i.op_str))
 
# Script entry point: load ARM_CODE into a fresh ARM emulator, trace every
# instruction, and let the timeout end the run.
if __name__ == '__main__':
    print("\n\t\033[33;1m---===[ Emulate Raytheon CTF dv.bin ]===----\033[0m\n")
    try:
        # Initialize emulator in ARM mode
        print(" [*] Creating ARM emulator")
        mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        # Map 1MB memory for this emulation
        print(" [*] Creating 1MB of memory for the emulator")
        mu.mem_map(ADDRESS, 1 * 1024 * 1024)

        # Trace every executed instruction with the customized callback
        print(" [*] Attaching code hooks")
        mu.hook_add(UC_HOOK_CODE, hook_code)

        # Write machine code to be emulated to memory
        print(" [*] Writing ARM code to memory")
        mu.mem_write(ADDRESS, ARM_CODE)

        # Emulate machine code [timeout is microseconds].  The second
        # argument is the address to stop at, so it must be offset by the
        # load address.  The code spins on its branch-to-self at +0x2c and
        # never gets there; the timeout is what ends the run.
        print(" [*] Starting emulator...")
        mu.emu_start(ADDRESS, ADDRESS + len(ARM_CODE), timeout=250000)
        print(" [*] Emulation finished!")

        print(" [*] Done Son!\n")

    except UcError as e:
        # "{0}" rather than "{0:s}": the "s" format spec is rejected for
        # exception objects on Python 3 and would mask the real error.
        print("ERROR: {0}".format(e))

Code Example: raytheon_dv.bin_emulator_v2.py

Same as version one, except we stop before the branch to self instruction, and dump the decoded data.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Program: raytheon_dv.bin_emulator_v2.py
#
# Author: Travis Phillips
#
# Date: 02/12/2019
#
# Purpose: This application is to provide an example of how to use the
#          Unicorn emulator engine to solve an old Raytheon CTF challenge.
#          This code is an example for the PyJax presentation presented
#          on 02/12/2019.
#
# Website: https://wiki.jaxhax.org
#
########################################################################
from unicorn import *
from unicorn.arm_const import *
from capstone import *
 
# This is the binary code from the Raytheon
# CTF binary 'dv.bin', kept as one bytes literal built up line by line.
# It is written to emulator memory unchanged; the listing that follows
# shows how it disassembles in ARM mode.
ARM_CODE  = b"\x00\x00\x20\xe0\x24\x10\x8f\xe2\x36\x20\x8f\xe2\x18\x00"
ARM_CODE += b"\x91\xe8\x60\x00\xb2\xe8\x05\x30\x23\xe0\x06\x40\x24\xe0"
ARM_CODE += b"\x18\x00\xa1\xe8\x08\x00\x80\xe2\x14\x00\x50\xe3\xf7\xff"
ARM_CODE += b"\xff\xba\xfe\xff\xff\xea\x41\x2d\x21\xd6\x92\xaf\x49\x15"
ARM_CODE += b"\x4b\x34\x22\x14\x1a\x34\x2a\xab\x0f\x60\x51\x2f\x52\x47"
ARM_CODE += b"\x08\x43\x01\xa2\xfa\xca\x69\x40\x00\x1a\x0c\x3a\x73\x40"
ARM_CODE += b"\x0a\xc2\x7c\x40\x14\x60\x00\x47\x7b\x53\x07\x28\x2a\xa3"
ARM_CODE += b"\x12\x60\x4b\x25\x5f\x47"
 
# ARM Disassembly:
# 0x1000:	00 00 20 E0 	eor	r0, r0, r0
# 0x1004:	24 10 8F E2 	add	r1, pc, #0x24
# 0x1008:	36 20 8F E2 	add	r2, pc, #0x36
# 0x100c:	18 00 91 E8 	ldm	r1, {r3, r4}
# 0x1010:	60 00 B2 E8 	ldm	r2!, {r5, r6}
# 0x1014:	05 30 23 E0 	eor	r3, r3, r5
# 0x1018:	06 40 24 E0 	eor	r4, r4, r6
# 0x101c:	18 00 A1 E8 	stm	r1!, {r3, r4}
# 0x1020:	08 00 80 E2 	add	r0, r0, #8
# 0x1024:	14 00 50 E3 	cmp	r0, #0x14
# 0x1028:	F7 FF FF BA 	blt	#0x100c
# 0x102c:	FE FF FF EA 	b	#0x102c  ; <== Infinite Branch-to-self loop.
# 0x1030:	41 2D 21 D6 	strtle	r2, [r1], -r1, asr #26
# 0x1034:	92 AF 49 15 	strbne	sl, [sb, #-0xf92]
# 0x1038:	4B 34 22 14 	strtne	r3, [r2], #-0x44b
# 0x103c:	1A 34 2A AB 	blge	#0xa8e0ac
# 0x1040:	0F 60 51 2F 	svchs	#0x51600f
# 0x1044:	52 47 08 43 	movwmi	r4, #0x8752
# 0x1048:	01 A2 FA CA 	bgt	#0xffea9854
# 0x104c:	69 40 00 1A 	bne	#0x111f8
# 0x1050:	0C 3A 73 40 	rsbsmi	r3, r3, ip, lsl #20
# 0x1054:	0A C2 7C 40 	rsbsmi	ip, ip, sl, lsl #4
# 0x1058:	14 60 00 47 	smladmi	r0, r4, r0, r6
# 0x105c:	7B 53 07 28 	stmdahs	r7, {r0, r1, r3, r4, r5, r6, r8, sb, ip, lr}
# 0x1060:	2A A3 12 60 	andsvs	sl, r2, sl, lsr #6
# 0x1064:	4B 25 5F 47 	ldrbmi	r2, [pc, -fp, asr #10]
 
# Memory address where emulation starts. The same address is used as the
# base of the mapped region and as the load address of ARM_CODE.
ADDRESS = 0x1000
 
########################################################################
#                       SUPPORT FUNCTIONS
########################################################################
# Convert bytes into a hex string for printing in a
# disassembler dump.
def HexStr(bytes, size):
    """Return *bytes* as space-separated upper-case hex pairs.

    Args:
        bytes: the raw bytes to render (bytearray, bytes/str, or a sequence
            of ints).  The name shadows the builtin but is kept so existing
            callers keep working.
        size: minimum number of byte columns; when *bytes* is shorter the
            output is padded with "00" entries up to this count.

    Returns:
        e.g. ``"00 00 20 E0"``; the empty string for empty input and size 0.
    """
    # bytearray() yields integer elements on both Python 2 and Python 3,
    # whatever byte container was passed in.
    pairs = ["{0:02X}".format(b) for b in bytearray(bytes)]
    # A negative repeat count gives an empty list, so over-long input is
    # simply left unpadded.
    pairs.extend(["00"] * (size - len(pairs)))
    return " ".join(pairs)
 
# This function will read bytes from memory to a buffer
# until it encounters a null byte...
def readStringFromMemory(uc, address):
    """Read a NUL-terminated string from emulator memory.

    Args:
        uc: object exposing ``mem_read(address, size)``.
        address: address of the first character.

    Returns:
        The characters before the first zero byte, one character per byte
        (the terminator is not included).  Any error raised by
        ``mem_read`` propagates to the caller.
    """
    chars = []
    cursor = address
    byteIn = uc.mem_read(cursor, 1)
    while byteIn[0] != 0x00:
        # chr() on the integer value gives the same one-character string on
        # Python 2 and Python 3; str(bytearray) on Python 3 would append the
        # "bytearray(b'x')" repr instead of the character.
        chars.append(chr(byteIn[0]))
        cursor += 1
        byteIn = uc.mem_read(cursor, 1)
    return "".join(chars)
 
########################################################################
#                       CODE HOOK FUNCTIONS
########################################################################
# callback for tracing instructions
def hook_code(uc, address, size, user_data):
    """UC_HOOK_CODE callback: print the instruction that is about to run.

    Args:
        uc: the emulator instance that invoked the hook.
        address: address of the instruction.
        size: size of the instruction in bytes.
        user_data: unused.
    """
    # Get bytes to be executed.  bytes() gives the raw byte string on both
    # Python 2 and Python 3; str() on Python 3 would instead produce the
    # "bytearray(b'...')" repr and Capstone would decode the wrong bytes.
    data = bytes(uc.mem_read(address, size))

    # Use Capstone to disassemble the bytes.
    md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    for i in md.disasm(data, address):
        print(" [*] \033[32m0x{:08x}\033[33;1m:\033[0m   \033[90;1m{}\033[0m    \033[31;1m{}\033[0m{}".format(i.address, HexStr(i.bytes, i.size).ljust(20), i.mnemonic.ljust(6), i.op_str))
 
# Script entry point: run ARM_CODE up to (not including) its branch-to-self
# and print the string it decoded in place.
if __name__ == '__main__':
    print("\n\t\033[33;1m---===[ Emulate Raytheon CTF dv.bin ]===----\033[0m\n")
    try:
        # Initialize emulator in ARM mode
        print(" [*] Creating ARM emulator")
        mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        # map 1MB memory for this emulation
        print(" [*] Creating 1MB of memory for the emulator")
        mu.mem_map(ADDRESS, 1 * 1024 * 1024)

        # trace every executed instruction with the customized callback
        print(" [*] Attaching code hooks")
        mu.hook_add(UC_HOOK_CODE, hook_code)

        # write machine code to be emulated to memory
        print(" [*] Writing ARM code to memory")
        mu.mem_write(ADDRESS, ARM_CODE)

        # emulate machine code (+0x2c is a branch-to-self infinite loop,
        # so stop when execution reaches it; the timeout is a safety net.)
        print(" [*] Starting emulator...")
        mu.emu_start(ADDRESS, ADDRESS + 0x2c, timeout=1000000)
        print(" [*] Emulation finished!")

        # Read string from memory: the data words start at +0x30, right
        # after the branch-to-self instruction.
        print(" [*] Reading decoded data from emulator memory...")
        data = readStringFromMemory(mu, ADDRESS + 0x30)
        print(" \033[32;1m[+] Decoded Data:\033[0m {0:s}".format(data))

        print(" [*] Done Son!\n")

    except UcError as e:
        # "{0}" rather than "{0:s}": the "s" format spec is rejected for
        # exception objects on Python 3 and would mask the real error.
        print("ERROR: {0}".format(e))

Code Example: deobfuscate_x86_payload.py

This example shows how Unicorn can be used to decode a payload by running it without executing any of the syscalls and how to hook the interrupts to help decode the syscalls.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Program: deobfuscate_x86_payload.py
#
# Author: Travis Phillips
#
# Date: 02/12/2019
#
# Purpose: This application is to provide an example of how to use the
#          Unicorn emulator engine to deobfuscate an encoded x86 Linux
#          payload and log the syscalls it attempts. This code is an
#          example for the PyJax presentation presented on 02/12/2019.
#
# Website: https://wiki.jaxhax.org
#
########################################################################
from unicorn import *
from unicorn.x86_const import *
from capstone import *
import struct
 
# Code Address Parameters
# The payload is written at BASE_ADDR and emulation starts there.
BASE_ADDR = 0x100000
BASE_SIZE = 1 * 1024 * 1024 # 1MB

# Stack Address Parameters
# With these values the stack region [0x0, 0x100000) ends exactly where the
# code region begins.
STACK_ADDR = 0x0
STACK_SIZE = 1 * 1024*1024 # 1MB
 
# Payload to deobfuscate... Generated with the following code:
# ./msfvenom -p linux/x86/exec -f python -e x86/call4_dword_xor -i 15 CMD="ls -l /etc"
# NOTE(review): this is a plain (non-b) string literal, so on Python 3 it is
# text rather than bytes -- confirm it is converted before being written to
# emulator memory.
shellcode =  "\x33\xc9\x83\xe9\xa0\xe8\xff\xff\xff\xff\xc0\x5e\x81\x76\x0e\x36\x84\x35"
shellcode += "\xb8\x83\xee\xfc\xe2\xf4\x05\x4d\xb6\x51\x90\x6c\xca\x47\xc9\x7b\xf5\xe6"
shellcode += "\xb7\xf2\x3b\x28\xda\xaa\x6d\x3b\xd8\x78\xd7\x4c\x8d\xa1\x98\x09\x0a\x80"
shellcode += "\xe4\x1f\x59\x97\xdb\xbe\x27\x1e\x15\xe0\x9c\xd2\xd7\x63\x48\x94\xf9\x14"
shellcode += "\x8f\x9b\x22\xc5\x14\xba\x5e\xd3\x59\xad\x61\x72\x27\x24\xaf\x91\x61\x41"
shellcode += "\x93\xaf\x48\xae\x43\xd8\x30\x5c\x31\xf7\xa3\x7d\x4d\xe1\xe4\x6a\x72\x40"
shellcode += "\x9a\xe3\xbc\x1b\x22\xa3\xf0\x9d\xf5\x69\x50\xea\x35\x65\x07\xb5\xa0\x44"
shellcode += "\x7b\xa3\xe1\x53\x44\x02\x9f\xda\x8a\xd5\xfe\xa0\xe4\xdf\xf0\x50\x66\xa8"
shellcode += "\xa6\x85\x0b\xd5\x53\xa4\x77\xc3\x68\xb3\x48\x62\x16\x3a\x86\x85\x13\xbd"
shellcode += "\x61\xbf\x79\xb0\x6a\xc8\x1f\x01\xfa\x3c\xe4\x20\x86\x2a\xd1\x37\xb9\x8b"
shellcode += "\xaf\xbe\x77\x7e\x60\x25\x8b\x56\xc0\x34\x9b\x21\xb6\x4f\x17\xce\x55\x6e"
shellcode += "\x6b\xd8\x7a\x79\x54\x79\x04\xf0\x9a\xd9\x1a\xd3\x8d\xa4\x6b\x7a\x76\xd3"
shellcode += "\x52\xd0\x42\xd7\xad\xf1\x3e\xc1\x84\xe6\x01\x60\xfa\x6f\xcf\x15\x73\x31"
shellcode += "\x87\xbd\x95\xe5\x23\xca\x63\xd8\x6a\x91\x8c\xf9\x16\x87\xaf\xee\x29\x26"
shellcode += "\xd1\x67\xe7\xf9\x9f\x54\x51\xfb\xbe\xed\x0b\x8c\xe2\x17\x2f\x29\x33\x36"
shellcode += "\x53\x3f\x2e\x21\x6c\x9e\x50\xa8\xa2\x28\x15\x5b\x49\x43\x3f\x22\x4e\x34"
shellcode += "\x12\xd3\xaa\xcc\xd1\xf2\xd6\xda\xc6\xe5\xe9\x7b\xb8\x6c\x27\xe7\x27\x8c"
shellcode += "\x62\xa6\xd7\xe6\xcb\xd1\xc8\xcd\x3c\x87\x15\xec\x40\x91\x04\xfb\x7f\x30"
shellcode += "\x7a\x72\xb1\xbb\x48\x5f\x4c\xed\x15\xf8\x5d\x9a\x05\x7e\x67\x74\xda\x5f"
shellcode += "\x1b\x62\xd1\x48\x24\xc3\xaf\xc1\xea\x20\x2a\x75\x90\x1e\xc0\x4b\x06\x69"
shellcode += "\xf9\xb8\x7e\x70\xc1\xd5\x4e\xc4\xf0\x3a\xc1\x81\xbc\xc0\x4e\xe9\xfb\x9c"
shellcode += "\x44\x80\xfd\x3a\xc5\xbb\x7b\xb8\x26\xe9\x93\xdf\x55\xc9\xbe\xdf\x06\xc6"
shellcode += "\xf6\xc7\x45\xe9\xc4\xe0\xaf\x08\x5e\x33\x26\xe9"
 
########################################################################
#                       SUPPORT FUNCTIONS
########################################################################
def HexStr(bytes, size):
    """Return *bytes* as space-separated upper-case hex pairs.

    Args:
        bytes: the raw bytes to render (bytearray, bytes/str, or a sequence
            of ints).  The name shadows the builtin but is kept so existing
            callers keep working.
        size: minimum number of byte columns; when *bytes* is shorter the
            output is padded with "00" entries up to this count.

    Returns:
        e.g. ``"33 C9"``; the empty string for empty input and size 0.
    """
    # bytearray() yields integer elements on both Python 2 and Python 3,
    # whatever byte container was passed in.
    pairs = ["{0:02X}".format(b) for b in bytearray(bytes)]
    # A negative repeat count gives an empty list, so over-long input is
    # simply left unpadded.
    pairs.extend(["00"] * (size - len(pairs)))
    return " ".join(pairs)
 
def readStringFromMemory(uc, address):
    """Read a NUL-terminated string from emulator memory.

    Args:
        uc: object exposing ``mem_read(address, size)``.
        address: address of the first character.

    Returns:
        The characters before the first zero byte, one character per byte
        (the terminator is not included).  Any error raised by
        ``mem_read`` propagates to the caller.
    """
    chars = []
    cursor = address
    byteIn = uc.mem_read(cursor, 1)
    while byteIn[0] != 0x00:
        # chr() on the integer value gives the same one-character string on
        # Python 2 and Python 3; str(bytearray) on Python 3 would append the
        # "bytearray(b'x')" repr instead of the character.
        chars.append(chr(byteIn[0]))
        cursor += 1
        byteIn = uc.mem_read(cursor, 1)
    return "".join(chars)
 
def readStringArray(uc, address):
    """Read a zero-terminated array of 32-bit string pointers.

    Args:
        uc: object exposing ``mem_read(address, size)``.
        address: address of the first pointer; 0 means "no array".

    Returns:
        None when *address* is 0, otherwise the list of strings the
        pointers refer to (empty when the first pointer is already 0).
    """
    if address == 0:
        return None

    strings = []
    cursor = address
    while True:
        # Each slot is a little-endian 32-bit pointer; a zero slot ends
        # the array.
        (ptr,) = struct.unpack("<I", uc.mem_read(cursor, 4))
        if ptr == 0:
            break
        strings.append(readStringFromMemory(uc, ptr))
        cursor += 4
    return strings
 
########################################################################
#                       CODE HOOK FUNCTIONS
########################################################################
def hook_intr(uc, intnum, user_data):
    """Interrupt callback: report what an ``int 0x80`` syscall would do.

    Any interrupt other than 0x80, and any syscall other than execve
    (EAX == 0xb), is reported and stops the emulation.  An execve is only
    decoded and printed; nothing is executed.
    """
    ####################################################################
    # Unicorn only emulates a CPU, with no operating system behind it,
    # so syscalls have to be emulated by us. That is convenient: they
    # never touch the host OS, and we are free to log them, emulate them
    # faithfully, or fake them and alter the results. Here we just show
    # what the payload would attempt to do.
    ####################################################################
    if intnum != 0x80:
        print(">>> got interrupt 0x{0:x} ???".format(intnum))
        uc.emu_stop()
        return

    # Read order matches the original: EAX, EIP, EBX, ECX, EDX.
    eax, eip, ebx, ecx, edx = [
        uc.reg_read(reg)
        for reg in (UC_X86_REG_EAX, UC_X86_REG_EIP, UC_X86_REG_EBX,
                    UC_X86_REG_ECX, UC_X86_REG_EDX)
    ]

    if eax != 0xb:
        print(">>> 0x{0:x}: interrupt 0x{1:x}, EAX = 0x{2:x}".format(eip, intnum, eax))
        print(" [*] Halting emulation due to unknown syscall...")
        uc.emu_stop()
        return

    # sys_execve: EBX -> program name, ECX -> argument pointer array,
    # EDX -> second pointer array (printed as "argp").
    cmd = readStringFromMemory(uc, ebx)
    argv = readStringArray(uc, ecx)
    argp = readStringArray(uc, edx)
    print(">>> 0x{:08x}: \033[36;1msyscall invoked\033[0m:\n\texecve(name='{}', argv={}, argp={})".format(eip, cmd, argv, argp))
 
def hook_code(uc, address, size, user_data):
    """UC_HOOK_CODE callback: print the instruction that is about to run.

    Args:
        uc: the emulator instance that invoked the hook.
        address: address of the instruction.
        size: size of the instruction in bytes.
        user_data: unused.
    """
    # Get bytes to be executed.  bytes() gives the raw byte string on both
    # Python 2 and Python 3; str() on Python 3 would instead produce the
    # "bytearray(b'...')" repr and Capstone would decode the wrong bytes.
    data = bytes(uc.mem_read(address, size))

    # Use Capstone to disassemble the bytes.
    md = Cs(CS_ARCH_X86, CS_MODE_32)
    for i in md.disasm(data, address):
        print(" [*] \033[32m0x{:08x}\033[33;1m:\033[0m   \033[90;1m{}\033[0m    \033[31;1m{}\033[0m{}".format(i.address, HexStr(i.bytes, i.size).ljust(20), i.mnemonic.ljust(6), i.op_str))
 
# Script entry point: load the encoded payload into a 32-bit x86 emulator,
# trace every instruction while it decodes itself, and report (without
# performing) the syscall it finally issues.
if __name__ == '__main__':
    print("\n\t\033[33;1m---===[ Deobfuscate x86 Payload ]===----\033[0m\n")
    try:
        # Initialize emulator in X86 mode
        print(" [*] Creating x86 emulator")
        mu = Uc(UC_ARCH_X86, UC_MODE_32)

        # Map out code and stack addresses.
        print(" [*] Creating 1MB of code memory for the emulator")
        mu.mem_map(BASE_ADDR, BASE_SIZE)
        print(" [*] Creating 1MB of stack memory for the emulator")
        mu.mem_map(STACK_ADDR, STACK_SIZE)

        # Print out memory regions
        print(" [*] Memory Map for this machine:")
        for i in mu.mem_regions():
            print("\t [*] 0x{:08x} - 0x{:08x} ({})".format(i[0], i[1], i[2]))

        # write machine code to be emulated to memory.  The payload is a
        # plain string literal: already bytes on Python 2, but text on
        # Python 3, where latin-1 maps each character back to the byte
        # value it was written as.
        print(" [*] Writing payload to memory 0x{0:08x}".format(BASE_ADDR))
        payload = shellcode
        if not isinstance(payload, bytes):
            payload = payload.encode("latin-1")
        mu.mem_write(BASE_ADDR, payload)

        # set ESP one page (4096 bytes) below the top of the stack region.
        print(" [*] Setting ESP to somewhere in middle of the stack")
        mu.reg_write(UC_X86_REG_ESP, STACK_ADDR + STACK_SIZE - 4096)
        print(" [*] Stack pointer: 0x{:08x}".format(mu.reg_read(UC_X86_REG_ESP)))

        # Trace every executed instruction with the customized callback
        print(" [*] Attaching code hooks")
        mu.hook_add(UC_HOOK_CODE, hook_code)

        # Hook and attempt to decode known Linux Syscall with customized callback
        print(" [*] Attaching interrupt hooks")
        mu.hook_add(UC_HOOK_INTR, hook_intr)

        # Emulate machine code [timeout is microseconds]
        print(" [*] Starting emulator...")
        mu.emu_start(BASE_ADDR, BASE_ADDR + len(payload), timeout=1000000)
        print(" [*] Emulation finished!")

        print(" [*] Done Son!\n")

    except UcError as e:
        # "{0}" rather than "{0:s}": the "s" format spec is rejected for
        # exception objects on Python 3 and would mask the real error.
        print("ERROR: {0}".format(e))