First Commit of my working state
[simh.git] / AltairZ80 / disasm.c
1 /* disasm.c where all the _work_ gets done in the Netwide Disassembler
2 *
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
7 *
8 * initial version 27/iii/95 by Simon Tatham
9 */
10
11 #include <stdio.h>
12 #include <string.h>
13
14 #include "nasm.h"
15 #include "insns.h"
16
17 /* names.c included source file defining instruction and register
18 * names for the Netwide [Dis]Assembler
19 *
20 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
21 * Julian Hall. All rights reserved. The software is
22 * redistributable under the licence given in the file "Licence"
23 * distributed in the NASM archive.
24 */
25
/*
 * Condition-code suffixes in alphabetical order.  whichcond() selects
 * entries from this table by index, so the order must be preserved.
 */
static const char *conditions[] = {
    "a",   "ae",  "b",   "be",  "c",   "e",
    "g",   "ge",  "l",   "le",  "na",  "nae",
    "nb",  "nbe", "nc",  "ne",  "ng",  "nge",
    "nl",  "nle", "no",  "np",  "ns",  "nz",
    "o",   "p",   "pe",  "po",  "s",   "z"
};
31
/*
 * Register names, generated from regs.dat - do not edit by hand.
 * The table is indexed with (register number - EXPR_REG_START) when an
 * operand is printed, so the order of the entries must not change.
 */
static const char *reg_names[] = {
    /*  0 */ "ah", "al", "ax", "bh", "bl", "bp", "bx", "ch", "cl",
    /*  9 */ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",
    /* 17 */ "cs", "cx", "dh", "di", "dl",
    /* 22 */ "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7",
    /* 30 */ "ds", "dx", "eax", "ebp", "ebx", "ecx", "edi", "edx",
    /* 38 */ "es", "esi", "esp", "fs", "gs",
    /* 43 */ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
    /* 51 */ "segr6", "segr7", "si", "sp", "ss",
    /* 56 */ "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
    /* 64 */ "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7",
    /* 72 */ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};
116
117 /* Instruction names automatically generated from insns.dat */
118 /* This file is auto-generated from insns.dat by insns.pl - don't edit it */
119 /* This file is included by names.c */
/*
 * Mnemonic table in alphabetical order.  disasm() indexes it with the
 * opcode number of the matched template, so the order of the entries
 * must not change.  The number at the start of each letter group is
 * the index of the first entry in that group.
 */
static const char *insn_names[] = {
    /* 0: a */
    "aaa", "aad", "aam", "aas", "adc", "add",
    "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps",
    "and", "andnpd", "andnps", "andpd", "andps", "arpl",
    /* 18: b */
    "bound", "bsf", "bsr", "bswap", "bt", "btc", "btr", "bts",
    /* 26: c */
    "call", "cbw", "cdq", "clc", "cld", "clflush", "cli", "clts", "cmc", "cmp",
    "cmpeqpd", "cmpeqps", "cmpeqsd", "cmpeqss",
    "cmplepd", "cmpleps", "cmplesd", "cmpless",
    "cmpltpd", "cmpltps", "cmpltsd", "cmpltss",
    "cmpneqpd", "cmpneqps", "cmpneqsd", "cmpneqss",
    "cmpnlepd", "cmpnleps", "cmpnlesd", "cmpnless",
    "cmpnltpd", "cmpnltps", "cmpnltsd", "cmpnltss",
    "cmpordpd", "cmpordps", "cmpordsd", "cmpordss",
    "cmppd", "cmpps", "cmpsb", "cmpsd", "cmpss", "cmpsw",
    "cmpunordpd", "cmpunordps", "cmpunordsd", "cmpunordss",
    "cmpxchg", "cmpxchg486", "cmpxchg8b", "comisd", "comiss", "cpuid",
    "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", "cvtpd2pi", "cvtpd2ps",
    "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi",
    "cvtsd2si", "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si",
    "cvttpd2dq", "cvttpd2pi", "cvttps2dq", "cvttps2pi",
    "cvttsd2si", "cvttss2si", "cwd", "cwde",
    /* 104: d */
    "daa", "das", "db", "dd", "dec", "div",
    "divpd", "divps", "divsd", "divss", "dq", "dt", "dw",
    /* 117: e */
    "emms", "enter", "equ",
    /* 120: f */
    "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs", "fclex",
    "fcmovb", "fcmovbe", "fcmove", "fcmovnb",
    "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu",
    "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos",
    "fdecstp", "fdisi", "fdiv", "fdivp", "fdivr", "fdivrp",
    "femms", "feni", "ffree", "ffreep",
    "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul",
    "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr",
    "fld", "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t",
    "fldlg2", "fldln2", "fldpi", "fldz",
    "fmul", "fmulp", "fnclex", "fndisi", "fneni", "fninit", "fnop",
    "fnsave", "fnstcw", "fnstenv", "fnstsw",
    "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor",
    "fsave", "fscale", "fsetpm", "fsin", "fsincos", "fsqrt",
    "fst", "fstcw", "fstenv", "fstp", "fstsw",
    "fsub", "fsubp", "fsubr", "fsubrp", "ftst",
    "fucom", "fucomi", "fucomip", "fucomp", "fucompp",
    "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract",
    "fyl2x", "fyl2xp1",
    /* 222: h */
    "haddpd", "haddps", "hlt", "hsubpd", "hsubps",
    /* 227: i */
    "ibts", "icebp", "idiv", "imul", "in", "inc", "incbin",
    "insb", "insd", "insw",
    "int", "int01", "int03", "int1", "int3", "into",
    "invd", "invlpg", "iret", "iretd", "iretw",
    /* 248: j */
    "jcxz", "jecxz", "jmp", "jmpe",
    /* 252: l */
    "lahf", "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les",
    "lfence", "lfs", "lgdt", "lgs", "lidt", "lldt", "lmsw",
    "loadall", "loadall286", "lodsb", "lodsd", "lodsw",
    "loop", "loope", "loopne", "loopnz", "loopz",
    "lsl", "lss", "ltr",
    /* 280: m */
    "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss",
    "mfence", "minpd", "minps", "minsd", "minss", "monitor",
    "mov", "movapd", "movaps", "movd", "movddup", "movdq2q",
    "movdqa", "movdqu", "movhlps", "movhpd", "movhps",
    "movlhps", "movlpd", "movlps", "movmskpd", "movmskps",
    "movntdq", "movnti", "movntpd", "movntps", "movntq",
    "movq", "movq2dq", "movsb", "movsd", "movshdup", "movsldup",
    "movss", "movsw", "movsx", "movupd", "movups", "movzx",
    "mul", "mulpd", "mulps", "mulsd", "mulss", "mwait",
    /* 331: n */
    "neg", "nop", "not",
    /* 334: o */
    "or", "orpd", "orps", "out", "outsb", "outsd", "outsw",
    /* 341: p */
    "packssdw", "packsswb", "packuswb",
    "paddb", "paddd", "paddq", "paddsb", "paddsiw", "paddsw",
    "paddusb", "paddusw", "paddw",
    "pand", "pandn", "pause", "paveb", "pavgb", "pavgusb", "pavgw",
    "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd", "pcmpgtw",
    "pdistib", "pextrw",
    "pf2id", "pf2iw", "pfacc", "pfadd",
    "pfcmpeq", "pfcmpge", "pfcmpgt", "pfmax", "pfmin", "pfmul",
    "pfnacc", "pfpnacc", "pfrcp", "pfrcpit1", "pfrcpit2",
    "pfrsqit1", "pfrsqrt", "pfsub", "pfsubr",
    "pi2fd", "pi2fw", "pinsrw",
    "pmachriw", "pmaddwd", "pmagw", "pmaxsw", "pmaxub",
    "pminsw", "pminub", "pmovmskb",
    "pmulhriw", "pmulhrwa", "pmulhrwc", "pmulhuw", "pmulhw",
    "pmullw", "pmuludq",
    "pmvgezb", "pmvlzb", "pmvnzb", "pmvzb",
    "pop", "popa", "popad", "popaw", "popf", "popfd", "popfw", "por",
    "prefetch", "prefetchnta", "prefetcht0", "prefetcht1",
    "prefetcht2", "prefetchw",
    "psadbw", "pshufd", "pshufhw", "pshuflw", "pshufw",
    "pslld", "pslldq", "psllq", "psllw", "psrad", "psraw",
    "psrld", "psrldq", "psrlq", "psrlw",
    "psubb", "psubd", "psubq", "psubsb", "psubsiw", "psubsw",
    "psubusb", "psubusw", "psubw", "pswapd",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd",
    "punpcklbw", "punpckldq", "punpcklqdq", "punpcklwd",
    "push", "pusha", "pushad", "pushaw", "pushf", "pushfd", "pushfw",
    "pxor",
    /* 464: r */
    "rcl", "rcpps", "rcpss", "rcr", "rdmsr", "rdpmc", "rdshr", "rdtsc",
    "resb", "resd", "resq", "rest", "resw",
    "ret", "retf", "retn", "rol", "ror",
    "rsdc", "rsldt", "rsm", "rsqrtps", "rsqrtss", "rsts",
    /* 488: s */
    "sahf", "sal", "salc", "sar", "sbb", "scasb", "scasd", "scasw",
    "sfence", "sgdt", "shl", "shld", "shr", "shrd", "shufpd", "shufps",
    "sidt", "sldt", "smi", "smint", "smintold", "smsw",
    "sqrtpd", "sqrtps", "sqrtsd", "sqrtss",
    "stc", "std", "sti", "stmxcsr", "stosb", "stosd", "stosw", "str",
    "sub", "subpd", "subps", "subsd", "subss",
    "svdc", "svldt", "svts",
    "syscall", "sysenter", "sysexit", "sysret",
    /* 534: t */
    "test",
    /* 535: u */
    "ucomisd", "ucomiss", "ud0", "ud1", "ud2", "umov",
    "unpckhpd", "unpckhps", "unpcklpd", "unpcklps",
    /* 545: v */
    "verr", "verw",
    /* 547: w */
    "wait", "wbinvd", "wrmsr", "wrshr",
    /* 551: x */
    "xadd", "xbts", "xchg", "xlat", "xlatb", "xor",
    "xorpd", "xorps", "xstore"
};
682
/*
 * Mnemonic stems of the conditional instruction families.  disasm()
 * prints the stem followed by the condition suffix from whichcond().
 */
static const char *icn[] = { "cmov", "j", "set" };
689
690 /* and the corresponding opcodes */
691 static int ico[] = {
692 I_CMOVcc,
693 I_Jcc,
694 I_SETcc
695 };
696
/* Upper bound on the byte length of a single instruction. */
#define INSN_MAX 32

/*
 * Template lists, declared here and defined elsewhere.  disasm()
 * indexes this table with the first non-prefix byte and walks the
 * selected list until it reaches a NULL entry.
 */
extern struct itemplate **itable[];

/* Disassembler entry point, defined further down in this file. */
long disasm (unsigned char *data, char *output, int segsize, long offset);
700
/*
 * Bit flags stored in the `segment' field of an operand while an
 * instruction is being disassembled.
 */
#define SEG_RELATIVE (1 << 0)   /* offset is relative to the end of the instruction */
#define SEG_32BIT    (1 << 1)   /* relative target is not wrapped to 16 bits */
#define SEG_RMREG    (1 << 2)   /* basereg holds a raw register number */
#define SEG_DISP8    (1 << 3)   /* effective address has an 8-bit displacement */
#define SEG_DISP16   (1 << 4)   /* effective address has a 16-bit displacement */
#define SEG_DISP32   (1 << 5)   /* effective address has a 32-bit displacement */
#define SEG_NODISP   (1 << 6)   /* effective address has no displacement */
#define SEG_SIGNED   (1 << 7)   /* byte immediate is printed with an explicit sign */
713
714 static int whichreg(long regflags, int regval)
715 {
716 /* automatically generated from ./regs.dat - do not edit */
717 static const int creg [] = {R_CR0,R_CR1,R_CR2,R_CR3,R_CR4,R_CR5,R_CR6,R_CR7};
718 static const int dreg [] = {R_DR0,R_DR1,R_DR2,R_DR3,R_DR4,R_DR5,R_DR6,R_DR7};
719 static const int fpureg [] = {R_ST0,R_ST1,R_ST2,R_ST3,R_ST4,R_ST5,R_ST6,R_ST7};
720 static const int mmxreg [] = {R_MM0,R_MM1,R_MM2,R_MM3,R_MM4,R_MM5,R_MM6,R_MM7};
721 static const int reg16 [] = {R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI};
722 static const int reg32 [] = {R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI};
723 static const int reg8 [] = {R_AL,R_CL,R_DL,R_BL,R_AH,R_CH,R_DH,R_BH};
724 static const int sreg [] = {R_ES,R_CS,R_SS,R_DS,R_FS,R_GS,R_SEGR6,R_SEGR7};
725 static const int treg [] = {R_TR0,R_TR1,R_TR2,R_TR3,R_TR4,R_TR5,R_TR6,R_TR7};
726 static const int xmmreg [] = {R_XMM0,R_XMM1,R_XMM2,R_XMM3,R_XMM4,R_XMM5,R_XMM6,R_XMM7};
727
728 if (!(REG_AL & ~regflags))
729 return R_AL;
730 if (!(REG_AX & ~regflags))
731 return R_AX;
732 if (!(REG_EAX & ~regflags))
733 return R_EAX;
734 if (!(REG_DL & ~regflags))
735 return R_DL;
736 if (!(REG_DX & ~regflags))
737 return R_DX;
738 if (!(REG_EDX & ~regflags))
739 return R_EDX;
740 if (!(REG_CL & ~regflags))
741 return R_CL;
742 if (!(REG_CX & ~regflags))
743 return R_CX;
744 if (!(REG_ECX & ~regflags))
745 return R_ECX;
746 if (!(FPU0 & ~regflags))
747 return R_ST0;
748 if (!(REG_CS & ~regflags))
749 return (regval == 1) ? R_CS : 0;
750 if (!(REG_DESS & ~regflags))
751 return (regval == 0 || regval == 2 || regval == 3 ? sreg[regval] : 0);
752 if (!(REG_FSGS & ~regflags))
753 return (regval == 4 || regval == 5 ? sreg[regval] : 0);
754 if (!(REG_SEG67 & ~regflags))
755 return (regval == 6 || regval == 7 ? sreg[regval] : 0);
756
757 /* All the entries below look up regval in an 8-entry array */
758 if (regval < 0 || regval > 7)
759 return 0;
760
761 if (!((REGMEM|BITS8) & ~regflags))
762 return reg8[regval];
763 if (!((REGMEM|BITS16) & ~regflags))
764 return reg16[regval];
765 if (!((REGMEM|BITS32) & ~regflags))
766 return reg32[regval];
767 if (!(REG_SREG & ~regflags))
768 return sreg[regval];
769 if (!(REG_CREG & ~regflags))
770 return creg[regval];
771 if (!(REG_DREG & ~regflags))
772 return dreg[regval];
773 if (!(REG_TREG & ~regflags))
774 return treg[regval];
775 if (!(FPUREG & ~regflags))
776 return fpureg[regval];
777 if (!(MMXREG & ~regflags))
778 return mmxreg[regval];
779 if (!(XMMREG & ~regflags))
780 return xmmreg[regval];
781
782 return 0;
783 }
784
785 static const char *whichcond(int condval)
786 {
787 static int conds[] = {
788 C_O, C_NO, C_C, C_NC, C_Z, C_NZ, C_NA, C_A,
789 C_S, C_NS, C_PE, C_PO, C_L, C_NL, C_NG, C_G
790 };
791 return conditions[conds[condval]];
792 }
793
794 /*
795 * Process an effective address (ModRM) specification.
796 */
797 static unsigned char *do_ea (unsigned char *data, int modrm, int asize,
798 int segsize, operand *op)
799 {
800 int mod, rm, scale, index, base;
801
802 mod = (modrm >> 6) & 03;
803 rm = modrm & 07;
804
805 if (mod == 3) { /* pure register version */
806 op->basereg = rm;
807 op->segment |= SEG_RMREG;
808 return data;
809 }
810
811 op->addr_size = 0;
812
813 if (asize == 16) {
814 /*
815 * <mod> specifies the displacement size (none, byte or
816 * word), and <rm> specifies the register combination.
817 * Exception: mod=0,rm=6 does not specify [BP] as one might
818 * expect, but instead specifies [disp16].
819 */
820 op->indexreg = op->basereg = -1;
821 op->scale = 1; /* always, in 16 bits */
822 switch (rm) {
823 case 0: op->basereg = R_BX; op->indexreg = R_SI; break;
824 case 1: op->basereg = R_BX; op->indexreg = R_DI; break;
825 case 2: op->basereg = R_BP; op->indexreg = R_SI; break;
826 case 3: op->basereg = R_BP; op->indexreg = R_DI; break;
827 case 4: op->basereg = R_SI; break;
828 case 5: op->basereg = R_DI; break;
829 case 6: op->basereg = R_BP; break;
830 case 7: op->basereg = R_BX; break;
831 }
832 if (rm == 6 && mod == 0) { /* special case */
833 op->basereg = -1;
834 if (segsize != 16)
835 op->addr_size = 16;
836 mod = 2; /* fake disp16 */
837 }
838 switch (mod) {
839 case 0:
840 op->segment |= SEG_NODISP;
841 break;
842 case 1:
843 op->segment |= SEG_DISP8;
844 op->offset = (signed char) *data++;
845 break;
846 case 2:
847 op->segment |= SEG_DISP16;
848 op->offset = *data++;
849 op->offset |= ((unsigned) *data++) << 8;
850 break;
851 }
852 return data;
853 } else {
854 /*
855 * Once again, <mod> specifies displacement size (this time
856 * none, byte or *dword*), while <rm> specifies the base
857 * register. Again, [EBP] is missing, replaced by a pure
858 * disp32 (this time that's mod=0,rm=*5*). However, rm=4
859 * indicates not a single base register, but instead the
860 * presence of a SIB byte...
861 */
862 op->indexreg = -1;
863 switch (rm) {
864 case 0: op->basereg = R_EAX; break;
865 case 1: op->basereg = R_ECX; break;
866 case 2: op->basereg = R_EDX; break;
867 case 3: op->basereg = R_EBX; break;
868 case 5: op->basereg = R_EBP; break;
869 case 6: op->basereg = R_ESI; break;
870 case 7: op->basereg = R_EDI; break;
871 }
872 if (rm == 5 && mod == 0) {
873 op->basereg = -1;
874 if (segsize != 32)
875 op->addr_size = 32;
876 mod = 2; /* fake disp32 */
877 }
878 if (rm == 4) { /* process SIB */
879 scale = (*data >> 6) & 03;
880 index = (*data >> 3) & 07;
881 base = *data & 07;
882 data++;
883
884 op->scale = 1 << scale;
885 switch (index) {
886 case 0: op->indexreg = R_EAX; break;
887 case 1: op->indexreg = R_ECX; break;
888 case 2: op->indexreg = R_EDX; break;
889 case 3: op->indexreg = R_EBX; break;
890 case 4: op->indexreg = -1; break;
891 case 5: op->indexreg = R_EBP; break;
892 case 6: op->indexreg = R_ESI; break;
893 case 7: op->indexreg = R_EDI; break;
894 }
895
896 switch (base) {
897 case 0: op->basereg = R_EAX; break;
898 case 1: op->basereg = R_ECX; break;
899 case 2: op->basereg = R_EDX; break;
900 case 3: op->basereg = R_EBX; break;
901 case 4: op->basereg = R_ESP; break;
902 case 6: op->basereg = R_ESI; break;
903 case 7: op->basereg = R_EDI; break;
904 case 5:
905 if (mod == 0) {
906 mod = 2;
907 op->basereg = -1;
908 } else
909 op->basereg = R_EBP;
910 break;
911 }
912 }
913 switch (mod) {
914 case 0:
915 op->segment |= SEG_NODISP;
916 break;
917 case 1:
918 op->segment |= SEG_DISP8;
919 op->offset = (signed char) *data++;
920 break;
921 case 2:
922 op->segment |= SEG_DISP32;
923 op->offset = *data++;
924 op->offset |= ((unsigned) *data++) << 8;
925 op->offset |= ((long) *data++) << 16;
926 op->offset |= ((long) *data++) << 24;
927 break;
928 }
929 return data;
930 }
931 }
932
933 /*
934 * Determine whether the instruction template in t corresponds to the data
935 * stream in data. Return the number of bytes matched if so.
936 */
937 static int matches (struct itemplate *t, unsigned char *data, int asize,
938 int osize, int segsize, int rep, insn *ins)
939 {
940 unsigned char * r = (unsigned char *)(t->code);
941 unsigned char * origdata = data;
942 int a_used = FALSE, o_used = FALSE;
943 int drep = 0;
944
945 if ( rep == 0xF2 )
946 drep = P_REPNE;
947 else if ( rep == 0xF3 )
948 drep = P_REP;
949
950 while (*r)
951 {
952 int c = *r++;
953 if (c >= 01 && c <= 03) {
954 while (c--)
955 if (*r++ != *data++)
956 return FALSE;
957 }
958 if (c == 04) {
959 switch (*data++) {
960 case 0x07: ins->oprs[0].basereg = 0; break;
961 case 0x17: ins->oprs[0].basereg = 2; break;
962 case 0x1F: ins->oprs[0].basereg = 3; break;
963 default: return FALSE;
964 }
965 }
966 if (c == 05) {
967 switch (*data++) {
968 case 0xA1: ins->oprs[0].basereg = 4; break;
969 case 0xA9: ins->oprs[0].basereg = 5; break;
970 default: return FALSE;
971 }
972 }
973 if (c == 06) {
974 switch (*data++) {
975 case 0x06: ins->oprs[0].basereg = 0; break;
976 case 0x0E: ins->oprs[0].basereg = 1; break;
977 case 0x16: ins->oprs[0].basereg = 2; break;
978 case 0x1E: ins->oprs[0].basereg = 3; break;
979 default: return FALSE;
980 }
981 }
982 if (c == 07) {
983 switch (*data++) {
984 case 0xA0: ins->oprs[0].basereg = 4; break;
985 case 0xA8: ins->oprs[0].basereg = 5; break;
986 default: return FALSE;
987 }
988 }
989 if (c >= 010 && c <= 012) {
990 int t = *r++, d = *data++;
991 if (d < t || d > t+7)
992 return FALSE;
993 else {
994 ins->oprs[c-010].basereg = d-t;
995 ins->oprs[c-010].segment |= SEG_RMREG;
996 }
997 }
998 if (c == 017)
999 if (*data++)
1000 return FALSE;
1001 if (c >= 014 && c <= 016) {
1002 ins->oprs[c-014].offset = (signed char) *data++;
1003 ins->oprs[c-014].segment |= SEG_SIGNED;
1004 }
1005 if (c >= 020 && c <= 022)
1006 ins->oprs[c-020].offset = *data++;
1007 if (c >= 024 && c <= 026)
1008 ins->oprs[c-024].offset = *data++;
1009 if (c >= 030 && c <= 032) {
1010 ins->oprs[c-030].offset = *data++;
1011 ins->oprs[c-030].offset |= (((unsigned) *data++) << 8);
1012 }
1013 if (c >= 034 && c <= 036) {
1014 ins->oprs[c-034].offset = *data++;
1015 ins->oprs[c-034].offset |= (((unsigned) *data++) << 8);
1016 if (osize == 32) {
1017 ins->oprs[c-034].offset |= (((long) *data++) << 16);
1018 ins->oprs[c-034].offset |= (((long) *data++) << 24);
1019 }
1020 if (segsize != asize)
1021 ins->oprs[c-034].addr_size = asize;
1022 }
1023 if (c >= 040 && c <= 042) {
1024 ins->oprs[c-040].offset = *data++;
1025 ins->oprs[c-040].offset |= (((unsigned) *data++) << 8);
1026 ins->oprs[c-040].offset |= (((long) *data++) << 16);
1027 ins->oprs[c-040].offset |= (((long) *data++) << 24);
1028 }
1029 if (c >= 044 && c <= 046) {
1030 ins->oprs[c-044].offset = *data++;
1031 ins->oprs[c-044].offset |= (((unsigned) *data++) << 8);
1032 if (asize == 32) {
1033 ins->oprs[c-044].offset |= (((long) *data++) << 16);
1034 ins->oprs[c-044].offset |= (((long) *data++) << 24);
1035 }
1036 if (segsize != asize)
1037 ins->oprs[c-044].addr_size = asize;
1038 }
1039 if (c >= 050 && c <= 052) {
1040 ins->oprs[c-050].offset = (signed char) *data++;
1041 ins->oprs[c-050].segment |= SEG_RELATIVE;
1042 }
1043 if (c >= 060 && c <= 062) {
1044 ins->oprs[c-060].offset = *data++;
1045 ins->oprs[c-060].offset |= (((unsigned) *data++) << 8);
1046 ins->oprs[c-060].segment |= SEG_RELATIVE;
1047 ins->oprs[c-060].segment &= ~SEG_32BIT;
1048 }
1049 if (c >= 064 && c <= 066) {
1050 ins->oprs[c-064].offset = *data++;
1051 ins->oprs[c-064].offset |= (((unsigned) *data++) << 8);
1052 if (osize == 32) {
1053 ins->oprs[c-064].offset |= (((long) *data++) << 16);
1054 ins->oprs[c-064].offset |= (((long) *data++) << 24);
1055 ins->oprs[c-064].segment |= SEG_32BIT;
1056 } else
1057 ins->oprs[c-064].segment &= ~SEG_32BIT;
1058 ins->oprs[c-064].segment |= SEG_RELATIVE;
1059 if (segsize != osize) {
1060 ins->oprs[c-064].type =
1061 (ins->oprs[c-064].type & NON_SIZE)
1062 | ((osize == 16) ? BITS16 : BITS32);
1063 }
1064 }
1065 if (c >= 070 && c <= 072) {
1066 ins->oprs[c-070].offset = *data++;
1067 ins->oprs[c-070].offset |= (((unsigned) *data++) << 8);
1068 ins->oprs[c-070].offset |= (((long) *data++) << 16);
1069 ins->oprs[c-070].offset |= (((long) *data++) << 24);
1070 ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE;
1071 }
1072 if (c >= 0100 && c < 0130) {
1073 int modrm = *data++;
1074 ins->oprs[c & 07].basereg = (modrm >> 3) & 07;
1075 ins->oprs[c & 07].segment |= SEG_RMREG;
1076 data = do_ea (data, modrm, asize, segsize,
1077 &ins->oprs[(c >> 3) & 07]);
1078 }
1079 if (c >= 0130 && c <= 0132) {
1080 ins->oprs[c-0130].offset = *data++;
1081 ins->oprs[c-0130].offset |= (((unsigned) *data++) << 8);
1082 }
1083 if (c >= 0140 && c <= 0142) {
1084 ins->oprs[c-0140].offset = *data++;
1085 ins->oprs[c-0140].offset |= (((unsigned) *data++) << 8);
1086 ins->oprs[c-0140].offset |= (((long) *data++) << 16);
1087 ins->oprs[c-0140].offset |= (((long) *data++) << 24);
1088 }
1089 if (c >= 0200 && c <= 0277) {
1090 int modrm = *data++;
1091 if (((modrm >> 3) & 07) != (c & 07))
1092 return FALSE; /* spare field doesn't match up */
1093 data = do_ea (data, modrm, asize, segsize,
1094 &ins->oprs[(c >> 3) & 07]);
1095 }
1096 if (c >= 0300 && c <= 0302) {
1097 if (asize)
1098 ins->oprs[c-0300].segment |= SEG_32BIT;
1099 else
1100 ins->oprs[c-0300].segment &= ~SEG_32BIT;
1101 a_used = TRUE;
1102 }
1103 if (c == 0310) {
1104 if (asize == 32)
1105 return FALSE;
1106 else
1107 a_used = TRUE;
1108 }
1109 if (c == 0311) {
1110 if (asize == 16)
1111 return FALSE;
1112 else
1113 a_used = TRUE;
1114 }
1115 if (c == 0312) {
1116 if (asize != segsize)
1117 return FALSE;
1118 else
1119 a_used = TRUE;
1120 }
1121 if (c == 0320) {
1122 if (osize == 32)
1123 return FALSE;
1124 else
1125 o_used = TRUE;
1126 }
1127 if (c == 0321) {
1128 if (osize == 16)
1129 return FALSE;
1130 else
1131 o_used = TRUE;
1132 }
1133 if (c == 0322) {
1134 if (osize != segsize)
1135 return FALSE;
1136 else
1137 o_used = TRUE;
1138 }
1139 if (c == 0330) {
1140 int t = *r++, d = *data++;
1141 if (d < t || d > t+15)
1142 return FALSE;
1143 else
1144 ins->condition = d - t;
1145 }
1146 if (c == 0331) {
1147 if ( rep )
1148 return FALSE;
1149 }
1150 if (c == 0332) {
1151 if (drep == P_REP)
1152 drep = P_REPE;
1153 }
1154 if (c == 0333) {
1155 if ( rep != 0xF3 )
1156 return FALSE;
1157 drep = 0;
1158 }
1159 }
1160
1161 /*
1162 * Check for unused rep or a/o prefixes.
1163 */
1164 ins->nprefix = 0;
1165 if (drep)
1166 ins->prefixes[ins->nprefix++] = drep;
1167 if (!a_used && asize != segsize)
1168 ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32);
1169 if (!o_used && osize != segsize)
1170 ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32);
1171
1172 return data - origdata;
1173 }
1174
1175 long disasm (unsigned char *data, char *output, int segsize, long offset)
1176 {
1177 struct itemplate **p, **best_p;
1178 int length, best_length = 0;
1179 char *segover;
1180 int rep, lock, asize, osize, i, slen, colon;
1181 unsigned char *origdata;
1182 int works;
1183 insn tmp_ins, ins;
1184 unsigned long goodness, best;
1185
1186 /*
1187 * Scan for prefixes.
1188 */
1189 asize = osize = segsize;
1190 segover = NULL;
1191 ins.condition = ins.nprefix = rep = lock = 0;
1192 origdata = data;
1193 for (;;) {
1194 if (*data == 0xF3 || *data == 0xF2)
1195 rep = *data++;
1196 else if (*data == 0xF0)
1197 lock = *data++;
1198 else if (*data == 0x2E || *data == 0x36 || *data == 0x3E ||
1199 *data == 0x26 || *data == 0x64 || *data == 0x65) {
1200 switch (*data++) {
1201 case 0x2E: segover = "cs"; break;
1202 case 0x36: segover = "ss"; break;
1203 case 0x3E: segover = "ds"; break;
1204 case 0x26: segover = "es"; break;
1205 case 0x64: segover = "fs"; break;
1206 case 0x65: segover = "gs"; break;
1207 }
1208 } else if (*data == 0x66)
1209 osize = 48 - segsize, data++;
1210 else if (*data == 0x67)
1211 asize = 48 - segsize, data++;
1212 else
1213 break;
1214 }
1215
1216 tmp_ins.oprs[0].segment = tmp_ins.oprs[1].segment =
1217 tmp_ins.oprs[2].segment =
1218 tmp_ins.oprs[0].addr_size = tmp_ins.oprs[1].addr_size =
1219 tmp_ins.oprs[2].addr_size = (segsize == 16 ? 0 : SEG_32BIT);
1220 tmp_ins.condition = -1;
1221 best = ~0UL; /* Worst possible */
1222 best_p = NULL;
1223 for (p = itable[*data]; *p; p++) {
1224 if ( (length = matches(*p, data, asize, osize,
1225 segsize, rep, &tmp_ins)) ) {
1226 works = TRUE;
1227 /*
1228 * Final check to make sure the types of r/m match up.
1229 */
1230 for (i = 0; i < (*p)->operands; i++) {
1231 if (
1232 /* If it's a mem-only EA but we have a register, die. */
1233 ((tmp_ins.oprs[i].segment & SEG_RMREG) &&
1234 !(MEMORY & ~(*p)->opd[i])) ||
1235
1236 /* If it's a reg-only EA but we have a memory ref, die. */
1237 (!(tmp_ins.oprs[i].segment & SEG_RMREG) &&
1238 !(REGNORM & ~(*p)->opd[i]) &&
1239 !((*p)->opd[i] & REG_SMASK)) ||
1240
1241 /* Register type mismatch (eg FS vs REG_DESS): die. */
1242 ((((*p)->opd[i] & (REGISTER | FPUREG)) ||
1243 (tmp_ins.oprs[i].segment & SEG_RMREG)) &&
1244 !whichreg ((*p)->opd[i], tmp_ins.oprs[i].basereg))) {
1245 works = FALSE;
1246 break;
1247 }
1248 }
1249
1250 if (works) {
1251 goodness = (*p)->flags & IF_PFMASK;
1252 if ( goodness < best ) {
1253 /* This is the best one found so far */
1254 best = goodness;
1255 best_p = p;
1256 best_length = length;
1257 ins = tmp_ins;
1258 }
1259 }
1260 }
1261 }
1262
1263 if (!best_p)
1264 return 0; /* no instruction was matched */
1265
1266 /* Pick the best match */
1267 p = best_p;
1268 length = best_length;
1269
1270 slen = 0;
1271
1272 if (lock)
1273 slen += sprintf(output+slen, "lock ");
1274 for (i = 0; i < ins.nprefix; i++)
1275 switch (ins.prefixes[i]) {
1276 case P_REP: slen += sprintf(output+slen, "rep "); break;
1277 case P_REPE: slen += sprintf(output+slen, "repe "); break;
1278 case P_REPNE: slen += sprintf(output+slen, "repne "); break;
1279 case P_A16: slen += sprintf(output+slen, "a16 "); break;
1280 case P_A32: slen += sprintf(output+slen, "a32 "); break;
1281 case P_O16: slen += sprintf(output+slen, "o16 "); break;
1282 case P_O32: slen += sprintf(output+slen, "o32 "); break;
1283 }
1284
1285 for (i = 0; i < elements(ico); i++)
1286 if ((*p)->opcode == ico[i]) {
1287 slen += sprintf(output+slen, "%s%s", icn[i],
1288 whichcond(ins.condition));
1289 break;
1290 }
1291 if (i >= elements(ico))
1292 slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]);
1293 colon = FALSE;
1294 length += data - origdata; /* fix up for prefixes */
1295 for (i=0; i<(*p)->operands; i++) {
1296 output[slen++] = (colon ? ':' : i==0 ? ' ' : ',');
1297
1298 if (ins.oprs[i].segment & SEG_RELATIVE) {
1299 ins.oprs[i].offset += offset + length;
1300 /*
1301 * sort out wraparound
1302 */
1303 if (!(ins.oprs[i].segment & SEG_32BIT))
1304 ins.oprs[i].offset &= 0xFFFF;
1305 }
1306
1307 if ((*p)->opd[i] & COLON)
1308 colon = TRUE;
1309 else
1310 colon = FALSE;
1311
1312 if (((*p)->opd[i] & (REGISTER | FPUREG)) ||
1313 (ins.oprs[i].segment & SEG_RMREG))
1314 {
1315 ins.oprs[i].basereg = whichreg ((*p)->opd[i],
1316 ins.oprs[i].basereg);
1317 if ( (*p)->opd[i] & TO )
1318 slen += sprintf(output+slen, "to ");
1319 slen += sprintf(output+slen, "%s",
1320 reg_names[ins.oprs[i].basereg-EXPR_REG_START]);
1321 } else if (!(UNITY & ~(*p)->opd[i])) {
1322 output[slen++] = '1';
1323 } else if ( (*p)->opd[i] & IMMEDIATE ) {
1324 if ( (*p)->opd[i] & BITS8 ) {
1325 slen += sprintf(output+slen, "byte ");
1326 if (ins.oprs[i].segment & SEG_SIGNED) {
1327 if (ins.oprs[i].offset < 0) {
1328 ins.oprs[i].offset *= -1;
1329 output[slen++] = '-';
1330 } else
1331 output[slen++] = '+';
1332 }
1333 } else if ( (*p)->opd[i] & BITS16 ) {
1334 slen += sprintf(output+slen, "word ");
1335 } else if ( (*p)->opd[i] & BITS32 ) {
1336 slen += sprintf(output+slen, "dword ");
1337 } else if ( (*p)->opd[i] & NEAR ) {
1338 slen += sprintf(output+slen, "near ");
1339 } else if ( (*p)->opd[i] & SHORT ) {
1340 slen += sprintf(output+slen, "short ");
1341 }
1342 slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
1343 } else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) {
1344 slen += sprintf(output+slen, "[%s%s%s0x%lx]",
1345 (segover ? segover : ""),
1346 (segover ? ":" : ""),
1347 (ins.oprs[i].addr_size == 32 ? "dword " :
1348 ins.oprs[i].addr_size == 16 ? "word " : ""),
1349 ins.oprs[i].offset);
1350 segover = NULL;
1351 } else if ( !(REGMEM & ~(*p)->opd[i]) ) {
1352 int started = FALSE;
1353 if ( (*p)->opd[i] & BITS8 )
1354 slen += sprintf(output+slen, "byte ");
1355 if ( (*p)->opd[i] & BITS16 )
1356 slen += sprintf(output+slen, "word ");
1357 if ( (*p)->opd[i] & BITS32 )
1358 slen += sprintf(output+slen, "dword ");
1359 if ( (*p)->opd[i] & BITS64 )
1360 slen += sprintf(output+slen, "qword ");
1361 if ( (*p)->opd[i] & BITS80 )
1362 slen += sprintf(output+slen, "tword ");
1363 if ( (*p)->opd[i] & FAR )
1364 slen += sprintf(output+slen, "far ");
1365 if ( (*p)->opd[i] & NEAR )
1366 slen += sprintf(output+slen, "near ");
1367 output[slen++] = '[';
1368 if (ins.oprs[i].addr_size)
1369 slen += sprintf(output+slen, "%s",
1370 (ins.oprs[i].addr_size == 32 ? "dword " :
1371 ins.oprs[i].addr_size == 16 ? "word " : ""));
1372 if (segover) {
1373 slen += sprintf(output+slen, "%s:", segover);
1374 segover = NULL;
1375 }
1376 if (ins.oprs[i].basereg != -1) {
1377 slen += sprintf(output+slen, "%s",
1378 reg_names[(ins.oprs[i].basereg -
1379 EXPR_REG_START)]);
1380 started = TRUE;
1381 }
1382 if (ins.oprs[i].indexreg != -1) {
1383 if (started)
1384 output[slen++] = '+';
1385 slen += sprintf(output+slen, "%s",
1386 reg_names[(ins.oprs[i].indexreg -
1387 EXPR_REG_START)]);
1388 if (ins.oprs[i].scale > 1)
1389 slen += sprintf(output+slen, "*%d", ins.oprs[i].scale);
1390 started = TRUE;
1391 }
1392 if (ins.oprs[i].segment & SEG_DISP8) {
1393 int sign = '+';
1394 if (ins.oprs[i].offset & 0x80) {
1395 ins.oprs[i].offset = - (signed char) ins.oprs[i].offset;
1396 sign = '-';
1397 }
1398 slen += sprintf(output+slen, "%c0x%lx", sign,
1399 ins.oprs[i].offset);
1400 } else if (ins.oprs[i].segment & SEG_DISP16) {
1401 if (started)
1402 output[slen++] = '+';
1403 slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
1404 } else if (ins.oprs[i].segment & SEG_DISP32) {
1405 if (started)
1406 output[slen++] = '+';
1407 slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset);
1408 }
1409 output[slen++] = ']';
1410 } else {
1411 slen += sprintf(output+slen, "<operand%d>", i);
1412 }
1413 }
1414 output[slen] = '\0';
1415 if (segover) { /* unused segment override */
1416 char *p = output;
1417 int count = slen+1;
1418 while (count--)
1419 p[count+3] = p[count];
1420 strncpy (output, segover, 2);
1421 output[2] = ' ';
1422 }
1423 return length;
1424 }