CS 367-002 "
Machine-Level Programming (4) Structured Data
Basic
Data
Types
Integral
Stored
&
operated
on
in
general
(integer)
registers
Signed
vs.
unsigned
depends
on
instruc;ons
used
Intel byte word double
word quad
word
ASM
b
w
l
q
Bytes
1
2
4
8
C
[unsigned]
[unsigned]
[unsigned]
[unsigned]
char short
int long int (x86-64)
Floa;ng
Point
Stored
&
operated
on
in
oa;ng
point
registers
Intel Single Double Extended
ASM
s
l
t
Bytes
4
8
10/12/16
C
float
double
long double
2!
Array
Alloca;on
Basic
Principle
T
A[L];
Array
of
data
type
T
and
length
L
Con;guously
allocated
region
of
L
*
sizeof(T)
bytes
char string[12]; x
int val[5]; x
double a[3]; x
char *p[3]; x
x
+
4
x
+
8
x
+
12
x
+
8
x
+
16
x
+
4
x
+
8
x
+
12
x
+
16
x
+
20
x
+
12
x
+
24
3!
Array
Access
Basic
Principle
T
A[L];
Array
of
data
type
T
and
length
L
Iden;er
A
can
be
used
as
a
pointer
to
array
element
0:
Type
T*
int val[5]; x
1
x
+
4
5
x
+
8
2
x
+
12
1
x
+
16
3
x
+
20
Reference
val[4] val val+1 &val[2] val[5] *(val+1) val + i
Type
int int
int
int
int
int
int * * *
Value
3
x
x
+
4
x
+
8
??
5
x
+
4
i
4!
Array
Example
#define ZLEN 5 typedef int zip_dig[ZLEN]; zip_dig cmu = { 1, 5, 2, 1, 3 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; zip_dig cmu; 16
zip_dig mit; 36
zip_dig ucb; 56
9
60
0
40
4
64
1
20
2
44
7
68
5
24
1
48
2
72
2
28
3
52
0
76
1
32
9
56
3
36
Declara;on
zip_dig cmu
equivalent
to
int cmu[5]
Example
arrays
were
allocated
in
successive
20
byte
blocks
Not
guaranteed
to
happen
in
general
5!
Array
Accessing
Example
zip_dig cmu; 16
1
20
5
24
2
28
1
32
3
36
int get_digit (zip_dig z, int dig) { return z[dig]; } IA32
# %edx = z # %eax = dig movl (%edx,%eax,4),%eax
# z[dig]
Register
%edx
contains
star;ng
address
of
array
Register
%eax
contains
array
index
Desired
digit
at
4*%eax + %edx
Use
memory
reference
(%edx,%eax,4)
6!
Array
Loop
Example
(IA32)
void zincr(zip_dig z) { int i; for (i = 0; i < ZLEN; i++) z[i]++; }
# edx movl .L4: addl addl cmpl jne
= z $0, %eax
# %eax = i # loop: $1, (%edx,%eax,4) # z[i]++ $1, %eax # i++ $5, %eax # i:5 .L4 # if !=, goto loop
7!
Pointer
Loop
Example
(IA32)
void zincr_p(zip_dig z) { int *zend = z+ZLEN; do { (*z)++; z++; } while (z != zend); } void zincr_v(zip_dig z) { void *vz = z; int i = 0; do { (*((int *) (vz+i)))++; i += ISIZE; } while (i != ISIZE*ZLEN); }
# edx movl .L8: addl addl cmpl jne
= z = vz $0, %eax $1, (%edx,%eax) $4, %eax $20, %eax .L8
# i = 0 # loop: # Increment vz+i # i += 4 # Compare i:20 # if !=, goto loop
8!
Nested
Array
Example
#define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }};
zip_dig pgh[4];
1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1 76 96 116 136 156
zip_dig pgh[4]
equivalent
to
int pgh[4][5]
Variable
pgh:
array
of
4
elements,
allocated
con;guously
Each
element
is
an
array
of
5
ints,
allocated
con;guously
9!
Row-Major
ordering
of
all
elements
guaranteed
Mul;dimensional
(Nested)
Arrays
Declara;on
T
A[R][C];
2D
array
of
data
type
T
R
rows,
C
columns
Type
T
element
requires
K
bytes
A[0][0] A[R-1][0] A[0][C-1] A[R-1][C-1]
Array
Size
R
*
C
*
K
bytes
Arrangement
Row-Major
Ordering
int A[R][C];
A [0] [0]
A A [0] [1] [C-1] [0]
A [1] [C-1]
A A [R-1] [R-1] [0] [C-1]
10 !
4*R*C
Bytes
Nested
Array
Row
Access
Row
Vectors
A[i]
is
array
of
C
elements
Each
element
of
type
T
requires
K
bytes
Star;ng
address
A +
i
*
(C
*
K)
int A[R][C];
A[0]
A [0] [0] A [0] [C-1] A [i] [0] A[i]
A [i] [C-1] A A[R-1]
A [R-1] [C-1]
[R-1]
[0]
A+i*C*4
A+(R-1)*C*4
11 !
Nested
Array
Row
Access
Code
int *get_pgh_zip(int index) { return pgh[index]; } #define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }};
# %eax = index leal (%eax,%eax,4),%eax # 5 * index leal pgh(,%eax,4),%eax # pgh + (20 * index)
Row
Vector
pgh[index]
is
array
of
5
ints
Star;ng
address
pgh+20*index Computes
and
returns
address
Compute
as
pgh + 4*(index+4*index)
12 !
IA32
Code
Nested
Array
Row
Access
Array
Elements
A[i][j]
is
element
of
type
T,
which
requires
K
bytes Address
A + i
*
(C
*
K)
+
j
*
K
=
A
+
(i
*
C
+
j)*
K
int A[R][C];
A[0]
A [0] [0] A [0] [C-1] A[i]
A
[i]
[j] A A[R-1]
A [R-1] [C-1]
[R-1]
[0]
A+i*C*4 A+i*C*4+j*4
A+(R-1)*C*4
13 !
Nested
Array
Element
Access
Code
int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } movl leal addl movl 8(%ebp), %eax (%eax,%eax,4), %eax 12(%ebp), %eax pgh(,%eax,4), %eax # # # # index 5*index 5*index+dig offset 4*(5*index+dig)
Array
Elements
pgh[index][dig]
is
int Address:
pgh + 20*index + 4*dig
= pgh + 4*(5*index + dig)
IA32
Code
Computes
address
pgh + 4*((index+4*index)+dig)
14 !
Referencing Examples"
zip_dig pgh[4]; 1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1 76 96 116 136 156
"Reference "Address
"Value "Guaranteed?"
pgh[3][3] 76+20*3+4*3 = 148 2 pgh[2][5] 76+20*2+4*5 = 136 1 pgh[2][-1] 76+20*2+4*-1 = 112 3 pgh[4][-1] 76+20*4+4*-1 = 152 1 pgh[0][19] 76+20*0+4*19 = 152 1 " pgh[0][-1] 76+20*0+4*-1 = 72 ?? " Code does not do any bounds checking" Ordering of elements within array guaranteed"
15 !
Mul;-Level
Array
Example
zip_dig cmu = { 1, 5, 2, 1, 3 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; #define UCOUNT 3 int *univ[UCOUNT] = {mit, cmu, ucb};
Variable
univ
denotes
array
of
3
elements
Each
element
is
a
pointer
4
bytes
Each
pointer
points
to
array
of
ints
2
24
2
1
44
4
7
64
68
48
2
72
28
3
52
0
76
16 !
cmu univ 160 164 168 36 16 56 mit 16
1
20
0
40
9
56
60
1
32
3
36
9
56
ucb 36
Element
Access
in
Mul;-Level
Array
int get_univ_digit (int index, int dig) { return univ[index][dig]; } movl movl movl movl 8(%ebp), %eax univ(,%eax,4), %edx 12(%ebp), %eax (%edx,%eax,4), %eax # # # # index p = univ[index] dig p[dig]
Computa;on
(IA32)
Element
access
Mem[Mem[univ+4*index]+4*dig] Must
do
two
memory
reads
First
get
pointer
to
row
array
Then
access
element
within
array
17 !
Array
Element
Accesses
Nested
array
int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } Mul;-level
array
int get_univ_digit (int index, int dig) { return univ[index][dig]; }
Accesses
looks
similar
in
C,
but
addresses
very
dierent:
Mem[pgh+20*index+4*dig]
Mem[Mem[univ+4*index]+4*dig]"
18 !
N
X
N
Matrix
Code
#define N 16fix_matrix[N][N]; typedef int
Fixed
dimensions
Know
value
of
N
at
compile
;me
/* Get element a[i][j] */ int fix_ele (fix_matrix a, int i, int j) { return a[i][j]; } #define IDX(n, i, j) ((i)*(n)+(j)) /* Get element a[i][j] */ int vec_ele (int n, int *a, int i, int j) { return a[IDX(n,i,j)]; } /* Get element a[i][j] */ int var_ele (int n, int a[n][n], int i, int j) { return a[i][j]; }
19 !
Variable
dimensions,
explicit
indexing
Tradi;onal
way
to
implement
dynamic
arrays
Variable
dimensions,
implicit
indexing
Supported
by
ISO
C99
16
X
16
Matrix
Access
Array
Elements
Address
A + i
*
(C
*
K)
+
j
*
K
C
=
16,
K
=
4
/* Get element a[i][j] */ int fix_ele(fix_matrix a, int i, int j) { return a[i][j]; } movl sall movl sall addl movl 12(%ebp), %edx $6, %edx 16(%ebp), %eax $2, %eax 8(%ebp), %eax (%eax,%edx), %eax # # # # # # i i*64 j j*4 a + j*4 *(a + j*4 + i*64)
20 !
n
X
n
Matrix
Access
Array
Elements
Address
A + i
*
(C
*
K)
+
j
*
K
C
=
n,
K
=
4
/* Get element a[i][j] */ int var_ele(int n, int a[n][n], int i, int j) { return a[i][j]; } movl sall movl imull movl sall addl movl 8(%ebp), %eax $2, %eax %eax, %edx 16(%ebp), %edx 20(%ebp), %eax $2, %eax 12(%ebp), %eax (%eax,%edx), %eax # # # # # # # # n n*4 n*4 i*n*4 j j*4 a + j*4 *(a + j*4 + i*n*4)
21 !
Op;mizing
Fixed
Array
Access
a
j-th
column
#define N 16 typedef int fix_matrix[N][N]; /* Retrieve column j from array */ void fix_column (fix_matrix a, int j, int *dest) Step
through
all
elements
in
{ column
j
int i; for (i = 0; i < N; i++) Op;miza;on
dest[i] = a[i][j]; }
Computa;on
Retrieving
successive
elements
from
single
column
22 !
Op;mizing
Fixed
Array
Access
Op;miza;on
Compute
ajp
=
&a[i][j]
Ini;ally
=
a
+
4*j
Increment
by
4*N
Register
%ecx %ebx %edx .L8: movl movl addl addl cmpl jne
Value
ajp dest i
/* Retrieve column j from array */ void fix_column (fix_matrix a, int j, int *dest) { int i; for (i = 0; i < N; i++) dest[i] = a[i][j]; }
(%ecx), %eax %eax, (%ebx,%edx,4) $1, %edx $64, %ecx $16, %edx .L8
# loop: # Read *ajp # Save in dest[i] # i++ # ajp += 4*N # i:N # if !=, goto loop
23 !
Structure
Alloca;on
struct rec { int a[3]; int i; struct rec *n; }; Memory
Layout
a 0
i n 12 16 20
Concept
Con;guously-allocated
region
of
memory
Refer
to
members
within
structure
by
names
Members
may
be
of
dierent
types
24 !
Structure
Access
struct rec { int a[3]; int i; struct rec *n; };
r+12
a 0
n 12 16 20
Accessing
Structure
Member
Pointer
indicates
rst
byte
of
structure
Access
elements
with
osets
IA32
Assembly
# # # # %edx = val %eax = r movl %edx, 12(%eax) Mem[r+12] = val
void set_i(struct rec *r, int val) { r->i = val; }
25 !
Genera;ng
Pointer
to
rStructure
Member
r+idx*4
struct rec { int a[3]; int i; struct rec *n; };
a 0
n 12 16 20
Genera;ng
Pointer
to
Array
Element
Oset
of
each
structure
member
determined
at
compile
;me
Arguments
Mem[%ebp+8]:
r Mem[%ebp+12]:
idx
int *get_ap (struct rec *r, int idx) { return &r->a[idx]; } movl sall addl 12(%ebp), %eax $2, %eax 8(%ebp), %eax # Get idx # idx*4 # r+idx*4
26 !
Following
Linked
List
C
Code
void set_val (struct rec *r, int val) { while (r) { int i = r->i; r->a[i] = val; r = r->n; } }
struct rec { int a[3]; int i; struct rec *n; };
a 0
Element
i
Register
%edx %ecx # # # # # #
i n 12 16 20
Value
r val loop: r->i r->a[i] = val r = r->n Test r If != 0 goto loop
27 !
.L17: movl movl movl testl jne
12(%edx), %eax %ecx, (%edx,%eax,4) 16(%edx), %edx %edx, %edx .L17
Structures & Alignment"
Unaligned Data"
c i[0]
p+5
i[1]
p+9
v
p+17
p p+1
struct S1 { char c; int i[2]; double v; } *p;
Aligned Data"
Primitive data type requires K bytes" Address must be multiple of K"
3
bytes
c
p+0
i[0]
p+8
i[1]
4
bytes
v
p+16 p+24
p+4
Mul;ple
of
4
Mul;ple
of
8
Mul;ple
of
8
Mul;ple
of
8
28 !
Alignment Principles"
Aligned Data"
Primitive data type requires K bytes" Address must be multiple of K" Required on some machines; advised on IA32"
treated differently by IA32 Linux, x86-64 Linux, and Windows!"
Motivation for Aligning Data"
Memory accessed by (aligned) chunks of 4 or 8 bytes (system dependent)"
Inefcient to load or store datum that spans quad word boundaries " Virtual memory very tricky when datum spans 2 pages (more on
this later)"
Compiler"
Inserts gaps in structure to ensure correct alignment of elds"
29 !
Specic Cases of Alignment (IA32)"
1 byte: char, "
no restrictions on address" lowest 1 bit of address must be 02" lowest 2 bits of address must be 002" Windows (and most other OSs & instruction sets):"
lowest 3 bits of address must be 0002"
2 bytes: short, "
4 bytes: int, float, char *, "
8 bytes: double, "
Linux:"
lowest 2 bits of address must be 002" i.e., treated the same as a 4-byte primitive data type"
12 bytes: long double"
Windows, Linux:"
lowest 2 bits of address must be 002" i.e., treated the same as a 4-byte primitive data type"
30 !
Satisfying Alignment with Structures"
Within structure:"
Must satisfy each elements alignment requirement" Each structure has alignment requirement K"
K = Largest alignment of any element"
Overall structure placement"
Initial address & structure length must be multiples of K"
31 !
Different Alignment Conventions"
x86-64 or IA32 Windows:"
K = 8, due to double element"
struct S1 { char c; int i[2]; double v; } *p;
4
bytes
c p+0
3
bytes
i[0] p+8
i[1]
v p+16 p+24
p+4
IA32 Linux"
K = 4; double treated like a 4-byte data type"
3
bytes
c p+0
i[0] p+8
i[1] p+12
v p+20
32 !
p+4
Meeting Overall Alignment Requirement"
For largest alignment requirement K" Overall structure must be multiple of K"
struct S2 { double v; int i[2]; char c; } *p;
v p+0 p+8
i[0]
i[1]
c p+16
7
bytes
p+24
33 !
Arrays of Structures"
Overall structure length multiple of K" Satisfy alignment requirement for every element"
a[0] a+0! a+24! a[1] a+48! a[2] a+72! struct S2 { double v; int i[2]; char c; } a[10];
v a+24 a+32
i[0]
i[1]
c a+40
7
bytes
a+48
34 !
Saving Space"
Put large data types rst"
struct S4 { char c; int i; char d; } *p; struct S5 { int i; char c; char d; } *p;
Effect (K=4)"
c
3
bytes
i c d
2
bytes
3
bytes
35 !
Union Allocation"
Allocate according to largest element" Can only use one eld at a time"
union U1 { char c; int i[2]; double v; } *up; struct S1 { char c; int i[2]; double v; } *sp; c i[0] v up+0 up+4 up+8 i[1]
c sp+0
3
bytes
i[0] sp+8
i[1]
4
bytes
v sp+24
36 !
sp+4
sp+16
Using Union to Access Bit Patterns"
typedef union { float f; unsigned u; } bit_float_t; u f 0 4
float bit2float(unsigned u) { bit_float_t arg; arg.u = u; return arg.f; }
unsigned float2bit(float f) { bit_float_t arg; arg.f = f; return arg.u; }
What
does
this
code
do?
Not
same
as
(float)u!
What
does
this
code
do?
Not
same
as
(unsigned)f
!
37 !
Byte
Ordering
Revisited
Idea"
Short/long/quad words stored in memory as 2/4/8 consecutive bytes" Which is most (least) signicant?" Can cause problems when exchanging binary data between machines"
Big Endian"
Most signicant byte has lowest address" Sparc"
Little Endian"
Least signicant byte has lowest address" Intel x86"
38 !
Byte
Ordering
Example
union { unsigned unsigned unsigned unsigned } dw; char c[8]; short s[4]; int i[2]; long l[1];
32-bit
c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] s[0] i[0] l[0] s[1] s[2] i[1] s[3]
39 !
Byte
Ordering
Example
(Cont).
int j; for (j = 0; j < 8; j++) dw.c[j] = 0xf0 + j; printf("Characters 0-7 == [0x%x,0x%x,0x%x,0x%x, 0x%x,0x%x,0x%x,0x%x]\n", dw.c[0], dw.c[1], dw.c[2], dw.c[3], dw.c[4], dw.c[5], dw.c[6], dw.c[7]); printf("Shorts 0-3 == [0x%x,0x%x,0x%x,0x%x]\n", dw.s[0], dw.s[1], dw.s[2], dw.s[3]); printf("Ints 0-1 == [0x%x,0x%x]\n", dw.i[0], dw.i[1]); printf("Long 0 == [0x%lx]\n", dw.l[0]);
40 !
Byte
Ordering
on
IA32
Liqle
Endian
f0 f1 f2 f3 f4 f5 f6 f7
c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] s[0] i[0] l[0]
LSB
Print
MSB
LSB
MSB
s[1]
s[2] i[1]
s[3]
Output:
Characters Shorts Ints Long 0-7 0-3 0-1 0 == == == == [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7] [0xf1f0,0xf3f2,0xf5f4,0xf7f6] [0xf3f2f1f0,0xf7f6f5f4] [0xf3f2f1f0]
41 !
Byte
Ordering
on
Sun
Big
Endian
f0 f1 f2 f3 f4 f5 f6 f7
c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] s[0] i[0] l[0]
MSB
Print
LSB
MSB
LSB
s[1]
s[2] i[1]
s[3]
Output
on
Sun:
Characters Shorts Ints Long 0-7 0-3 0-1 0 == == == == [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7] [0xf0f1,0xf2f3,0xf4f5,0xf6f7] [0xf0f1f2f3,0xf4f5f6f7] [0xf0f1f2f3]
42 !