관리 메뉴

Wiredwisdom

C compile process 본문

Computer Science/Basic

C compile process

Duke_Wisdom 2025. 7. 27. 19:21

 

Example Files.


math_utils.h

/* Include guard: the declarations below are skipped if MATH_UTILS_H is already defined. */
#ifndef MATH_UTILS_H
#define MATH_UTILS_H

/* Prototypes only; the definitions live in add.c, multiply.c and subtract.c. */
int add(int a, int b);
int multiply(int a, int b);
int subtract(int a, int b);

#endif /* MATH_UTILS_H */

add.c

#include "math_utils.h"

/* Returns the sum a + b. */
int add(int a, int b) {
    return a + b;
}

multiply.c

#include "math_utils.h"

/* Returns the product a * b. */
int multiply(int a, int b) {
    return a * b;
}

subtract.c

#include "math_utils.h"

/* Returns the difference a - b. */
int subtract(int a, int b) {
    return a - b;
}

main.c

#include <stdio.h>
#include "math_utils.h"

/*
 * Entry point: prints the results of add() and multiply() for x = 10, y = 5.
 * subtract() is declared in math_utils.h but is not called here.
 */
int main() {
    int x = 10, y = 5;
    
    /* Each call below is to a function defined in a different source file. */
    printf("Add: %d\n", add(x, y));
    printf("Multiply: %d\n", multiply(x, y));
    
    return 0;
}

 

 

1. Compile each source file to object file


 

Input command

 

$ gcc -c add.c -o add.o
$ gcc -c multiply.c -o multiply.o  
$ gcc -c subtract.c -o subtract.o
$ gcc -c main.c -o main.o

 

Output file

 

add.o

multiply.o

subtract.o

main.o

$ ls

---------------------------------------------------------------------------------
add.c  add.o  main.c  main.o  math_utils.h  multiply.c  multiply.o  subtract.c  subtract.o

 

 

2. Use 'nm'


nm : name(symbols) list 

$ nm add.o

-----------------------------------------------------------------------------
0000000000000000 T add

 

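T : Text section

It means the function is defined/implemented in the code (.text) section of this object file.

U : Undefined

It means the symbol is referenced in this object file but must be supplied by another object file or library at link time.


So add.o defines the function 'add'.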

 

 

 


$ nm main.o


--------------------------------------------------------------------------
                 U add
0000000000000000 T main
                 U multiply
                 U printf

 

 

add, multiply, and printf are not defined in this object file; the linker has to resolve them from other object files or libraries.

 

 

 


 

Final Executable Layout : after linking, the memory layout of the executable might look like this (illustrative addresses; main.o alone is not an executable and contains no _start)

objdump -d myprogram
0x401000: _start (real entry point)
0x401020: some initialization code
0x401040: main 
0x401060: other functions

 

 

3. Use 'objdump'


objdump : object dump     * Can use .o .bin

-d : [disassemble] disassembles executable sections, showing assembly code for functions.

-D :  [Disassemble] disassembles all sections including data sections.

-S : [source] intermixes source code with disassembly when the file was compiled with debug information (-g flag)

-h : [headers] displays section headers showing memory layout.

-x : [all-headers] shows all headers including file header and program headers.

-s : [full-contents] displays the full contents of all sections

-j : display contents of a specific section only.

-t : displays the symbol table(like nm)

-T : shows dynamic symbol table for shared libraries

-R : displays dynamic relocation entries

-r : shows static relocation entries

-m <machine> : specifies target architecture (useful for cross-platform analysis)

-b <format> : specifies the binary format

--prefix-addresses : shows full addresses instead of relative offsets

 

$ objdump -r main.o

main.o:     file format elf64-x86-64

RELOCATION RECORDS FOR [.text]:
OFFSET           TYPE              VALUE
0000000000000021 R_X86_64_PLT32    add-0x0000000000000004
0000000000000028 R_X86_64_32       .rodata
0000000000000032 R_X86_64_PLT32    printf-0x0000000000000004
0000000000000041 R_X86_64_PLT32    multiply-0x0000000000000004
0000000000000048 R_X86_64_32       .rodata+0x0000000000000009
0000000000000052 R_X86_64_PLT32    printf-0x0000000000000004


RELOCATION RECORDS FOR [.eh_frame]:
OFFSET           TYPE              VALUE
0000000000000020 R_X86_64_PC32     .text

 

printf("Add: %d\n", add(x, y));

 

[ add function ]   0000000000000021 R_X86_64_PLT32    add-0x0000000000000004
[ string data ]      0000000000000028 R_X86_64_32       .rodata
[ printf function ] 0000000000000032 R_X86_64_PLT32    printf-0x0000000000000004

 

R_X86_64_PLT32

This is for function calls using PLT (Procedure Linkage Table).

It's a 32-bit relative address calculation used for calling functions.

 

R_X86_64_32

This is for 32-bit absolute addresses,

typically used for data references like strings in the .rodata section.

 

-0x4

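The -0x4 addend you see is there because x86-64 call instructions use PC-relative addressing.

The CPU adds the 32-bit displacement to the address of the next instruction,

but the relocation is applied at the displacement field itself, which starts 4 bytes before the next instruction, so the linker computes (symbol + addend - field address) with addend = -4 to compensate.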

 

 

$ objdump -d main.o

main.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <main>:
   0:	55                   	push   %rbp
   1:	48 89 e5             	mov    %rsp,%rbp
   4:	48 83 ec 10          	sub    $0x10,%rsp
   8:	c7 45 fc 0a 00 00 00 	movl   $0xa,-0x4(%rbp)
   f:	c7 45 f8 05 00 00 00 	movl   $0x5,-0x8(%rbp)
  16:	8b 55 f8             	mov    -0x8(%rbp),%edx
  19:	8b 45 fc             	mov    -0x4(%rbp),%eax
  1c:	89 d6                	mov    %edx,%esi
  1e:	89 c7                	mov    %eax,%edi
  20:	e8 00 00 00 00       	call   25 <main+0x25>
  25:	89 c6                	mov    %eax,%esi
  27:	bf 00 00 00 00       	mov    $0x0,%edi
  2c:	b8 00 00 00 00       	mov    $0x0,%eax
  31:	e8 00 00 00 00       	call   36 <main+0x36>
  36:	8b 55 f8             	mov    -0x8(%rbp),%edx
  39:	8b 45 fc             	mov    -0x4(%rbp),%eax
  3c:	89 d6                	mov    %edx,%esi
  3e:	89 c7                	mov    %eax,%edi
  40:	e8 00 00 00 00       	call   45 <main+0x45>
  45:	89 c6                	mov    %eax,%esi
  47:	bf 00 00 00 00       	mov    $0x0,%edi
  4c:	b8 00 00 00 00       	mov    $0x0,%eax
  51:	e8 00 00 00 00       	call   56 <main+0x56>
  56:	b8 00 00 00 00       	mov    $0x0,%eax
  5b:	c9                   	leave
  5c:	c3                   	ret

 

 

4. Make archive file.


ar rcs libmath.a add.o multiply.o subtract.o

 

libmath.a

 

$ ar -t libmath.a
add.o
multiply.o
subtract.o

 

# View symbols in the library
$ nm libmath.a

add.o:
0000000000000000 T add

multiply.o:
0000000000000000 T multiply

subtract.o:
0000000000000000 T subtract

 

5. Link archive file to binary file.


# Link main.o with static library

gcc main.o -L. -lmath -o myprogram

 

# See which symbols are included in final executable
$ nm myprogram | grep -E "(add|multiply|subtract|main)"
0000000000001186 T add         # T = text section, actual address assigned
0000000000001129 T main        # main function address
000000000000119a T multiply    # multiply function address
# subtract is not listed: main.o never references it, so the linker does not pull subtract.o out of libmath.a

 

# Disassemble final executable
$ objdump -d myprogram

...
0000000000001129 <main>:
    1129:       55                      push   %rbp
    112a:       48 89 e5                mov    %rsp,%rbp
    ...
    1149:       e8 38 00 00 00          call   1186 <add>        # Real address: 1186 (0x114e + 0x38)
    114e:       89 c6                   mov    %eax,%esi
    ...
    1169:       e8 2c 00 00 00          call   119a <multiply>   # Real address: 119a (0x116e + 0x2c)
    116e:       89 c6                   mov    %eax,%esi

...
0000000000001186 <add>:        # add() function placed here, right after main (main is 0x5d bytes: 0x1129-0x1185)
    1186:       55                      push   %rbp
    1187:       48 89 e5                mov    %rsp,%rbp
    118a:       89 7d fc                mov    %edi,-0x4(%rbp)
    118d:       89 75 f8                mov    %esi,-0x8(%rbp)
    1190:       8b 55 fc                mov    -0x4(%rbp),%edx
    1193:       8b 45 f8                mov    -0x8(%rbp),%eax
    1196:       01 d0                   add    %edx,%eax
    1198:       5d                      pop    %rbp
    1199:       c3                      ret

000000000000119a <multiply>:   # multiply() function placed here, right after add
    119a:       55                      push   %rbp
    119b:       48 89 e5                mov    %rsp,%rbp
    ...

 

$ objdump -h myprogram

Sections:
Idx Name          Size      VMA               LMA               
  0 .text         00000201  0000000000001000  0000000000001000
  1 .rodata       00000018  0000000000002000  0000000000002000  
  2 .data         00000010  0000000000003000  0000000000003000

 

$ objdump -t myprogram | head -20

SYMBOL TABLE:
0000000000001000  .text  00000000 .text
0000000000001129  .text  00000000 main      # main at 0x1129
0000000000001186  .text  00000000 add       # add at 0x1186
000000000000119a  .text  00000000 multiply  # multiply at 0x119a

# Create and analyze the complete example:
gcc -c *.c
ar rcs libmath.a add.o multiply.o subtract.o
gcc main.o -L. -lmath -o myprogram

# Analysis commands:
nm main.o              # Symbols before linking
objdump -r main.o      # Relocations needed  
objdump -d main.o      # Assembly before linking
nm myprogram          # Symbols after linking
objdump -d myprogram  # Assembly after linking

 

6. Optimization


 

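# main.c calls add() and multiply(), so every command links against libmath.a (built in step 4)

# No optimization (slow code, easiest to debug)
gcc -O0 main.c -L. -lmath -o program_slow

# Basic optimization  
gcc -O1 main.c -L. -lmath -o program_basic

# Standard optimization
gcc -O2 main.c -L. -lmath -o program_standard

# Aggressive optimization (usually fastest, may increase code size)
gcc -O3 main.c -L. -lmath -o program_fast

# Size optimization (balanced)
gcc -Os main.c -L. -lmath -o program_small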