C 语言学习笔记 11 - 聚合数据类型


C 语言虽然不像 Java 这类面向对象语言那样有类的概念,但仍然提供了一种方式供我们聚合多个数值:这就是结构体。


struct <结构体名称> {
    <类型> <名称>;
} <结构体变量名>;


// Ordinary (tagged) struct: the tag Person lets the type be referred to
// later as "struct Person".
struct Person {
    char *name;
    int age;
    char *job;
};

// A variable of the struct type declared above.
struct Person person;

// Anonymous struct: there is no tag, so the type cannot be named again
// and the only variable of this type is the one declared right here.
struct {
    char *name;
    int age;
    char *job;
} am_person;


// Positional initializer: values are assigned to the members in the order
// in which the members are declared.
struct Person person = { "noraincity", 24, "student" };


// age is not listed; members without an explicit initializer are
// zero-initialized.
struct Person person = { .name="noraincity", .job="student" };

其中的 . 被称为成员选择运算符,用于选择结构体中的成员。


// Take the address of the struct variable to obtain a pointer to it.
struct Person *ptr = &person;

// Read the name member through the pointer using the -> operator.
char *name = ptr->name;

此处的 -> 也是一种成员选择运算符,在通过结构体指针访问成员时使用。

不过每次定义都要多写一个 struct 略显繁琐,我们可以使用上一节的 typedef 关键字减少重复的 struct,声明起来就像 Java 里一样。

// Alias the struct type so it can be used without the struct keyword.
typedef struct Person Person;

// Declared through the alias; the initializer is positional.
Person noStructPerson = { "Person", 18, "unknown" };



一个结构体所占的内存大小并不完全由结构体中的数据类型决定,假如一个结构体中有 int 和 long 两个数据,它的内存大小也不会是 4 + 8。

对以下 Test 变量进行分析:

// Members of mixed width; the compiler may insert padding between them
// and after the last one.
typedef struct {
    char a;
    char b;
    int c;
    short d;
    double e;
} Test;

// Each member is initialized by name rather than by position.
Test test = {
    .a = 'A',
    .b = 'B',
    .c = 4,
    .d = 2,
    .e = 8.00,
};

使用 CLion 的 Memory View 我们可以发现,其中有部分位置是被 cc 填充的 (MinGW 下是 00),累计占用了 24 字节,与想象中占用 16 字节好像不同。


由于现代计算机 CPU 的特性,为了能够更快地访问结构体,有必要按一定大小来划分存放结构体的内存空间。因此需要按一定大小进行内存对齐。

下面引用 Stack Overflow 上 Structure padding and packing 回答中的代码,体现了内存对齐带来的不同之处:

#include <stdint.h>
#include <stdio.h>

// With a 4-byte int: 4 + 1 = 5 bytes of data, rounded up to 8 so that the
// size is a multiple of int's alignment.
struct stu_a {
    int i;
    char c;
};

// Where long is 8 bytes (e.g. 64-bit Linux): 8 + 1 = 9 bytes of data,
// rounded up to 16 so that the size is a multiple of long's alignment.
// On platforms with a 4-byte long the result is smaller.
struct stu_b {
    long l;
    char c;
};

// Where long is 8 bytes: l must start on an 8-byte boundary, so 4 bytes of
// padding follow i; the total is then rounded up to a multiple of long's
// alignment, giving 24.
struct stu_c {
    int i;
    long l;
    char c;
};

// Where long is 8 bytes: 8 + 4 + 1 = 13 bytes of data with no interior
// padding, rounded up to 16 (a multiple of long's alignment).
struct stu_d {
    long l;
    int i;
    char c;
};

// With an 8-byte-aligned double: 8 + 4 + 1 = 13 bytes of data with no
// interior padding, rounded up to 16 (a multiple of double's alignment).
struct stu_e {
    double d;
    int i;
    char c;
};

// With an 8-byte-aligned double: d must start on an 8-byte boundary, so
// padding follows i; the total is then rounded up to a multiple of double's
// alignment, giving 24.
struct stu_f {
    int i;
    double d;
    char c;
};

// A single int member: the struct is exactly as large as an int
// (4 bytes on common platforms).
struct stu_g {
    int i;
};

// A single long member: the struct is exactly as large as a long
// (8 bytes where long is 64-bit, 4 bytes where it is 32-bit).
struct stu_h {
    long l;
};

// Test - padding within a single struct.
// Prints the size of every demo struct so the padding inserted by the
// compiler can be observed. Always returns 0.
int test_struct_padding(void) {
    // sizeof yields a size_t, whose matching printf conversion is %zu.
    printf("%s: %zu\n", "stu_a", sizeof(struct stu_a));
    printf("%s: %zu\n", "stu_b", sizeof(struct stu_b));
    printf("%s: %zu\n", "stu_c", sizeof(struct stu_c));
    printf("%s: %zu\n", "stu_d", sizeof(struct stu_d));
    printf("%s: %zu\n", "stu_e", sizeof(struct stu_e));
    printf("%s: %zu\n", "stu_f", sizeof(struct stu_f));

    printf("%s: %zu\n", "stu_g", sizeof(struct stu_g));
    printf("%s: %zu\n", "stu_h", sizeof(struct stu_h));

    return 0;
}

// Signed distance in bytes from address `from` to address `to`.
// The addresses are converted to intptr_t first: the pointers refer to
// unrelated objects, so subtracting them directly would be undefined, and
// long is too narrow to hold a pointer on some 64-bit platforms.
static long long byte_gap(const void *from, const void *to) {
    return (long long)((intptr_t)to - (intptr_t)from);
}

// Test - address of struct.
// Prints the sizes and addresses of several local variables, then the
// distances between them, to show how the compiler places neighbouring
// objects. The concrete numbers depend on platform and compiler.
// Always returns 0.
int test_struct_address(void) {
    // sizeof yields a size_t, whose matching printf conversion is %zu.
    printf("%s: %zu\n", "stu_g", sizeof(struct stu_g));
    printf("%s: %zu\n", "stu_h", sizeof(struct stu_h));
    printf("%s: %zu\n", "stu_f", sizeof(struct stu_f));

    struct stu_g g;
    struct stu_h h;
    struct stu_f f1;
    struct stu_f f2;
    int x = 1;
    long y = 1;

    // %p expects a void * argument, hence the explicit casts.
    printf("address of %s: %p\n", "g", (void *)&g);
    printf("address of %s: %p\n", "h", (void *)&h);
    printf("address of %s: %p\n", "f1", (void *)&f1);
    printf("address of %s: %p\n", "f2", (void *)&f2);
    printf("address of %s: %p\n", "x", (void *)&x);
    printf("address of %s: %p\n", "y", (void *)&y);

    // g itself is small, but the distance to the next struct can be larger
    // (the original author observed 16 bytes on a 64-bit system and 8 bytes
    // on a 32-bit system).
    printf("space between %s and %s: %lld\n", "g", "h", byte_gap(&g, &h));

    // Same observation for h and the struct that follows it.
    printf("space between %s and %s: %lld\n", "h", "f1", byte_gap(&h, &f1));

    // Same observation for two structs of the same type.
    printf("space between %s and %s: %lld\n", "f1", "f2", byte_gap(&f1, &f2));

    // x is not a struct; it may be placed in the otherwise unused space
    // between structs, e.g. between g and h.
    printf("space between %s and %s: %lld\n", "x", "f2", byte_gap(&f2, &x));
    printf("space between %s and %s: %lld\n", "g", "x", byte_gap(&g, &x));

    // y is not a struct either; it may be placed in the unused space between
    // structs, e.g. between h and f1.
    printf("space between %s and %s: %lld\n", "x", "y", byte_gap(&x, &y));
    printf("space between %s and %s: %lld\n", "h", "y", byte_gap(&h, &y));

    return 0;
}

// Entry point: runs both demos. The command-line arguments are not used.
int main(int argc, char * argv[]) {
    (void)argc;
    (void)argv;

    test_struct_padding();
    test_struct_address();

    return 0;
}

不过,因为 C 是一个很自由的语言,当然也有办法去规定它该怎么对齐。 使用预处理指令 #pragma pack(n) (n 为 2 的 x 次方) 即可限制它的对齐系数。

GCC 提供了一个属性用于修改结构体中单个变量的对齐规则:__attribute__((aligned(n))),C11 中则提供了 _Alignas(n),但有最小值要求 (不得小于当前变量类型本身的对齐要求)。

C11 还提供了 _Alignof(x) (x 为类型名) 用于获取该类型的对齐要求;若要获取结构体中某个成员的偏移位置,可以使用 <stddef.h> 中的 offsetof(结构体类型, 成员名) 宏。


总结: 1. 结构体所占内存大小并非元素本身大小之和。 2. 通常情况下,结构体的大小会按对齐要求最大的成员进行对齐。 3. 在每个平台、每个编译器下最终进行内存对齐的方案都不同。


关键字为 union,与结构体语法定义相似,只是关键字不同。 但与结构体不同的是,联合体中所有变量共享一块内存。因此下面的代码是不被推荐的:

// A union overlays all of its members on the same storage.
typedef union Test {
    int i;
    double d;
} Test;

// Discouraged: because the members share storage, the later .d initializer
// overrides the earlier .i one.
Test test = {.i=1, .d=8.0};
comments powered by Disqus