054b1432455d045bfc466343fb71909faadceed3
[openwrt/staging/ynezz.git] / tools / qemu / patches / 0002-VMDK-introduce-VmdkExtent.patch
1 From fcd9c52d160376184cbd25e04586aa6eef6abd61 Mon Sep 17 00:00:00 2001
2 From: Fam Zheng <famcool@gmail.com>
3 Date: Tue, 12 Jul 2011 19:56:28 +0800
4 Subject: [PATCH 02/12] VMDK: introduce VmdkExtent
5
6 Introduced a VmdkExtent array into BDRVVmdkState, enabling it to hold
7 multiple image extents for multiple-file image support.
8
9 Signed-off-by: Fam Zheng <famcool@gmail.com>
10 Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
11 Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12 ---
13 block/vmdk.c | 348 +++++++++++++++++++++++++++++++++++++++++------------------
14 1 file changed, 246 insertions(+), 102 deletions(-)
15
16 --- a/block/vmdk.c
17 +++ b/block/vmdk.c
18 @@ -60,7 +60,11 @@ typedef struct {
19
20 #define L2_CACHE_SIZE 16
21
22 -typedef struct BDRVVmdkState {
23 +typedef struct VmdkExtent {
24 + BlockDriverState *file;
25 + bool flat;
26 + int64_t sectors;
27 + int64_t end_sector;
28 int64_t l1_table_offset;
29 int64_t l1_backup_table_offset;
30 uint32_t *l1_table;
31 @@ -74,7 +78,13 @@ typedef struct BDRVVmdkState {
32 uint32_t l2_cache_counts[L2_CACHE_SIZE];
33
34 unsigned int cluster_sectors;
35 +} VmdkExtent;
36 +
37 +typedef struct BDRVVmdkState {
38 uint32_t parent_cid;
39 + int num_extents;
40 + /* Extent array with num_extents entries, ascend ordered by address */
41 + VmdkExtent *extents;
42 } BDRVVmdkState;
43
44 typedef struct VmdkMetaData {
45 @@ -105,6 +115,19 @@ static int vmdk_probe(const uint8_t *buf
46 #define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each
47 #define HEADER_SIZE 512 // first sector of 512 bytes
48
49 +static void vmdk_free_extents(BlockDriverState *bs)
50 +{
51 + int i;
52 + BDRVVmdkState *s = bs->opaque;
53 +
54 + for (i = 0; i < s->num_extents; i++) {
55 + qemu_free(s->extents[i].l1_table);
56 + qemu_free(s->extents[i].l2_cache);
57 + qemu_free(s->extents[i].l1_backup_table);
58 + }
59 + qemu_free(s->extents);
60 +}
61 +
62 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
63 {
64 char desc[DESC_SIZE];
65 @@ -358,11 +381,50 @@ static int vmdk_parent_open(BlockDriverS
66 return 0;
67 }
68
69 +/* Create and append extent to the extent array. Return the added VmdkExtent
70 + * address. return NULL if allocation failed. */
71 +static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
72 + BlockDriverState *file, bool flat, int64_t sectors,
73 + int64_t l1_offset, int64_t l1_backup_offset,
74 + uint32_t l1_size,
75 + int l2_size, unsigned int cluster_sectors)
76 +{
77 + VmdkExtent *extent;
78 + BDRVVmdkState *s = bs->opaque;
79 +
80 + s->extents = qemu_realloc(s->extents,
81 + (s->num_extents + 1) * sizeof(VmdkExtent));
82 + extent = &s->extents[s->num_extents];
83 + s->num_extents++;
84 +
85 + memset(extent, 0, sizeof(VmdkExtent));
86 + extent->file = file;
87 + extent->flat = flat;
88 + extent->sectors = sectors;
89 + extent->l1_table_offset = l1_offset;
90 + extent->l1_backup_table_offset = l1_backup_offset;
91 + extent->l1_size = l1_size;
92 + extent->l1_entry_sectors = l2_size * cluster_sectors;
93 + extent->l2_size = l2_size;
94 + extent->cluster_sectors = cluster_sectors;
95 +
96 + if (s->num_extents > 1) {
97 + extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
98 + } else {
99 + extent->end_sector = extent->sectors;
100 + }
101 + bs->total_sectors = extent->end_sector;
102 + return extent;
103 +}
104 +
105 +
106 static int vmdk_open(BlockDriverState *bs, int flags)
107 {
108 BDRVVmdkState *s = bs->opaque;
109 uint32_t magic;
110 - int l1_size, i;
111 + int i;
112 + uint32_t l1_size, l1_entry_sectors;
113 + VmdkExtent *extent = NULL;
114
115 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
116 goto fail;
117 @@ -370,32 +432,34 @@ static int vmdk_open(BlockDriverState *b
118 magic = be32_to_cpu(magic);
119 if (magic == VMDK3_MAGIC) {
120 VMDK3Header header;
121 -
122 - if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
123 + if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
124 + != sizeof(header)) {
125 goto fail;
126 - s->cluster_sectors = le32_to_cpu(header.granularity);
127 - s->l2_size = 1 << 9;
128 - s->l1_size = 1 << 6;
129 - bs->total_sectors = le32_to_cpu(header.disk_sectors);
130 - s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
131 - s->l1_backup_table_offset = 0;
132 - s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
133 + }
134 + extent = vmdk_add_extent(bs, bs->file, false,
135 + le32_to_cpu(header.disk_sectors),
136 + le32_to_cpu(header.l1dir_offset) << 9, 0,
137 + 1 << 6, 1 << 9, le32_to_cpu(header.granularity));
138 } else if (magic == VMDK4_MAGIC) {
139 VMDK4Header header;
140 -
141 - if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
142 + if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
143 + != sizeof(header)) {
144 goto fail;
145 - bs->total_sectors = le64_to_cpu(header.capacity);
146 - s->cluster_sectors = le64_to_cpu(header.granularity);
147 - s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
148 - s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
149 - if (s->l1_entry_sectors <= 0)
150 + }
151 + l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
152 + * le64_to_cpu(header.granularity);
153 + l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
154 + / l1_entry_sectors;
155 + extent = vmdk_add_extent(bs, bs->file, false,
156 + le64_to_cpu(header.capacity),
157 + le64_to_cpu(header.gd_offset) << 9,
158 + le64_to_cpu(header.rgd_offset) << 9,
159 + l1_size,
160 + le32_to_cpu(header.num_gtes_per_gte),
161 + le64_to_cpu(header.granularity));
162 + if (extent->l1_entry_sectors <= 0) {
163 goto fail;
164 - s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
165 - / s->l1_entry_sectors;
166 - s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
167 - s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
168 -
169 + }
170 // try to open parent images, if exist
171 if (vmdk_parent_open(bs) != 0)
172 goto fail;
173 @@ -406,40 +470,49 @@ static int vmdk_open(BlockDriverState *b
174 }
175
176 /* read the L1 table */
177 - l1_size = s->l1_size * sizeof(uint32_t);
178 - s->l1_table = qemu_malloc(l1_size);
179 - if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
180 - goto fail;
181 - for(i = 0; i < s->l1_size; i++) {
182 - le32_to_cpus(&s->l1_table[i]);
183 - }
184 -
185 - if (s->l1_backup_table_offset) {
186 - s->l1_backup_table = qemu_malloc(l1_size);
187 - if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
188 + l1_size = extent->l1_size * sizeof(uint32_t);
189 + extent->l1_table = qemu_malloc(l1_size);
190 + if (bdrv_pread(bs->file,
191 + extent->l1_table_offset,
192 + extent->l1_table,
193 + l1_size)
194 + != l1_size) {
195 + goto fail;
196 + }
197 + for (i = 0; i < extent->l1_size; i++) {
198 + le32_to_cpus(&extent->l1_table[i]);
199 + }
200 +
201 + if (extent->l1_backup_table_offset) {
202 + extent->l1_backup_table = qemu_malloc(l1_size);
203 + if (bdrv_pread(bs->file,
204 + extent->l1_backup_table_offset,
205 + extent->l1_backup_table,
206 + l1_size)
207 + != l1_size) {
208 goto fail;
209 - for(i = 0; i < s->l1_size; i++) {
210 - le32_to_cpus(&s->l1_backup_table[i]);
211 + }
212 + for (i = 0; i < extent->l1_size; i++) {
213 + le32_to_cpus(&extent->l1_backup_table[i]);
214 }
215 }
216
217 - s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
218 + extent->l2_cache =
219 + qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
220 return 0;
221 fail:
222 - qemu_free(s->l1_backup_table);
223 - qemu_free(s->l1_table);
224 - qemu_free(s->l2_cache);
225 + vmdk_free_extents(bs);
226 return -1;
227 }
228
229 -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
230 - uint64_t offset, int allocate);
231 -
232 -static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
233 - uint64_t offset, int allocate)
234 +static int get_whole_cluster(BlockDriverState *bs,
235 + VmdkExtent *extent,
236 + uint64_t cluster_offset,
237 + uint64_t offset,
238 + bool allocate)
239 {
240 - BDRVVmdkState *s = bs->opaque;
241 - uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
242 + /* 128 sectors * 512 bytes each = grain size 64KB */
243 + uint8_t whole_grain[extent->cluster_sectors * 512];
244
245 // we will be here if it's first write on non-exist grain(cluster).
246 // try to read from parent image, if exist
247 @@ -450,14 +523,14 @@ static int get_whole_cluster(BlockDriver
248 return -1;
249
250 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
251 - s->cluster_sectors);
252 + extent->cluster_sectors);
253 if (ret < 0) {
254 return -1;
255 }
256
257 //Write grain only into the active image
258 - ret = bdrv_write(bs->file, cluster_offset, whole_grain,
259 - s->cluster_sectors);
260 + ret = bdrv_write(extent->file, cluster_offset, whole_grain,
261 + extent->cluster_sectors);
262 if (ret < 0) {
263 return -1;
264 }
265 @@ -465,29 +538,39 @@ static int get_whole_cluster(BlockDriver
266 return 0;
267 }
268
269 -static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
270 +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
271 {
272 - BDRVVmdkState *s = bs->opaque;
273 -
274 /* update L2 table */
275 - if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
276 - &(m_data->offset), sizeof(m_data->offset)) < 0)
277 + if (bdrv_pwrite_sync(
278 + extent->file,
279 + ((int64_t)m_data->l2_offset * 512)
280 + + (m_data->l2_index * sizeof(m_data->offset)),
281 + &(m_data->offset),
282 + sizeof(m_data->offset)
283 + ) < 0) {
284 return -1;
285 + }
286 /* update backup L2 table */
287 - if (s->l1_backup_table_offset != 0) {
288 - m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
289 - if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
290 - &(m_data->offset), sizeof(m_data->offset)) < 0)
291 + if (extent->l1_backup_table_offset != 0) {
292 + m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
293 + if (bdrv_pwrite_sync(
294 + extent->file,
295 + ((int64_t)m_data->l2_offset * 512)
296 + + (m_data->l2_index * sizeof(m_data->offset)),
297 + &(m_data->offset), sizeof(m_data->offset)
298 + ) < 0) {
299 return -1;
300 + }
301 }
302
303 return 0;
304 }
305
306 -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
307 - uint64_t offset, int allocate)
308 +static uint64_t get_cluster_offset(BlockDriverState *bs,
309 + VmdkExtent *extent,
310 + VmdkMetaData *m_data,
311 + uint64_t offset, int allocate)
312 {
313 - BDRVVmdkState *s = bs->opaque;
314 unsigned int l1_index, l2_offset, l2_index;
315 int min_index, i, j;
316 uint32_t min_count, *l2_table, tmp = 0;
317 @@ -496,21 +579,23 @@ static uint64_t get_cluster_offset(Block
318 if (m_data)
319 m_data->valid = 0;
320
321 - l1_index = (offset >> 9) / s->l1_entry_sectors;
322 - if (l1_index >= s->l1_size)
323 + l1_index = (offset >> 9) / extent->l1_entry_sectors;
324 + if (l1_index >= extent->l1_size) {
325 return 0;
326 - l2_offset = s->l1_table[l1_index];
327 - if (!l2_offset)
328 + }
329 + l2_offset = extent->l1_table[l1_index];
330 + if (!l2_offset) {
331 return 0;
332 + }
333 for(i = 0; i < L2_CACHE_SIZE; i++) {
334 - if (l2_offset == s->l2_cache_offsets[i]) {
335 + if (l2_offset == extent->l2_cache_offsets[i]) {
336 /* increment the hit count */
337 - if (++s->l2_cache_counts[i] == 0xffffffff) {
338 + if (++extent->l2_cache_counts[i] == 0xffffffff) {
339 for(j = 0; j < L2_CACHE_SIZE; j++) {
340 - s->l2_cache_counts[j] >>= 1;
341 + extent->l2_cache_counts[j] >>= 1;
342 }
343 }
344 - l2_table = s->l2_cache + (i * s->l2_size);
345 + l2_table = extent->l2_cache + (i * extent->l2_size);
346 goto found;
347 }
348 }
349 @@ -518,20 +603,25 @@ static uint64_t get_cluster_offset(Block
350 min_index = 0;
351 min_count = 0xffffffff;
352 for(i = 0; i < L2_CACHE_SIZE; i++) {
353 - if (s->l2_cache_counts[i] < min_count) {
354 - min_count = s->l2_cache_counts[i];
355 + if (extent->l2_cache_counts[i] < min_count) {
356 + min_count = extent->l2_cache_counts[i];
357 min_index = i;
358 }
359 }
360 - l2_table = s->l2_cache + (min_index * s->l2_size);
361 - if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
362 - s->l2_size * sizeof(uint32_t))
363 + l2_table = extent->l2_cache + (min_index * extent->l2_size);
364 + if (bdrv_pread(
365 + extent->file,
366 + (int64_t)l2_offset * 512,
367 + l2_table,
368 + extent->l2_size * sizeof(uint32_t)
369 + ) != extent->l2_size * sizeof(uint32_t)) {
370 return 0;
371 + }
372
373 - s->l2_cache_offsets[min_index] = l2_offset;
374 - s->l2_cache_counts[min_index] = 1;
375 + extent->l2_cache_offsets[min_index] = l2_offset;
376 + extent->l2_cache_counts[min_index] = 1;
377 found:
378 - l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
379 + l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
380 cluster_offset = le32_to_cpu(l2_table[l2_index]);
381
382 if (!cluster_offset) {
383 @@ -539,8 +629,11 @@ static uint64_t get_cluster_offset(Block
384 return 0;
385
386 // Avoid the L2 tables update for the images that have snapshots.
387 - cluster_offset = bdrv_getlength(bs->file);
388 - bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
389 + cluster_offset = bdrv_getlength(extent->file);
390 + bdrv_truncate(
391 + extent->file,
392 + cluster_offset + (extent->cluster_sectors << 9)
393 + );
394
395 cluster_offset >>= 9;
396 tmp = cpu_to_le32(cluster_offset);
397 @@ -551,7 +644,8 @@ static uint64_t get_cluster_offset(Block
398 * This problem may occur because of insufficient space on host disk
399 * or inappropriate VM shutdown.
400 */
401 - if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
402 + if (get_whole_cluster(
403 + bs, extent, cluster_offset, offset, allocate) == -1)
404 return 0;
405
406 if (m_data) {
407 @@ -566,33 +660,69 @@ static uint64_t get_cluster_offset(Block
408 return cluster_offset;
409 }
410
411 +static VmdkExtent *find_extent(BDRVVmdkState *s,
412 + int64_t sector_num, VmdkExtent *start_hint)
413 +{
414 + VmdkExtent *extent = start_hint;
415 +
416 + if (!extent) {
417 + extent = &s->extents[0];
418 + }
419 + while (extent < &s->extents[s->num_extents]) {
420 + if (sector_num < extent->end_sector) {
421 + return extent;
422 + }
423 + extent++;
424 + }
425 + return NULL;
426 +}
427 +
428 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
429 int nb_sectors, int *pnum)
430 {
431 BDRVVmdkState *s = bs->opaque;
432 - int index_in_cluster, n;
433 - uint64_t cluster_offset;
434
435 - cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
436 - index_in_cluster = sector_num % s->cluster_sectors;
437 - n = s->cluster_sectors - index_in_cluster;
438 + int64_t index_in_cluster, n, ret;
439 + uint64_t offset;
440 + VmdkExtent *extent;
441 +
442 + extent = find_extent(s, sector_num, NULL);
443 + if (!extent) {
444 + return 0;
445 + }
446 + if (extent->flat) {
447 + n = extent->end_sector - sector_num;
448 + ret = 1;
449 + } else {
450 + offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
451 + index_in_cluster = sector_num % extent->cluster_sectors;
452 + n = extent->cluster_sectors - index_in_cluster;
453 + ret = offset ? 1 : 0;
454 + }
455 if (n > nb_sectors)
456 n = nb_sectors;
457 *pnum = n;
458 - return (cluster_offset != 0);
459 + return ret;
460 }
461
462 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
463 uint8_t *buf, int nb_sectors)
464 {
465 BDRVVmdkState *s = bs->opaque;
466 - int index_in_cluster, n, ret;
467 + int ret;
468 + uint64_t n, index_in_cluster;
469 + VmdkExtent *extent = NULL;
470 uint64_t cluster_offset;
471
472 while (nb_sectors > 0) {
473 - cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
474 - index_in_cluster = sector_num % s->cluster_sectors;
475 - n = s->cluster_sectors - index_in_cluster;
476 + extent = find_extent(s, sector_num, extent);
477 + if (!extent) {
478 + return -EIO;
479 + }
480 + cluster_offset = get_cluster_offset(
481 + bs, extent, NULL, sector_num << 9, 0);
482 + index_in_cluster = sector_num % extent->cluster_sectors;
483 + n = extent->cluster_sectors - index_in_cluster;
484 if (n > nb_sectors)
485 n = nb_sectors;
486 if (!cluster_offset) {
487 @@ -621,10 +751,12 @@ static int vmdk_write(BlockDriverState *
488 const uint8_t *buf, int nb_sectors)
489 {
490 BDRVVmdkState *s = bs->opaque;
491 - VmdkMetaData m_data;
492 - int index_in_cluster, n;
493 + VmdkExtent *extent = NULL;
494 + int n;
495 + int64_t index_in_cluster;
496 uint64_t cluster_offset;
497 static int cid_update = 0;
498 + VmdkMetaData m_data;
499
500 if (sector_num > bs->total_sectors) {
501 fprintf(stderr,
502 @@ -635,20 +767,35 @@ static int vmdk_write(BlockDriverState *
503 }
504
505 while (nb_sectors > 0) {
506 - index_in_cluster = sector_num & (s->cluster_sectors - 1);
507 - n = s->cluster_sectors - index_in_cluster;
508 - if (n > nb_sectors)
509 - n = nb_sectors;
510 - cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
511 - if (!cluster_offset)
512 + extent = find_extent(s, sector_num, extent);
513 + if (!extent) {
514 + return -EIO;
515 + }
516 + cluster_offset = get_cluster_offset(
517 + bs,
518 + extent,
519 + &m_data,
520 + sector_num << 9, 1);
521 + if (!cluster_offset) {
522 return -1;
523 + }
524 + index_in_cluster = sector_num % extent->cluster_sectors;
525 + n = extent->cluster_sectors - index_in_cluster;
526 + if (n > nb_sectors) {
527 + n = nb_sectors;
528 + }
529
530 - if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
531 + if (bdrv_pwrite(bs->file,
532 + cluster_offset + index_in_cluster * 512,
533 + buf, n * 512)
534 + != n * 512) {
535 return -1;
536 + }
537 if (m_data.valid) {
538 /* update L2 tables */
539 - if (vmdk_L2update(bs, &m_data) == -1)
540 + if (vmdk_L2update(extent, &m_data) == -1) {
541 return -1;
542 + }
543 }
544 nb_sectors -= n;
545 sector_num += n;
546 @@ -822,10 +969,7 @@ exit:
547
548 static void vmdk_close(BlockDriverState *bs)
549 {
550 - BDRVVmdkState *s = bs->opaque;
551 -
552 - qemu_free(s->l1_table);
553 - qemu_free(s->l2_cache);
554 + vmdk_free_extents(bs);
555 }
556
557 static int vmdk_flush(BlockDriverState *bs)