1 : /******************************************************
2 : The interface to the operating system file i/o primitives
3 :
4 : (c) 1995 Innobase Oy
5 :
6 : Created 10/21/1995 Heikki Tuuri
7 : *******************************************************/
8 :
9 : #include "os0file.h"
10 : #include "os0sync.h"
11 : #include "os0thread.h"
12 : #include "ut0mem.h"
13 : #include "srv0srv.h"
14 : #include "srv0start.h"
15 : #include "fil0fil.h"
16 : #include "buf0buf.h"
17 :
18 : #if defined(UNIV_HOTBACKUP) && defined(__WIN__)
19 : /* Add includes for the _stat() call to compile on Windows */
20 : #include <sys/types.h>
21 : #include <sys/stat.h>
22 : #include <errno.h>
23 : #endif /* UNIV_HOTBACKUP */
24 :
25 : /* This specifies the file permissions InnoDB uses when it creates files in
26 : Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
27 : my_umask */
28 :
29 : #ifndef __WIN__
30 : UNIV_INTERN ulint os_innodb_umask
31 : = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
32 : #else
33 : UNIV_INTERN ulint os_innodb_umask = 0;
34 : #endif
35 :
36 : #ifdef UNIV_DO_FLUSH
37 : /* If the following is set to TRUE, we do not call os_file_flush in every
38 : os_file_write. We can set this TRUE when the doublewrite buffer is used. */
39 : UNIV_INTERN ibool os_do_not_call_flush_at_each_write = FALSE;
40 : #else
41 : /* We do not call os_file_flush in every os_file_write. */
42 : #endif /* UNIV_DO_FLUSH */
43 :
44 : /* We use these mutexes to protect lseek + file i/o operation, if the
45 : OS does not provide an atomic pread or pwrite, or similar */
46 : #define OS_FILE_N_SEEK_MUTEXES 16
47 : UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
48 :
49 : /* In simulated aio, merge at most this many consecutive i/os */
50 : #define OS_AIO_MERGE_N_CONSECUTIVE 64
51 :
52 : /* If this flag is TRUE, then we will use the native aio of the
53 : OS (provided we compiled Innobase with it in), otherwise we will
54 : use simulated aio we build below with threads */
55 :
56 : UNIV_INTERN ibool os_aio_use_native_aio = FALSE;
57 :
58 : UNIV_INTERN ibool os_aio_print_debug = FALSE;
59 :
60 : /* The aio array slot structure */
61 : typedef struct os_aio_slot_struct os_aio_slot_t;
62 :
63 : struct os_aio_slot_struct{
64 : ibool is_read; /* TRUE if a read operation */
65 : ulint pos; /* index of the slot in the aio
66 : array */
67 : ibool reserved; /* TRUE if this slot is reserved */
68 : time_t reservation_time;/* time when reserved */
69 : ulint len; /* length of the block to read or
70 : write */
71 : byte* buf; /* buffer used in i/o */
72 : ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
73 : ulint offset; /* 32 low bits of file offset in
74 : bytes */
75 : ulint offset_high; /* 32 high bits of file offset */
76 : os_file_t file; /* file where to read or write */
77 : const char* name; /* file name or path */
78 : ibool io_already_done;/* used only in simulated aio:
79 : TRUE if the physical i/o already
80 : made and only the slot message
81 : needs to be passed to the caller
82 : of os_aio_simulated_handle */
83 : fil_node_t* message1; /* message which is given by the */
84 : void* message2; /* the requester of an aio operation
85 : and which can be used to identify
86 : which pending aio operation was
87 : completed */
88 : #ifdef WIN_ASYNC_IO
89 : os_event_t event; /* event object we need in the
90 : OVERLAPPED struct */
91 : OVERLAPPED control; /* Windows control block for the
92 : aio request */
93 : #endif
94 : };
95 :
96 : /* The aio array structure */
97 : typedef struct os_aio_array_struct os_aio_array_t;
98 :
99 : struct os_aio_array_struct{
100 : os_mutex_t mutex; /* the mutex protecting the aio array */
101 : os_event_t not_full; /* The event which is set to the signaled
102 : state when there is space in the aio
103 : outside the ibuf segment */
104 : os_event_t is_empty; /* The event which is set to the signaled
105 : state when there are no pending i/os
106 : in this array */
107 : ulint n_slots; /* Total number of slots in the aio array.
108 : This must be divisible by n_threads. */
109 : ulint n_segments;/* Number of segments in the aio array of
110 : pending aio requests. A thread can wait
111 : separately for any one of the segments. */
112 : ulint n_reserved;/* Number of reserved slots in the
113 : aio array outside the ibuf segment */
114 : os_aio_slot_t* slots; /* Pointer to the slots in the array */
115 : #ifdef __WIN__
116 : os_native_event_t* native_events;
117 : /* Pointer to an array of OS native event
118 : handles where we copied the handles from
119 : slots, in the same order. This can be used
120 : in WaitForMultipleObjects; used only in
121 : Windows */
122 : #endif
123 : };
124 :
125 : /* Array of events used in simulated aio */
126 : static os_event_t* os_aio_segment_wait_events = NULL;
127 :
128 : /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
129 : are NULL when the module has not yet been initialized. */
130 : static os_aio_array_t* os_aio_read_array = NULL;
131 : static os_aio_array_t* os_aio_write_array = NULL;
132 : static os_aio_array_t* os_aio_ibuf_array = NULL;
133 : static os_aio_array_t* os_aio_log_array = NULL;
134 : static os_aio_array_t* os_aio_sync_array = NULL;
135 :
136 : static ulint os_aio_n_segments = ULINT_UNDEFINED;
137 :
138 : /* If the following is TRUE, read i/o handler threads try to
139 : wait until a batch of new read requests have been posted */
140 : static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
141 :
142 : UNIV_INTERN ulint os_n_file_reads = 0;
143 : UNIV_INTERN ulint os_bytes_read_since_printout = 0;
144 : UNIV_INTERN ulint os_n_file_writes = 0;
145 : UNIV_INTERN ulint os_n_fsyncs = 0;
146 : UNIV_INTERN ulint os_n_file_reads_old = 0;
147 : UNIV_INTERN ulint os_n_file_writes_old = 0;
148 : UNIV_INTERN ulint os_n_fsyncs_old = 0;
149 : UNIV_INTERN time_t os_last_printout;
150 :
151 : UNIV_INTERN ibool os_has_said_disk_full = FALSE;
152 :
153 : /* The mutex protecting the following counts of pending I/O operations */
154 : static os_mutex_t os_file_count_mutex;
155 : UNIV_INTERN ulint os_file_n_pending_preads = 0;
156 : UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
157 : UNIV_INTERN ulint os_n_pending_writes = 0;
158 : UNIV_INTERN ulint os_n_pending_reads = 0;
159 :
160 : /***************************************************************************
161 : Gets the operating system version. Currently works only on Windows. */
162 : UNIV_INTERN
163 : ulint
164 : os_get_os_version(void)
165 : /*===================*/
166 : /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
167 0 : {
168 : #ifdef __WIN__
169 : OSVERSIONINFO os_info;
170 :
171 : os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
172 :
173 : ut_a(GetVersionEx(&os_info));
174 :
175 : if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
176 : return(OS_WIN31);
177 : } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
178 : return(OS_WIN95);
179 : } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
180 : if (os_info.dwMajorVersion <= 4) {
181 : return(OS_WINNT);
182 : } else {
183 : return(OS_WIN2000);
184 : }
185 : } else {
186 : ut_error;
187 : return(0);
188 : }
189 : #else
190 0 : ut_error;
191 :
192 : return(0);
193 : #endif
194 : }
195 :
196 : /***************************************************************************
197 : Retrieves the last error number if an error occurs in a file io function.
198 : The number should be retrieved before any other OS calls (because they may
199 : overwrite the error number). If the number is not known to this program,
200 : the OS error number + 100 is returned. */
201 : UNIV_INTERN
202 : ulint
203 : os_file_get_last_error(
204 : /*===================*/
205 : /* out: error number, or OS error
206 : number + 100 */
207 : ibool report_all_errors) /* in: TRUE if we want an error message
208 : printed of all errors */
209 114 : {
210 : ulint err;
211 :
212 : #ifdef __WIN__
213 :
214 : err = (ulint) GetLastError();
215 :
216 : if (report_all_errors
217 : || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
218 :
219 : ut_print_timestamp(stderr);
220 : fprintf(stderr,
221 : " InnoDB: Operating system error number %lu"
222 : " in a file operation.\n", (ulong) err);
223 :
224 : if (err == ERROR_PATH_NOT_FOUND) {
225 : fprintf(stderr,
226 : "InnoDB: The error means the system"
227 : " cannot find the path specified.\n");
228 :
229 : if (srv_is_being_started) {
230 : fprintf(stderr,
231 : "InnoDB: If you are installing InnoDB,"
232 : " remember that you must create\n"
233 : "InnoDB: directories yourself, InnoDB"
234 : " does not create them.\n");
235 : }
236 : } else if (err == ERROR_ACCESS_DENIED) {
237 : fprintf(stderr,
238 : "InnoDB: The error means mysqld does not have"
239 : " the access rights to\n"
240 : "InnoDB: the directory. It may also be"
241 : " you have created a subdirectory\n"
242 : "InnoDB: of the same name as a data file.\n");
243 : } else if (err == ERROR_SHARING_VIOLATION
244 : || err == ERROR_LOCK_VIOLATION) {
245 : fprintf(stderr,
246 : "InnoDB: The error means that another program"
247 : " is using InnoDB's files.\n"
248 : "InnoDB: This might be a backup or antivirus"
249 : " software or another instance\n"
250 : "InnoDB: of MySQL."
251 : " Please close it to get rid of this error.\n");
252 : } else {
253 : fprintf(stderr,
254 : "InnoDB: Some operating system error numbers"
255 : " are described at\n"
256 : "InnoDB: "
257 : "http://dev.mysql.com/doc/refman/5.1/en/"
258 : "operating-system-error-codes.html\n");
259 : }
260 : }
261 :
262 : fflush(stderr);
263 :
264 : if (err == ERROR_FILE_NOT_FOUND) {
265 : return(OS_FILE_NOT_FOUND);
266 : } else if (err == ERROR_DISK_FULL) {
267 : return(OS_FILE_DISK_FULL);
268 : } else if (err == ERROR_FILE_EXISTS) {
269 : return(OS_FILE_ALREADY_EXISTS);
270 : } else if (err == ERROR_SHARING_VIOLATION
271 : || err == ERROR_LOCK_VIOLATION) {
272 : return(OS_FILE_SHARING_VIOLATION);
273 : } else {
274 : return(100 + err);
275 : }
276 : #else
277 114 : err = (ulint) errno;
278 :
279 114 : if (report_all_errors
280 : || (err != ENOSPC && err != EEXIST)) {
281 :
282 0 : ut_print_timestamp(stderr);
283 0 : fprintf(stderr,
284 : " InnoDB: Operating system error number %lu"
285 : " in a file operation.\n", (ulong) err);
286 :
287 0 : if (err == ENOENT) {
288 0 : fprintf(stderr,
289 : "InnoDB: The error means the system"
290 : " cannot find the path specified.\n");
291 :
292 0 : if (srv_is_being_started) {
293 0 : fprintf(stderr,
294 : "InnoDB: If you are installing InnoDB,"
295 : " remember that you must create\n"
296 : "InnoDB: directories yourself, InnoDB"
297 : " does not create them.\n");
298 : }
299 0 : } else if (err == EACCES) {
300 0 : fprintf(stderr,
301 : "InnoDB: The error means mysqld does not have"
302 : " the access rights to\n"
303 : "InnoDB: the directory.\n");
304 : } else {
305 0 : if (strerror((int)err) != NULL) {
306 0 : fprintf(stderr,
307 : "InnoDB: Error number %lu"
308 : " means '%s'.\n",
309 : err, strerror((int)err));
310 : }
311 :
312 0 : fprintf(stderr,
313 : "InnoDB: Some operating system"
314 : " error numbers are described at\n"
315 : "InnoDB: "
316 : "http://dev.mysql.com/doc/refman/5.1/en/"
317 : "operating-system-error-codes.html\n");
318 : }
319 : }
320 :
321 114 : fflush(stderr);
322 :
323 114 : if (err == ENOSPC) {
324 0 : return(OS_FILE_DISK_FULL);
325 114 : } else if (err == ENOENT) {
326 0 : return(OS_FILE_NOT_FOUND);
327 114 : } else if (err == EEXIST) {
328 114 : return(OS_FILE_ALREADY_EXISTS);
329 0 : } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
330 0 : return(OS_FILE_PATH_ERROR);
331 : } else {
332 0 : return(100 + err);
333 : }
334 : #endif
335 : }
336 :
337 : /********************************************************************
338 : Does error handling when a file operation fails.
339 : Conditionally exits (calling exit(3)) based on should_exit value and the
340 : error type */
341 : static
342 : ibool
343 : os_file_handle_error_cond_exit(
344 : /*===========================*/
345 : /* out: TRUE if we should retry the
346 : operation */
347 : const char* name, /* in: name of a file or NULL */
348 : const char* operation, /* in: operation */
349 : ibool should_exit) /* in: call exit(3) if unknown error
350 : and this parameter is TRUE */
351 57 : {
352 : ulint err;
353 :
354 57 : err = os_file_get_last_error(FALSE);
355 :
356 57 : if (err == OS_FILE_DISK_FULL) {
357 : /* We only print a warning about disk full once */
358 :
359 0 : if (os_has_said_disk_full) {
360 :
361 0 : return(FALSE);
362 : }
363 :
364 0 : if (name) {
365 0 : ut_print_timestamp(stderr);
366 0 : fprintf(stderr,
367 : " InnoDB: Encountered a problem with"
368 : " file %s\n", name);
369 : }
370 :
371 0 : ut_print_timestamp(stderr);
372 0 : fprintf(stderr,
373 : " InnoDB: Disk is full. Try to clean the disk"
374 : " to free space.\n");
375 :
376 0 : os_has_said_disk_full = TRUE;
377 :
378 0 : fflush(stderr);
379 :
380 0 : return(FALSE);
381 57 : } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
382 :
383 0 : return(TRUE);
384 57 : } else if (err == OS_FILE_ALREADY_EXISTS
385 : || err == OS_FILE_PATH_ERROR) {
386 :
387 57 : return(FALSE);
388 0 : } else if (err == OS_FILE_SHARING_VIOLATION) {
389 :
390 0 : os_thread_sleep(10000000); /* 10 sec */
391 0 : return(TRUE);
392 : } else {
393 0 : if (name) {
394 0 : fprintf(stderr, "InnoDB: File name %s\n", name);
395 : }
396 :
397 0 : fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
398 : operation);
399 :
400 0 : if (should_exit) {
401 0 : fprintf(stderr, "InnoDB: Cannot continue operation.\n");
402 :
403 0 : fflush(stderr);
404 :
405 0 : exit(1);
406 : }
407 : }
408 :
409 0 : return(FALSE);
410 : }
411 :
412 : /********************************************************************
413 : Does error handling when a file operation fails. */
414 : static
415 : ibool
416 : os_file_handle_error(
417 : /*=================*/
418 : /* out: TRUE if we should retry the
419 : operation */
420 : const char* name, /* in: name of a file or NULL */
421 : const char* operation)/* in: operation */
422 57 : {
423 : /* exit in case of unknown error */
424 57 : return(os_file_handle_error_cond_exit(name, operation, TRUE));
425 : }
426 :
427 : /********************************************************************
428 : Does error handling when a file operation fails. */
429 : static
430 : ibool
431 : os_file_handle_error_no_exit(
432 : /*=========================*/
433 : /* out: TRUE if we should retry the
434 : operation */
435 : const char* name, /* in: name of a file or NULL */
436 : const char* operation)/* in: operation */
437 0 : {
438 : /* don't exit in case of unknown error */
439 0 : return(os_file_handle_error_cond_exit(name, operation, FALSE));
440 : }
441 :
#undef USE_FILE_LOCK
#define USE_FILE_LOCK
#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
/* InnoDB Hot Backup does not lock the data files.
 * On Windows, mandatory locking is used.
 */
# undef USE_FILE_LOCK
#endif
#ifdef USE_FILE_LOCK
/********************************************************************
Obtain an exclusive lock on a file. The lock is requested with
fcntl(F_SETLK), so the call does not block: if the lock cannot be taken an
error is printed to stderr and -1 is returned. */
static
int
os_file_lock(
/*=========*/
				/* out: 0 on success */
	int		fd,	/* in: file descriptor */
	const char*	name)	/* in: file name */
{
	struct flock	lk;
	int		lock_errno;

	/* A write lock starting at offset 0 with length 0, that is,
	covering the whole file */
	lk.l_type = F_WRLCK;
	lk.l_whence = SEEK_SET;
	lk.l_start = 0;
	lk.l_len = 0;

	if (fcntl(fd, F_SETLK, &lk) != -1) {

		return(0);
	}

	/* Save errno at once: the fprintf() below is allowed to change
	it, and the value is needed again after the message */
	lock_errno = errno;

	fprintf(stderr,
		"InnoDB: Unable to lock %s, error: %d\n", name, lock_errno);

	if (lock_errno == EAGAIN || lock_errno == EACCES) {
		fprintf(stderr,
			"InnoDB: Check that you do not already have"
			" another mysqld process\n"
			"InnoDB: using the same InnoDB data"
			" or log files.\n");
	}

	return(-1);
}
#endif /* USE_FILE_LOCK */
483 :
484 : /********************************************************************
485 : Creates the seek mutexes used in positioned reads and writes. */
486 : UNIV_INTERN
487 : void
488 : os_io_init_simple(void)
489 : /*===================*/
490 21 : {
491 : ulint i;
492 :
493 21 : os_file_count_mutex = os_mutex_create(NULL);
494 :
495 357 : for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
496 336 : os_file_seek_mutexes[i] = os_mutex_create(NULL);
497 : }
498 21 : }
499 :
500 : /***************************************************************************
501 : Creates a temporary file. This function is like tmpfile(3), but
502 : the temporary file is created in the MySQL temporary directory.
503 : On Netware, this function is like tmpfile(3), because the C run-time
504 : library of Netware does not expose the delete-on-close flag. */
505 : UNIV_INTERN
506 : FILE*
507 : os_file_create_tmpfile(void)
508 : /*========================*/
509 : /* out: temporary file handle, or NULL on error */
510 105 : {
511 : #ifdef UNIV_HOTBACKUP
512 : ut_error;
513 :
514 : return(NULL);
515 : #else
516 : # ifdef __NETWARE__
517 : FILE* file = tmpfile();
518 : # else /* __NETWARE__ */
519 105 : FILE* file = NULL;
520 105 : int fd = innobase_mysql_tmpfile();
521 :
522 105 : if (fd >= 0) {
523 105 : file = fdopen(fd, "w+b");
524 : }
525 : # endif /* __NETWARE__ */
526 :
527 105 : if (!file) {
528 0 : ut_print_timestamp(stderr);
529 0 : fprintf(stderr,
530 : " InnoDB: Error: unable to create temporary file;"
531 : " errno: %d\n", errno);
532 : # ifndef __NETWARE__
533 0 : if (fd >= 0) {
534 0 : close(fd);
535 : }
536 : # endif /* !__NETWARE__ */
537 : }
538 :
539 105 : return(file);
540 : #endif /* UNIV_HOTBACKUP */
541 : }
542 :
543 : /***************************************************************************
544 : The os_file_opendir() function opens a directory stream corresponding to the
545 : directory named by the dirname argument. The directory stream is positioned
546 : at the first entry. In both Unix and Windows we automatically skip the '.'
547 : and '..' items at the start of the directory listing. */
548 : UNIV_INTERN
549 : os_file_dir_t
550 : os_file_opendir(
551 : /*============*/
552 : /* out: directory stream, NULL if
553 : error */
554 : const char* dirname, /* in: directory name; it must not
555 : contain a trailing '\' or '/' */
556 : ibool error_is_fatal) /* in: TRUE if we should treat an
557 : error as a fatal error; if we try to
558 : open symlinks then we do not wish a
559 : fatal error if it happens not to be
560 : a directory */
561 0 : {
562 : os_file_dir_t dir;
563 : #ifdef __WIN__
564 : LPWIN32_FIND_DATA lpFindFileData;
565 : char path[OS_FILE_MAX_PATH + 3];
566 :
567 : ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
568 :
569 : strcpy(path, dirname);
570 : strcpy(path + strlen(path), "\\*");
571 :
572 : /* Note that in Windows opening the 'directory stream' also retrieves
573 : the first entry in the directory. Since it is '.', that is no problem,
574 : as we will skip over the '.' and '..' entries anyway. */
575 :
576 : lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
577 :
578 : dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
579 :
580 : ut_free(lpFindFileData);
581 :
582 : if (dir == INVALID_HANDLE_VALUE) {
583 :
584 : if (error_is_fatal) {
585 : os_file_handle_error(dirname, "opendir");
586 : }
587 :
588 : return(NULL);
589 : }
590 :
591 : return(dir);
592 : #else
593 0 : dir = opendir(dirname);
594 :
595 0 : if (dir == NULL && error_is_fatal) {
596 0 : os_file_handle_error(dirname, "opendir");
597 : }
598 :
599 0 : return(dir);
600 : #endif
601 : }
602 :
603 : /***************************************************************************
604 : Closes a directory stream. */
605 : UNIV_INTERN
606 : int
607 : os_file_closedir(
608 : /*=============*/
609 : /* out: 0 if success, -1 if failure */
610 : os_file_dir_t dir) /* in: directory stream */
611 0 : {
612 : #ifdef __WIN__
613 : BOOL ret;
614 :
615 : ret = FindClose(dir);
616 :
617 : if (!ret) {
618 : os_file_handle_error_no_exit(NULL, "closedir");
619 :
620 : return(-1);
621 : }
622 :
623 : return(0);
624 : #else
625 : int ret;
626 :
627 0 : ret = closedir(dir);
628 :
629 0 : if (ret) {
630 0 : os_file_handle_error_no_exit(NULL, "closedir");
631 : }
632 :
633 0 : return(ret);
634 : #endif
635 : }
636 :
637 : /***************************************************************************
638 : This function returns information of the next file in the directory. We jump
639 : over the '.' and '..' entries in the directory. */
640 : UNIV_INTERN
641 : int
642 : os_file_readdir_next_file(
643 : /*======================*/
644 : /* out: 0 if ok, -1 if error, 1 if at the end
645 : of the directory */
646 : const char* dirname,/* in: directory name or path */
647 : os_file_dir_t dir, /* in: directory stream */
648 : os_file_stat_t* info) /* in/out: buffer where the info is returned */
649 0 : {
650 : #ifdef __WIN__
651 : LPWIN32_FIND_DATA lpFindFileData;
652 : BOOL ret;
653 :
654 : lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
655 : next_file:
656 : ret = FindNextFile(dir, lpFindFileData);
657 :
658 : if (ret) {
659 : ut_a(strlen((char *) lpFindFileData->cFileName)
660 : < OS_FILE_MAX_PATH);
661 :
662 : if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
663 : || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
664 :
665 : goto next_file;
666 : }
667 :
668 : strcpy(info->name, (char *) lpFindFileData->cFileName);
669 :
670 : info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
671 : + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
672 : << 32);
673 :
674 : if (lpFindFileData->dwFileAttributes
675 : & FILE_ATTRIBUTE_REPARSE_POINT) {
676 : /* TODO: test Windows symlinks */
677 : /* TODO: MySQL has apparently its own symlink
678 : implementation in Windows, dbname.sym can
679 : redirect a database directory:
680 : http://dev.mysql.com/doc/refman/5.1/en/
681 : windows-symbolic-links.html */
682 : info->type = OS_FILE_TYPE_LINK;
683 : } else if (lpFindFileData->dwFileAttributes
684 : & FILE_ATTRIBUTE_DIRECTORY) {
685 : info->type = OS_FILE_TYPE_DIR;
686 : } else {
687 : /* It is probably safest to assume that all other
688 : file types are normal. Better to check them rather
689 : than blindly skip them. */
690 :
691 : info->type = OS_FILE_TYPE_FILE;
692 : }
693 : }
694 :
695 : ut_free(lpFindFileData);
696 :
697 : if (ret) {
698 : return(0);
699 : } else if (GetLastError() == ERROR_NO_MORE_FILES) {
700 :
701 : return(1);
702 : } else {
703 : os_file_handle_error_no_exit(dirname,
704 : "readdir_next_file");
705 : return(-1);
706 : }
707 : #else
708 : struct dirent* ent;
709 : char* full_path;
710 : int ret;
711 : struct stat statinfo;
712 : #ifdef HAVE_READDIR_R
713 : char dirent_buf[sizeof(struct dirent)
714 : + _POSIX_PATH_MAX + 100];
715 : /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
716 : the max file name len; but in most standards, the
717 : length is NAME_MAX; we add 100 to be even safer */
718 : #endif
719 :
720 0 : next_file:
721 :
722 : #ifdef HAVE_READDIR_R
723 0 : ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
724 :
725 0 : if (ret != 0) {
726 0 : fprintf(stderr,
727 : "InnoDB: cannot read directory %s, error %lu\n",
728 : dirname, (ulong)ret);
729 :
730 0 : return(-1);
731 : }
732 :
733 0 : if (ent == NULL) {
734 : /* End of directory */
735 :
736 0 : return(1);
737 : }
738 :
739 0 : ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
740 : #else
741 : ent = readdir(dir);
742 :
743 : if (ent == NULL) {
744 :
745 : return(1);
746 : }
747 : #endif
748 0 : ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
749 :
750 0 : if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
751 :
752 : goto next_file;
753 : }
754 :
755 0 : strcpy(info->name, ent->d_name);
756 :
757 0 : full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
758 :
759 0 : sprintf(full_path, "%s/%s", dirname, ent->d_name);
760 :
761 0 : ret = stat(full_path, &statinfo);
762 :
763 0 : if (ret) {
764 0 : os_file_handle_error_no_exit(full_path, "stat");
765 :
766 0 : ut_free(full_path);
767 :
768 0 : return(-1);
769 : }
770 :
771 0 : info->size = (ib_int64_t)statinfo.st_size;
772 :
773 0 : if (S_ISDIR(statinfo.st_mode)) {
774 0 : info->type = OS_FILE_TYPE_DIR;
775 0 : } else if (S_ISLNK(statinfo.st_mode)) {
776 0 : info->type = OS_FILE_TYPE_LINK;
777 0 : } else if (S_ISREG(statinfo.st_mode)) {
778 0 : info->type = OS_FILE_TYPE_FILE;
779 : } else {
780 0 : info->type = OS_FILE_TYPE_UNKNOWN;
781 : }
782 :
783 0 : ut_free(full_path);
784 :
785 0 : return(0);
786 : #endif
787 : }
788 :
789 : /*********************************************************************
790 : This function attempts to create a directory named pathname. The new directory
791 : gets default permissions. On Unix the permissions are (0770 & ~umask). If the
792 : directory exists already, nothing is done and the call succeeds, unless the
793 : fail_if_exists arguments is true. */
794 : UNIV_INTERN
795 : ibool
796 : os_file_create_directory(
797 : /*=====================*/
798 : /* out: TRUE if call succeeds,
799 : FALSE on error */
800 : const char* pathname, /* in: directory name as
801 : null-terminated string */
802 : ibool fail_if_exists) /* in: if TRUE, pre-existing directory
803 : is treated as an error. */
804 0 : {
805 : #ifdef __WIN__
806 : BOOL rcode;
807 :
808 : rcode = CreateDirectory((LPCTSTR) pathname, NULL);
809 : if (!(rcode != 0
810 : || (GetLastError() == ERROR_ALREADY_EXISTS
811 : && !fail_if_exists))) {
812 : /* failure */
813 : os_file_handle_error(pathname, "CreateDirectory");
814 :
815 : return(FALSE);
816 : }
817 :
818 : return (TRUE);
819 : #else
820 : int rcode;
821 :
822 0 : rcode = mkdir(pathname, 0770);
823 :
824 0 : if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
825 : /* failure */
826 0 : os_file_handle_error(pathname, "mkdir");
827 :
828 0 : return(FALSE);
829 : }
830 :
831 0 : return (TRUE);
832 : #endif
833 : }
834 :
835 : /********************************************************************
836 : A simple function to open or create a file. */
837 : UNIV_INTERN
838 : os_file_t
839 : os_file_create_simple(
840 : /*==================*/
841 : /* out, own: handle to the file, not defined
842 : if error, error number can be retrieved with
843 : os_file_get_last_error */
844 : const char* name, /* in: name of the file or path as a
845 : null-terminated string */
846 : ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
847 : opened (if does not exist, error), or
848 : OS_FILE_CREATE if a new file is created
849 : (if exists, error), or
850 : OS_FILE_CREATE_PATH if new file
851 : (if exists, error) and subdirectories along
852 : its path are created (if needed)*/
853 : ulint access_type,/* in: OS_FILE_READ_ONLY or
854 : OS_FILE_READ_WRITE */
855 : ibool* success)/* out: TRUE if succeed, FALSE if error */
856 0 : {
857 : #ifdef __WIN__
858 : os_file_t file;
859 : DWORD create_flag;
860 : DWORD access;
861 : DWORD attributes = 0;
862 : ibool retry;
863 :
864 : try_again:
865 : ut_a(name);
866 :
867 : if (create_mode == OS_FILE_OPEN) {
868 : create_flag = OPEN_EXISTING;
869 : } else if (create_mode == OS_FILE_CREATE) {
870 : create_flag = CREATE_NEW;
871 : } else if (create_mode == OS_FILE_CREATE_PATH) {
872 : /* create subdirs along the path if needed */
873 : *success = os_file_create_subdirs_if_needed(name);
874 : if (!*success) {
875 : ut_error;
876 : }
877 : create_flag = CREATE_NEW;
878 : create_mode = OS_FILE_CREATE;
879 : } else {
880 : create_flag = 0;
881 : ut_error;
882 : }
883 :
884 : if (access_type == OS_FILE_READ_ONLY) {
885 : access = GENERIC_READ;
886 : } else if (access_type == OS_FILE_READ_WRITE) {
887 : access = GENERIC_READ | GENERIC_WRITE;
888 : } else {
889 : access = 0;
890 : ut_error;
891 : }
892 :
893 : file = CreateFile((LPCTSTR) name,
894 : access,
895 : FILE_SHARE_READ | FILE_SHARE_WRITE,
896 : /* file can be read and written also
897 : by other processes */
898 : NULL, /* default security attributes */
899 : create_flag,
900 : attributes,
901 : NULL); /* no template file */
902 :
903 : if (file == INVALID_HANDLE_VALUE) {
904 : *success = FALSE;
905 :
906 : retry = os_file_handle_error(name,
907 : create_mode == OS_FILE_OPEN ?
908 : "open" : "create");
909 : if (retry) {
910 : goto try_again;
911 : }
912 : } else {
913 : *success = TRUE;
914 : }
915 :
916 : return(file);
917 : #else /* __WIN__ */
918 : os_file_t file;
919 : int create_flag;
920 : ibool retry;
921 :
922 0 : try_again:
923 0 : ut_a(name);
924 :
925 0 : if (create_mode == OS_FILE_OPEN) {
926 0 : if (access_type == OS_FILE_READ_ONLY) {
927 0 : create_flag = O_RDONLY;
928 : } else {
929 0 : create_flag = O_RDWR;
930 : }
931 0 : } else if (create_mode == OS_FILE_CREATE) {
932 0 : create_flag = O_RDWR | O_CREAT | O_EXCL;
933 0 : } else if (create_mode == OS_FILE_CREATE_PATH) {
934 : /* create subdirs along the path if needed */
935 0 : *success = os_file_create_subdirs_if_needed(name);
936 0 : if (!*success) {
937 0 : return (-1);
938 : }
939 0 : create_flag = O_RDWR | O_CREAT | O_EXCL;
940 0 : create_mode = OS_FILE_CREATE;
941 : } else {
942 0 : create_flag = 0;
943 0 : ut_error;
944 : }
945 :
946 0 : if (create_mode == OS_FILE_CREATE) {
947 0 : file = open(name, create_flag, S_IRUSR | S_IWUSR
948 : | S_IRGRP | S_IWGRP);
949 : } else {
950 0 : file = open(name, create_flag);
951 : }
952 :
953 0 : if (file == -1) {
954 0 : *success = FALSE;
955 :
956 0 : retry = os_file_handle_error(name,
957 : create_mode == OS_FILE_OPEN ?
958 : "open" : "create");
959 0 : if (retry) {
960 0 : goto try_again;
961 : }
962 : #ifdef USE_FILE_LOCK
963 0 : } else if (access_type == OS_FILE_READ_WRITE
964 : && os_file_lock(file, name)) {
965 0 : *success = FALSE;
966 0 : close(file);
967 0 : file = -1;
968 : #endif
969 : } else {
970 0 : *success = TRUE;
971 : }
972 :
973 0 : return(file);
974 : #endif /* __WIN__ */
975 : }
976 :
977 : /********************************************************************
978 : A simple function to open or create a file. */
979 : UNIV_INTERN
980 : os_file_t
981 : os_file_create_simple_no_error_handling(
982 : /*====================================*/
983 : /* out, own: handle to the file, not defined
984 : if error, error number can be retrieved with
985 : os_file_get_last_error */
986 : const char* name, /* in: name of the file or path as a
987 : null-terminated string */
988 : ulint create_mode,/* in: OS_FILE_OPEN if an existing file
989 : is opened (if does not exist, error), or
990 : OS_FILE_CREATE if a new file is created
991 : (if exists, error) */
992 : ulint access_type,/* in: OS_FILE_READ_ONLY,
993 : OS_FILE_READ_WRITE, or
994 : OS_FILE_READ_ALLOW_DELETE; the last option is
995 : used by a backup program reading the file */
996 : ibool* success)/* out: TRUE if succeed, FALSE if error */
997 0 : {
998 : #ifdef __WIN__
999 : os_file_t file;
1000 : DWORD create_flag;
1001 : DWORD access;
1002 : DWORD attributes = 0;
1003 : DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
1004 :
1005 : ut_a(name);
1006 :
1007 : if (create_mode == OS_FILE_OPEN) {
1008 : create_flag = OPEN_EXISTING;
1009 : } else if (create_mode == OS_FILE_CREATE) {
1010 : create_flag = CREATE_NEW;
1011 : } else {
1012 : create_flag = 0;
1013 : ut_error;
1014 : }
1015 :
1016 : if (access_type == OS_FILE_READ_ONLY) {
1017 : access = GENERIC_READ;
1018 : } else if (access_type == OS_FILE_READ_WRITE) {
1019 : access = GENERIC_READ | GENERIC_WRITE;
1020 : } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
1021 : access = GENERIC_READ;
1022 : share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
1023 : | FILE_SHARE_WRITE; /* A backup program has to give
1024 : mysqld the maximum freedom to
1025 : do what it likes with the
1026 : file */
1027 : } else {
1028 : access = 0;
1029 : ut_error;
1030 : }
1031 :
1032 : file = CreateFile((LPCTSTR) name,
1033 : access,
1034 : share_mode,
1035 : NULL, /* default security attributes */
1036 : create_flag,
1037 : attributes,
1038 : NULL); /* no template file */
1039 :
1040 : if (file == INVALID_HANDLE_VALUE) {
1041 : *success = FALSE;
1042 : } else {
1043 : *success = TRUE;
1044 : }
1045 :
1046 : return(file);
1047 : #else /* __WIN__ */
1048 : os_file_t file;
1049 : int create_flag;
1050 :
1051 0 : ut_a(name);
1052 :
1053 0 : if (create_mode == OS_FILE_OPEN) {
1054 0 : if (access_type == OS_FILE_READ_ONLY) {
1055 0 : create_flag = O_RDONLY;
1056 : } else {
1057 0 : create_flag = O_RDWR;
1058 : }
1059 0 : } else if (create_mode == OS_FILE_CREATE) {
1060 0 : create_flag = O_RDWR | O_CREAT | O_EXCL;
1061 : } else {
1062 0 : create_flag = 0;
1063 0 : ut_error;
1064 : }
1065 :
1066 0 : if (create_mode == OS_FILE_CREATE) {
1067 0 : file = open(name, create_flag, S_IRUSR | S_IWUSR
1068 : | S_IRGRP | S_IWGRP);
1069 : } else {
1070 0 : file = open(name, create_flag);
1071 : }
1072 :
1073 0 : if (file == -1) {
1074 0 : *success = FALSE;
1075 : #ifdef USE_FILE_LOCK
1076 0 : } else if (access_type == OS_FILE_READ_WRITE
1077 : && os_file_lock(file, name)) {
1078 0 : *success = FALSE;
1079 0 : close(file);
1080 0 : file = -1;
1081 : #endif
1082 : } else {
1083 0 : *success = TRUE;
1084 : }
1085 :
1086 0 : return(file);
1087 : #endif /* __WIN__ */
1088 : }
1089 :
1090 : /********************************************************************
1091 : Tries to disable OS caching on an opened file descriptor. */
1092 : UNIV_INTERN
1093 : void
1094 : os_file_set_nocache(
1095 : /*================*/
1096 : int fd, /* in: file descriptor to alter */
1097 : const char* file_name, /* in: file name, used in the
1098 : diagnostic message */
1099 : const char* operation_name) /* in: "open" or "create"; used in the
1100 : diagnostic message */
1101 0 : {
1102 : /* some versions of Solaris may not have DIRECTIO_ON */
1103 : #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
1104 : if (directio(fd, DIRECTIO_ON) == -1) {
1105 : int errno_save;
1106 : errno_save = (int)errno;
1107 : ut_print_timestamp(stderr);
1108 : fprintf(stderr,
1109 : " InnoDB: Failed to set DIRECTIO_ON "
1110 : "on file %s: %s: %s, continuing anyway\n",
1111 : file_name, operation_name, strerror(errno_save));
1112 : }
1113 : #elif defined(O_DIRECT)
1114 0 : if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
1115 : int errno_save;
1116 0 : errno_save = (int)errno;
1117 0 : ut_print_timestamp(stderr);
1118 0 : fprintf(stderr,
1119 : " InnoDB: Failed to set O_DIRECT "
1120 : "on file %s: %s: %s, continuing anyway\n",
1121 : file_name, operation_name, strerror(errno_save));
1122 0 : if (errno_save == EINVAL) {
1123 0 : ut_print_timestamp(stderr);
1124 0 : fprintf(stderr,
1125 : " InnoDB: O_DIRECT is known to result in "
1126 : "'Invalid argument' on Linux on tmpfs, "
1127 : "see MySQL Bug#26662\n");
1128 : }
1129 : }
1130 : #else /* Required for OSX */
1131 : (void)fd;
1132 : (void)file_name;
1133 : (void)operation_name;
1134 : #endif
1135 0 : }
1136 :
1137 : /********************************************************************
1138 : Opens an existing file or creates a new. */
1139 : UNIV_INTERN
1140 : os_file_t
1141 : os_file_create(
1142 : /*===========*/
1143 : /* out, own: handle to the file, not defined
1144 : if error, error number can be retrieved with
1145 : os_file_get_last_error */
1146 : const char* name, /* in: name of the file or path as a
1147 : null-terminated string */
1148 : ulint create_mode,/* in: OS_FILE_OPEN if an existing file
1149 : is opened (if does not exist, error), or
1150 : OS_FILE_CREATE if a new file is created
1151 : (if exists, error),
1152 : OS_FILE_OVERWRITE if a new file is created
1153 : or an old overwritten;
1154 : OS_FILE_OPEN_RAW, if a raw device or disk
1155 : partition should be opened */
1156 : ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
1157 : non-buffered i/o is desired,
1158 : OS_FILE_NORMAL, if any normal file;
1159 : NOTE that it also depends on type, os_aio_..
1160 : and srv_.. variables whether we really use
1161 : async i/o or unbuffered i/o: look in the
1162 : function source code for the exact rules */
1163 : ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
1164 : ibool* success)/* out: TRUE if succeed, FALSE if error */
1165 183 : {
1166 : #ifdef __WIN__
1167 : os_file_t file;
1168 : DWORD share_mode = FILE_SHARE_READ;
1169 : DWORD create_flag;
1170 : DWORD attributes;
1171 : ibool retry;
1172 : try_again:
1173 : ut_a(name);
1174 :
1175 : if (create_mode == OS_FILE_OPEN_RAW) {
1176 : create_flag = OPEN_EXISTING;
1177 : share_mode = FILE_SHARE_WRITE;
1178 : } else if (create_mode == OS_FILE_OPEN
1179 : || create_mode == OS_FILE_OPEN_RETRY) {
1180 : create_flag = OPEN_EXISTING;
1181 : } else if (create_mode == OS_FILE_CREATE) {
1182 : create_flag = CREATE_NEW;
1183 : } else if (create_mode == OS_FILE_OVERWRITE) {
1184 : create_flag = CREATE_ALWAYS;
1185 : } else {
1186 : create_flag = 0;
1187 : ut_error;
1188 : }
1189 :
1190 : if (purpose == OS_FILE_AIO) {
1191 : /* If specified, use asynchronous (overlapped) io and no
1192 : buffering of writes in the OS */
1193 : attributes = 0;
1194 : #ifdef WIN_ASYNC_IO
1195 : if (os_aio_use_native_aio) {
1196 : attributes = attributes | FILE_FLAG_OVERLAPPED;
1197 : }
1198 : #endif
1199 : #ifdef UNIV_NON_BUFFERED_IO
1200 : if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1201 : /* Do not use unbuffered i/o to log files because
1202 : value 2 denotes that we do not flush the log at every
1203 : commit, but only once per second */
1204 : } else if (srv_win_file_flush_method
1205 : == SRV_WIN_IO_UNBUFFERED) {
1206 : attributes = attributes | FILE_FLAG_NO_BUFFERING;
1207 : }
1208 : #endif
1209 : } else if (purpose == OS_FILE_NORMAL) {
1210 : attributes = 0;
1211 : #ifdef UNIV_NON_BUFFERED_IO
1212 : if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1213 : /* Do not use unbuffered i/o to log files because
1214 : value 2 denotes that we do not flush the log at every
1215 : commit, but only once per second */
1216 : } else if (srv_win_file_flush_method
1217 : == SRV_WIN_IO_UNBUFFERED) {
1218 : attributes = attributes | FILE_FLAG_NO_BUFFERING;
1219 : }
1220 : #endif
1221 : } else {
1222 : attributes = 0;
1223 : ut_error;
1224 : }
1225 :
1226 : file = CreateFile((LPCTSTR) name,
1227 : GENERIC_READ | GENERIC_WRITE, /* read and write
1228 : access */
1229 : share_mode, /* File can be read also by other
1230 : processes; we must give the read
1231 : permission because of ibbackup. We do
1232 : not give the write permission to
1233 : others because if one would succeed to
1234 : start 2 instances of mysqld on the
1235 : SAME files, that could cause severe
1236 : database corruption! When opening
1237 : raw disk partitions, Microsoft manuals
1238 : say that we must give also the write
1239 : permission. */
1240 : NULL, /* default security attributes */
1241 : create_flag,
1242 : attributes,
1243 : NULL); /* no template file */
1244 :
1245 : if (file == INVALID_HANDLE_VALUE) {
1246 : *success = FALSE;
1247 :
1248 : /* When srv_file_per_table is on, file creation failure may not
1249 : be critical to the whole instance. Do not crash the server in
1250 : case of unknown errors. */
1251 : if (srv_file_per_table) {
1252 : retry = os_file_handle_error_no_exit(name,
1253 : create_mode == OS_FILE_CREATE ?
1254 : "create" : "open");
1255 : } else {
1256 : retry = os_file_handle_error(name,
1257 : create_mode == OS_FILE_CREATE ?
1258 : "create" : "open");
1259 : }
1260 :
1261 : if (retry) {
1262 : goto try_again;
1263 : }
1264 : } else {
1265 : *success = TRUE;
1266 : }
1267 :
1268 : return(file);
1269 : #else /* __WIN__ */
1270 : os_file_t file;
1271 : int create_flag;
1272 : ibool retry;
1273 183 : const char* mode_str = NULL;
1274 183 : const char* type_str = NULL;
1275 183 : const char* purpose_str = NULL;
1276 :
1277 183 : try_again:
1278 183 : ut_a(name);
1279 :
1280 303 : if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
1281 : || create_mode == OS_FILE_OPEN_RETRY) {
1282 120 : mode_str = "OPEN";
1283 120 : create_flag = O_RDWR;
1284 63 : } else if (create_mode == OS_FILE_CREATE) {
1285 63 : mode_str = "CREATE";
1286 63 : create_flag = O_RDWR | O_CREAT | O_EXCL;
1287 0 : } else if (create_mode == OS_FILE_OVERWRITE) {
1288 0 : mode_str = "OVERWRITE";
1289 0 : create_flag = O_RDWR | O_CREAT | O_TRUNC;
1290 : } else {
1291 0 : create_flag = 0;
1292 0 : ut_error;
1293 : }
1294 :
1295 183 : if (type == OS_LOG_FILE) {
1296 122 : type_str = "LOG";
1297 61 : } else if (type == OS_DATA_FILE) {
1298 61 : type_str = "DATA";
1299 : } else {
1300 0 : ut_error;
1301 : }
1302 :
1303 183 : if (purpose == OS_FILE_AIO) {
1304 101 : purpose_str = "AIO";
1305 82 : } else if (purpose == OS_FILE_NORMAL) {
1306 82 : purpose_str = "NORMAL";
1307 : } else {
1308 0 : ut_error;
1309 : }
1310 :
1311 : #if 0
1312 : fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
1313 : name, mode_str, type_str, purpose_str);
1314 : #endif
1315 : #ifdef O_SYNC
1316 : /* We let O_SYNC only affect log files; note that we map O_DSYNC to
1317 : O_SYNC because the datasync options seemed to corrupt files in 2001
1318 : in both Linux and Solaris */
1319 183 : if (type == OS_LOG_FILE
1320 : && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
1321 :
1322 : # if 0
1323 : fprintf(stderr, "Using O_SYNC for file %s\n", name);
1324 : # endif
1325 :
1326 0 : create_flag = create_flag | O_SYNC;
1327 : }
1328 : #endif /* O_SYNC */
1329 :
1330 183 : file = open(name, create_flag, os_innodb_umask);
1331 :
1332 183 : if (file == -1) {
1333 57 : *success = FALSE;
1334 :
1335 : /* When srv_file_per_table is on, file creation failure may not
1336 : be critical to the whole instance. Do not crash the server in
1337 : case of unknown errors. */
1338 57 : if (srv_file_per_table) {
1339 0 : retry = os_file_handle_error_no_exit(name,
1340 : create_mode == OS_FILE_CREATE ?
1341 : "create" : "open");
1342 : } else {
1343 57 : retry = os_file_handle_error(name,
1344 : create_mode == OS_FILE_CREATE ?
1345 : "create" : "open");
1346 : }
1347 :
1348 57 : if (retry) {
1349 0 : goto try_again;
1350 : } else {
1351 57 : return(file /* -1 */);
1352 : }
1353 : }
1354 : /* else */
1355 :
1356 126 : *success = TRUE;
1357 :
1358 : /* We disable OS caching (O_DIRECT) only on data files */
1359 126 : if (type != OS_LOG_FILE
1360 : && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
1361 :
1362 0 : os_file_set_nocache(file, name, mode_str);
1363 : }
1364 :
1365 : #ifdef USE_FILE_LOCK
1366 126 : if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
1367 :
1368 0 : if (create_mode == OS_FILE_OPEN_RETRY) {
1369 : int i;
1370 0 : ut_print_timestamp(stderr);
1371 0 : fputs(" InnoDB: Retrying to lock"
1372 : " the first data file\n",
1373 : stderr);
1374 0 : for (i = 0; i < 100; i++) {
1375 0 : os_thread_sleep(1000000);
1376 0 : if (!os_file_lock(file, name)) {
1377 0 : *success = TRUE;
1378 0 : return(file);
1379 : }
1380 : }
1381 0 : ut_print_timestamp(stderr);
1382 0 : fputs(" InnoDB: Unable to open the first data file\n",
1383 : stderr);
1384 : }
1385 :
1386 0 : *success = FALSE;
1387 0 : close(file);
1388 0 : file = -1;
1389 : }
1390 : #endif /* USE_FILE_LOCK */
1391 :
1392 126 : return(file);
1393 : #endif /* __WIN__ */
1394 : }
1395 :
1396 : /***************************************************************************
1397 : Deletes a file if it exists. The file has to be closed before calling this. */
1398 : UNIV_INTERN
1399 : ibool
1400 : os_file_delete_if_exists(
1401 : /*=====================*/
1402 : /* out: TRUE if success */
1403 : const char* name) /* in: file path as a null-terminated string */
1404 0 : {
1405 : #ifdef __WIN__
1406 : BOOL ret;
1407 : ulint count = 0;
1408 : loop:
1409 : /* In Windows, deleting an .ibd file may fail if ibbackup is copying
1410 : it */
1411 :
1412 : ret = DeleteFile((LPCTSTR)name);
1413 :
1414 : if (ret) {
1415 : return(TRUE);
1416 : }
1417 :
1418 : if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1419 : /* the file does not exist, this not an error */
1420 :
1421 : return(TRUE);
1422 : }
1423 :
1424 : count++;
1425 :
1426 : if (count > 100 && 0 == (count % 10)) {
1427 : fprintf(stderr,
1428 : "InnoDB: Warning: cannot delete file %s\n"
1429 : "InnoDB: Are you running ibbackup"
1430 : " to back up the file?\n", name);
1431 :
1432 : os_file_get_last_error(TRUE); /* print error information */
1433 : }
1434 :
1435 : os_thread_sleep(1000000); /* sleep for a second */
1436 :
1437 : if (count > 2000) {
1438 :
1439 : return(FALSE);
1440 : }
1441 :
1442 : goto loop;
1443 : #else
1444 : int ret;
1445 :
1446 0 : ret = unlink(name);
1447 :
1448 0 : if (ret != 0 && errno != ENOENT) {
1449 0 : os_file_handle_error_no_exit(name, "delete");
1450 :
1451 0 : return(FALSE);
1452 : }
1453 :
1454 0 : return(TRUE);
1455 : #endif
1456 : }
1457 :
1458 : /***************************************************************************
1459 : Deletes a file. The file has to be closed before calling this. */
1460 : UNIV_INTERN
1461 : ibool
1462 : os_file_delete(
1463 : /*===========*/
1464 : /* out: TRUE if success */
1465 : const char* name) /* in: file path as a null-terminated string */
1466 0 : {
1467 : #ifdef __WIN__
1468 : BOOL ret;
1469 : ulint count = 0;
1470 : loop:
1471 : /* In Windows, deleting an .ibd file may fail if ibbackup is copying
1472 : it */
1473 :
1474 : ret = DeleteFile((LPCTSTR)name);
1475 :
1476 : if (ret) {
1477 : return(TRUE);
1478 : }
1479 :
1480 : if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1481 : /* If the file does not exist, we classify this as a 'mild'
1482 : error and return */
1483 :
1484 : return(FALSE);
1485 : }
1486 :
1487 : count++;
1488 :
1489 : if (count > 100 && 0 == (count % 10)) {
1490 : fprintf(stderr,
1491 : "InnoDB: Warning: cannot delete file %s\n"
1492 : "InnoDB: Are you running ibbackup"
1493 : " to back up the file?\n", name);
1494 :
1495 : os_file_get_last_error(TRUE); /* print error information */
1496 : }
1497 :
1498 : os_thread_sleep(1000000); /* sleep for a second */
1499 :
1500 : if (count > 2000) {
1501 :
1502 : return(FALSE);
1503 : }
1504 :
1505 : goto loop;
1506 : #else
1507 : int ret;
1508 :
1509 0 : ret = unlink(name);
1510 :
1511 0 : if (ret != 0) {
1512 0 : os_file_handle_error_no_exit(name, "delete");
1513 :
1514 0 : return(FALSE);
1515 : }
1516 :
1517 0 : return(TRUE);
1518 : #endif
1519 : }
1520 :
1521 : /***************************************************************************
1522 : Renames a file (can also move it to another directory). It is safest that the
1523 : file is closed before calling this function. */
1524 : UNIV_INTERN
1525 : ibool
1526 : os_file_rename(
1527 : /*===========*/
1528 : /* out: TRUE if success */
1529 : const char* oldpath,/* in: old file path as a null-terminated
1530 : string */
1531 : const char* newpath)/* in: new file path */
1532 0 : {
1533 : #ifdef __WIN__
1534 : BOOL ret;
1535 :
1536 : ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
1537 :
1538 : if (ret) {
1539 : return(TRUE);
1540 : }
1541 :
1542 : os_file_handle_error_no_exit(oldpath, "rename");
1543 :
1544 : return(FALSE);
1545 : #else
1546 : int ret;
1547 :
1548 0 : ret = rename(oldpath, newpath);
1549 :
1550 0 : if (ret != 0) {
1551 0 : os_file_handle_error_no_exit(oldpath, "rename");
1552 :
1553 0 : return(FALSE);
1554 : }
1555 :
1556 0 : return(TRUE);
1557 : #endif
1558 : }
1559 :
1560 : /***************************************************************************
1561 : Closes a file handle. In case of error, error number can be retrieved with
1562 : os_file_get_last_error. */
1563 : UNIV_INTERN
1564 : ibool
1565 : os_file_close(
1566 : /*==========*/
1567 : /* out: TRUE if success */
1568 : os_file_t file) /* in, own: handle to a file */
1569 126 : {
1570 : #ifdef __WIN__
1571 : BOOL ret;
1572 :
1573 : ut_a(file);
1574 :
1575 : ret = CloseHandle(file);
1576 :
1577 : if (ret) {
1578 : return(TRUE);
1579 : }
1580 :
1581 : os_file_handle_error(NULL, "close");
1582 :
1583 : return(FALSE);
1584 : #else
1585 : int ret;
1586 :
1587 126 : ret = close(file);
1588 :
1589 126 : if (ret == -1) {
1590 0 : os_file_handle_error(NULL, "close");
1591 :
1592 0 : return(FALSE);
1593 : }
1594 :
1595 126 : return(TRUE);
1596 : #endif
1597 : }
1598 :
1599 : /***************************************************************************
1600 : Closes a file handle. */
1601 : UNIV_INTERN
1602 : ibool
1603 : os_file_close_no_error_handling(
1604 : /*============================*/
1605 : /* out: TRUE if success */
1606 : os_file_t file) /* in, own: handle to a file */
1607 0 : {
1608 : #ifdef __WIN__
1609 : BOOL ret;
1610 :
1611 : ut_a(file);
1612 :
1613 : ret = CloseHandle(file);
1614 :
1615 : if (ret) {
1616 : return(TRUE);
1617 : }
1618 :
1619 : return(FALSE);
1620 : #else
1621 : int ret;
1622 :
1623 0 : ret = close(file);
1624 :
1625 0 : if (ret == -1) {
1626 :
1627 0 : return(FALSE);
1628 : }
1629 :
1630 0 : return(TRUE);
1631 : #endif
1632 : }
1633 :
1634 : /***************************************************************************
1635 : Gets a file size. */
1636 : UNIV_INTERN
1637 : ibool
1638 : os_file_get_size(
1639 : /*=============*/
1640 : /* out: TRUE if success */
1641 : os_file_t file, /* in: handle to a file */
1642 : ulint* size, /* out: least significant 32 bits of file
1643 : size */
1644 : ulint* size_high)/* out: most significant 32 bits of size */
1645 57 : {
1646 : #ifdef __WIN__
1647 : DWORD high;
1648 : DWORD low;
1649 :
1650 : low = GetFileSize(file, &high);
1651 :
1652 : if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
1653 : return(FALSE);
1654 : }
1655 :
1656 : *size = low;
1657 : *size_high = high;
1658 :
1659 : return(TRUE);
1660 : #else
1661 : off_t offs;
1662 :
1663 57 : offs = lseek(file, 0, SEEK_END);
1664 :
1665 57 : if (offs == ((off_t)-1)) {
1666 :
1667 0 : return(FALSE);
1668 : }
1669 :
1670 : if (sizeof(off_t) > 4) {
1671 57 : *size = (ulint)(offs & 0xFFFFFFFFUL);
1672 57 : *size_high = (ulint)(offs >> 32);
1673 : } else {
1674 : *size = (ulint) offs;
1675 : *size_high = 0;
1676 : }
1677 :
1678 57 : return(TRUE);
1679 : #endif
1680 : }
1681 :
1682 : /***************************************************************************
1683 : Gets file size as a 64-bit integer ib_int64_t. */
1684 : UNIV_INTERN
1685 : ib_int64_t
1686 : os_file_get_size_as_iblonglong(
1687 : /*===========================*/
1688 : /* out: size in bytes, -1 if error */
1689 : os_file_t file) /* in: handle to a file */
1690 0 : {
1691 : ulint size;
1692 : ulint size_high;
1693 : ibool success;
1694 :
1695 0 : success = os_file_get_size(file, &size, &size_high);
1696 :
1697 0 : if (!success) {
1698 :
1699 0 : return(-1);
1700 : }
1701 :
1702 0 : return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
1703 : }
1704 :
1705 : /***************************************************************************
1706 : Write the specified number of zeros to a newly created file. */
1707 : UNIV_INTERN
1708 : ibool
1709 : os_file_set_size(
1710 : /*=============*/
1711 : /* out: TRUE if success */
1712 : const char* name, /* in: name of the file or path as a
1713 : null-terminated string */
1714 : os_file_t file, /* in: handle to a file */
1715 : ulint size, /* in: least significant 32 bits of file
1716 : size */
1717 : ulint size_high)/* in: most significant 32 bits of size */
1718 6 : {
1719 : ib_int64_t current_size;
1720 : ib_int64_t desired_size;
1721 : ibool ret;
1722 : byte* buf;
1723 : byte* buf2;
1724 : ulint buf_size;
1725 :
1726 6 : ut_a(size == (size & 0xFFFFFFFF));
1727 :
1728 6 : current_size = 0;
1729 6 : desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
1730 :
1731 : /* Write up to 1 megabyte at a time. */
1732 6 : buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
1733 : * UNIV_PAGE_SIZE;
1734 6 : buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
1735 :
1736 : /* Align the buffer for possible raw i/o */
1737 6 : buf = ut_align(buf2, UNIV_PAGE_SIZE);
1738 :
1739 : /* Write buffer full of zeros */
1740 6 : memset(buf, 0, buf_size);
1741 :
1742 6 : if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1743 :
1744 0 : fprintf(stderr, "InnoDB: Progress in MB:");
1745 : }
1746 :
1747 52 : while (current_size < desired_size) {
1748 : ulint n_bytes;
1749 :
1750 40 : if (desired_size - current_size < (ib_int64_t) buf_size) {
1751 0 : n_bytes = (ulint) (desired_size - current_size);
1752 : } else {
1753 40 : n_bytes = buf_size;
1754 : }
1755 :
1756 40 : ret = os_file_write(name, file, buf,
1757 : (ulint)(current_size & 0xFFFFFFFF),
1758 : (ulint)(current_size >> 32),
1759 : n_bytes);
1760 40 : if (!ret) {
1761 0 : ut_free(buf2);
1762 0 : goto error_handling;
1763 : }
1764 :
1765 : /* Print about progress for each 100 MB written */
1766 40 : if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
1767 : != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
1768 :
1769 0 : fprintf(stderr, " %lu00",
1770 : (ulong) ((current_size + n_bytes)
1771 : / (ib_int64_t)(100 * 1024 * 1024)));
1772 : }
1773 :
1774 40 : current_size += n_bytes;
1775 : }
1776 :
1777 6 : if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1778 :
1779 0 : fprintf(stderr, "\n");
1780 : }
1781 :
1782 6 : ut_free(buf2);
1783 :
1784 6 : ret = os_file_flush(file);
1785 :
1786 6 : if (ret) {
1787 6 : return(TRUE);
1788 : }
1789 :
1790 0 : error_handling:
1791 0 : return(FALSE);
1792 : }
1793 :
1794 : /***************************************************************************
1795 : Truncates a file at its current position. */
1796 : UNIV_INTERN
1797 : ibool
1798 : os_file_set_eof(
1799 : /*============*/
1800 : /* out: TRUE if success */
1801 : FILE* file) /* in: file to be truncated */
1802 21 : {
1803 : #ifdef __WIN__
1804 : HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
1805 : return(SetEndOfFile(h));
1806 : #else /* __WIN__ */
1807 21 : return(!ftruncate(fileno(file), ftell(file)));
1808 : #endif /* __WIN__ */
1809 : }
1810 :
1811 : #ifndef __WIN__
1812 : /***************************************************************************
1813 : Wrapper to fsync(2) that retries the call on some errors.
1814 : Returns the value 0 if successful; otherwise the value -1 is returned and
1815 : the global variable errno is set to indicate the error. */
1816 :
1817 : static
1818 : int
1819 : os_file_fsync(
1820 : /*==========*/
1821 : /* out: 0 if success, -1 otherwise */
1822 : os_file_t file) /* in: handle to a file */
1823 57662 : {
1824 : int ret;
1825 : int failures;
1826 : ibool retry;
1827 :
1828 57662 : failures = 0;
1829 :
1830 : do {
1831 57662 : ret = fsync(file);
1832 :
1833 57662 : os_n_fsyncs++;
1834 :
1835 57662 : if (ret == -1 && errno == ENOLCK) {
1836 :
1837 0 : if (failures % 100 == 0) {
1838 :
1839 0 : ut_print_timestamp(stderr);
1840 0 : fprintf(stderr,
1841 : " InnoDB: fsync(): "
1842 : "No locks available; retrying\n");
1843 : }
1844 :
1845 0 : os_thread_sleep(200000 /* 0.2 sec */);
1846 :
1847 0 : failures++;
1848 :
1849 0 : retry = TRUE;
1850 : } else {
1851 :
1852 57662 : retry = FALSE;
1853 : }
1854 57662 : } while (retry);
1855 :
1856 57662 : return(ret);
1857 : }
1858 : #endif /* !__WIN__ */
1859 :
1860 : /***************************************************************************
1861 : Flushes the write buffers of a given file to the disk. */
1862 : UNIV_INTERN
1863 : ibool
1864 : os_file_flush(
1865 : /*==========*/
1866 : /* out: TRUE if success */
1867 : os_file_t file) /* in, own: handle to a file */
1868 57662 : {
1869 : #ifdef __WIN__
1870 : BOOL ret;
1871 :
1872 : ut_a(file);
1873 :
1874 : os_n_fsyncs++;
1875 :
1876 : ret = FlushFileBuffers(file);
1877 :
1878 : if (ret) {
1879 : return(TRUE);
1880 : }
1881 :
1882 : /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
1883 : actually a raw device, we choose to ignore that error if we are using
1884 : raw disks */
1885 :
1886 : if (srv_start_raw_disk_in_use && GetLastError()
1887 : == ERROR_INVALID_FUNCTION) {
1888 : return(TRUE);
1889 : }
1890 :
1891 : os_file_handle_error(NULL, "flush");
1892 :
1893 : /* It is a fatal error if a file flush does not succeed, because then
1894 : the database can get corrupt on disk */
1895 : ut_error;
1896 :
1897 : return(FALSE);
1898 : #else
1899 : int ret;
1900 :
1901 : #if defined(HAVE_DARWIN_THREADS)
1902 : # ifndef F_FULLFSYNC
1903 : /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
1904 : # define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
1905 : # elif F_FULLFSYNC != 51
1906 : # error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
1907 : # endif
1908 : /* Apple has disabled fsync() for internal disk drives in OS X. That
1909 : caused corruption for a user when he tested a power outage. Let us in
1910 : OS X use a nonstandard flush method recommended by an Apple
1911 : engineer. */
1912 :
1913 : if (!srv_have_fullfsync) {
1914 : /* If we are not on an operating system that supports this,
1915 : then fall back to a plain fsync. */
1916 :
1917 : ret = os_file_fsync(file);
1918 : } else {
1919 : ret = fcntl(file, F_FULLFSYNC, NULL);
1920 :
1921 : if (ret) {
1922 : /* If we are not on a file system that supports this,
1923 : then fall back to a plain fsync. */
1924 : ret = os_file_fsync(file);
1925 : }
1926 : }
1927 : #else
1928 57662 : ret = os_file_fsync(file);
1929 : #endif
1930 :
1931 57662 : if (ret == 0) {
1932 57662 : return(TRUE);
1933 : }
1934 :
1935 : /* Since Linux returns EINVAL if the 'file' is actually a raw device,
1936 : we choose to ignore that error if we are using raw disks */
1937 :
1938 0 : if (srv_start_raw_disk_in_use && errno == EINVAL) {
1939 :
1940 0 : return(TRUE);
1941 : }
1942 :
1943 0 : ut_print_timestamp(stderr);
1944 :
1945 0 : fprintf(stderr,
1946 : " InnoDB: Error: the OS said file flush did not succeed\n");
1947 :
1948 0 : os_file_handle_error(NULL, "flush");
1949 :
1950 : /* It is a fatal error if a file flush does not succeed, because then
1951 : the database can get corrupt on disk */
1952 0 : ut_error;
1953 :
1954 : return(FALSE);
1955 : #endif
1956 : }
1957 :
1958 : #ifndef __WIN__
1959 : /***********************************************************************
1960 : Does a synchronous read operation in Posix. */
1961 : static
1962 : ssize_t
1963 : os_file_pread(
1964 : /*==========*/
1965 : /* out: number of bytes read, -1 if error */
1966 : os_file_t file, /* in: handle to a file */
1967 : void* buf, /* in: buffer where to read */
1968 : ulint n, /* in: number of bytes to read */
1969 : ulint offset, /* in: least significant 32 bits of file
1970 : offset from where to read */
1971 : ulint offset_high) /* in: most significant 32 bits of
1972 : offset */
1973 726 : {
1974 : off_t offs;
1975 : ssize_t n_bytes;
1976 :
1977 726 : ut_a((offset & 0xFFFFFFFFUL) == offset);
1978 :
1979 : /* If off_t is > 4 bytes in size, then we assume we can pass a
1980 : 64-bit address */
1981 :
1982 : if (sizeof(off_t) > 4) {
1983 726 : offs = (off_t)offset + (((off_t)offset_high) << 32);
1984 :
1985 : } else {
1986 : offs = (off_t)offset;
1987 :
1988 : if (offset_high > 0) {
1989 : fprintf(stderr,
1990 : "InnoDB: Error: file read at offset > 4 GB\n");
1991 : }
1992 : }
1993 :
1994 726 : os_n_file_reads++;
1995 :
1996 : #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
1997 726 : os_mutex_enter(os_file_count_mutex);
1998 726 : os_file_n_pending_preads++;
1999 726 : os_n_pending_reads++;
2000 726 : os_mutex_exit(os_file_count_mutex);
2001 :
2002 726 : n_bytes = pread(file, buf, (ssize_t)n, offs);
2003 :
2004 726 : os_mutex_enter(os_file_count_mutex);
2005 726 : os_file_n_pending_preads--;
2006 726 : os_n_pending_reads--;
2007 726 : os_mutex_exit(os_file_count_mutex);
2008 :
2009 726 : return(n_bytes);
2010 : #else
2011 : {
2012 : off_t ret_offset;
2013 : ssize_t ret;
2014 : ulint i;
2015 :
2016 : os_mutex_enter(os_file_count_mutex);
2017 : os_n_pending_reads++;
2018 : os_mutex_exit(os_file_count_mutex);
2019 :
2020 : /* Protect the seek / read operation with a mutex */
2021 : i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2022 :
2023 : os_mutex_enter(os_file_seek_mutexes[i]);
2024 :
2025 : ret_offset = lseek(file, offs, SEEK_SET);
2026 :
2027 : if (ret_offset < 0) {
2028 : ret = -1;
2029 : } else {
2030 : ret = read(file, buf, (ssize_t)n);
2031 : }
2032 :
2033 : os_mutex_exit(os_file_seek_mutexes[i]);
2034 :
2035 : os_mutex_enter(os_file_count_mutex);
2036 : os_n_pending_reads--;
2037 : os_mutex_exit(os_file_count_mutex);
2038 :
2039 : return(ret);
2040 : }
2041 : #endif
2042 : }
2043 :
2044 : /***********************************************************************
2045 : Does a synchronous write operation in Posix. */
2046 : static
2047 : ssize_t
2048 : os_file_pwrite(
2049 : /*===========*/
2050 : /* out: number of bytes written, -1 if error */
2051 : os_file_t file, /* in: handle to a file */
2052 : const void* buf, /* in: buffer from where to write */
2053 : ulint n, /* in: number of bytes to write */
2054 : ulint offset, /* in: least significant 32 bits of file
2055 : offset where to write */
2056 : ulint offset_high) /* in: most significant 32 bits of
2057 : offset */
2058 58284 : {
2059 : ssize_t ret;
2060 : off_t offs;
2061 :
2062 58284 : ut_a((offset & 0xFFFFFFFFUL) == offset);
2063 :
2064 : /* If off_t is > 4 bytes in size, then we assume we can pass a
2065 : 64-bit address */
2066 :
2067 : if (sizeof(off_t) > 4) {
2068 58284 : offs = (off_t)offset + (((off_t)offset_high) << 32);
2069 : } else {
2070 : offs = (off_t)offset;
2071 :
2072 : if (offset_high > 0) {
2073 : fprintf(stderr,
2074 : "InnoDB: Error: file write"
2075 : " at offset > 4 GB\n");
2076 : }
2077 : }
2078 :
2079 58284 : os_n_file_writes++;
2080 :
2081 : #if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
2082 58284 : os_mutex_enter(os_file_count_mutex);
2083 58284 : os_file_n_pending_pwrites++;
2084 58284 : os_n_pending_writes++;
2085 58284 : os_mutex_exit(os_file_count_mutex);
2086 :
2087 58284 : ret = pwrite(file, buf, (ssize_t)n, offs);
2088 :
2089 58284 : os_mutex_enter(os_file_count_mutex);
2090 58284 : os_file_n_pending_pwrites--;
2091 58284 : os_n_pending_writes--;
2092 58284 : os_mutex_exit(os_file_count_mutex);
2093 :
2094 : # ifdef UNIV_DO_FLUSH
2095 : if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
2096 : && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
2097 : && !os_do_not_call_flush_at_each_write) {
2098 :
2099 : /* Always do fsync to reduce the probability that when
2100 : the OS crashes, a database page is only partially
2101 : physically written to disk. */
2102 :
2103 : ut_a(TRUE == os_file_flush(file));
2104 : }
2105 : # endif /* UNIV_DO_FLUSH */
2106 :
2107 58284 : return(ret);
2108 : #else
2109 : {
2110 : off_t ret_offset;
2111 : ulint i;
2112 :
2113 : os_mutex_enter(os_file_count_mutex);
2114 : os_n_pending_writes++;
2115 : os_mutex_exit(os_file_count_mutex);
2116 :
2117 : /* Protect the seek / write operation with a mutex */
2118 : i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2119 :
2120 : os_mutex_enter(os_file_seek_mutexes[i]);
2121 :
2122 : ret_offset = lseek(file, offs, SEEK_SET);
2123 :
2124 : if (ret_offset < 0) {
2125 : ret = -1;
2126 :
2127 : goto func_exit;
2128 : }
2129 :
2130 : ret = write(file, buf, (ssize_t)n);
2131 :
2132 : # ifdef UNIV_DO_FLUSH
2133 : if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
2134 : && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
2135 : && !os_do_not_call_flush_at_each_write) {
2136 :
2137 : /* Always do fsync to reduce the probability that when
2138 : the OS crashes, a database page is only partially
2139 : physically written to disk. */
2140 :
2141 : ut_a(TRUE == os_file_flush(file));
2142 : }
2143 : # endif /* UNIV_DO_FLUSH */
2144 :
2145 : func_exit:
2146 : os_mutex_exit(os_file_seek_mutexes[i]);
2147 :
2148 : os_mutex_enter(os_file_count_mutex);
2149 : os_n_pending_writes--;
2150 : os_mutex_exit(os_file_count_mutex);
2151 :
2152 : return(ret);
2153 : }
2154 : #endif
2155 : }
2156 : #endif
2157 :
2158 : /***********************************************************************
2159 : Requests a synchronous positioned read operation. */
2160 : UNIV_INTERN
2161 : ibool
2162 : os_file_read(
2163 : /*=========*/
2164 : /* out: TRUE if request was
2165 : successful, FALSE if fail */
2166 : os_file_t file, /* in: handle to a file */
2167 : void* buf, /* in: buffer where to read */
2168 : ulint offset, /* in: least significant 32 bits of file
2169 : offset where to read */
2170 : ulint offset_high, /* in: most significant 32 bits of
2171 : offset */
2172 : ulint n) /* in: number of bytes to read */
2173 726 : {
2174 : #ifdef __WIN__
2175 : BOOL ret;
2176 : DWORD len;
2177 : DWORD ret2;
2178 : DWORD low;
2179 : DWORD high;
2180 : ibool retry;
2181 : ulint i;
2182 :
2183 : ut_a((offset & 0xFFFFFFFFUL) == offset);
2184 :
2185 : os_n_file_reads++;
2186 : os_bytes_read_since_printout += n;
2187 :
2188 : try_again:
2189 : ut_ad(file);
2190 : ut_ad(buf);
2191 : ut_ad(n > 0);
2192 :
2193 : low = (DWORD) offset;
2194 : high = (DWORD) offset_high;
2195 :
2196 : os_mutex_enter(os_file_count_mutex);
2197 : os_n_pending_reads++;
2198 : os_mutex_exit(os_file_count_mutex);
2199 :
2200 : /* Protect the seek / read operation with a mutex */
2201 : i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2202 :
2203 : os_mutex_enter(os_file_seek_mutexes[i]);
2204 :
2205 : ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2206 :
2207 : if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2208 :
2209 : os_mutex_exit(os_file_seek_mutexes[i]);
2210 :
2211 : os_mutex_enter(os_file_count_mutex);
2212 : os_n_pending_reads--;
2213 : os_mutex_exit(os_file_count_mutex);
2214 :
2215 : goto error_handling;
2216 : }
2217 :
2218 : ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2219 :
2220 : os_mutex_exit(os_file_seek_mutexes[i]);
2221 :
2222 : os_mutex_enter(os_file_count_mutex);
2223 : os_n_pending_reads--;
2224 : os_mutex_exit(os_file_count_mutex);
2225 :
2226 : if (ret && len == n) {
2227 : return(TRUE);
2228 : }
2229 : #else
2230 : ibool retry;
2231 : ssize_t ret;
2232 :
2233 726 : os_bytes_read_since_printout += n;
2234 :
2235 726 : try_again:
2236 726 : ret = os_file_pread(file, buf, n, offset, offset_high);
2237 :
2238 726 : if ((ulint)ret == n) {
2239 :
2240 726 : return(TRUE);
2241 : }
2242 :
2243 0 : fprintf(stderr,
2244 : "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
2245 : "InnoDB: Was only able to read %ld.\n",
2246 : (ulong)n, (ulong)offset_high,
2247 : (ulong)offset, (long)ret);
2248 : #endif
2249 : #ifdef __WIN__
2250 : error_handling:
2251 : #endif
2252 0 : retry = os_file_handle_error(NULL, "read");
2253 :
2254 0 : if (retry) {
2255 0 : goto try_again;
2256 : }
2257 :
2258 0 : fprintf(stderr,
2259 : "InnoDB: Fatal error: cannot read from file."
2260 : " OS error number %lu.\n",
2261 : #ifdef __WIN__
2262 : (ulong) GetLastError()
2263 : #else
2264 : (ulong) errno
2265 : #endif
2266 : );
2267 0 : fflush(stderr);
2268 :
2269 0 : ut_error;
2270 :
2271 : return(FALSE);
2272 : }
2273 :
2274 : /***********************************************************************
2275 : Requests a synchronous positioned read operation. This function does not do
2276 : any error handling. In case of error it returns FALSE. */
2277 : UNIV_INTERN
2278 : ibool
2279 : os_file_read_no_error_handling(
2280 : /*===========================*/
2281 : /* out: TRUE if request was
2282 : successful, FALSE if fail */
2283 : os_file_t file, /* in: handle to a file */
2284 : void* buf, /* in: buffer where to read */
2285 : ulint offset, /* in: least significant 32 bits of file
2286 : offset where to read */
2287 : ulint offset_high, /* in: most significant 32 bits of
2288 : offset */
2289 : ulint n) /* in: number of bytes to read */
2290 0 : {
2291 : #ifdef __WIN__
2292 : BOOL ret;
2293 : DWORD len;
2294 : DWORD ret2;
2295 : DWORD low;
2296 : DWORD high;
2297 : ibool retry;
2298 : ulint i;
2299 :
2300 : ut_a((offset & 0xFFFFFFFFUL) == offset);
2301 :
2302 : os_n_file_reads++;
2303 : os_bytes_read_since_printout += n;
2304 :
2305 : try_again:
2306 : ut_ad(file);
2307 : ut_ad(buf);
2308 : ut_ad(n > 0);
2309 :
2310 : low = (DWORD) offset;
2311 : high = (DWORD) offset_high;
2312 :
2313 : os_mutex_enter(os_file_count_mutex);
2314 : os_n_pending_reads++;
2315 : os_mutex_exit(os_file_count_mutex);
2316 :
2317 : /* Protect the seek / read operation with a mutex */
2318 : i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2319 :
2320 : os_mutex_enter(os_file_seek_mutexes[i]);
2321 :
2322 : ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2323 :
2324 : if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2325 :
2326 : os_mutex_exit(os_file_seek_mutexes[i]);
2327 :
2328 : os_mutex_enter(os_file_count_mutex);
2329 : os_n_pending_reads--;
2330 : os_mutex_exit(os_file_count_mutex);
2331 :
2332 : goto error_handling;
2333 : }
2334 :
2335 : ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2336 :
2337 : os_mutex_exit(os_file_seek_mutexes[i]);
2338 :
2339 : os_mutex_enter(os_file_count_mutex);
2340 : os_n_pending_reads--;
2341 : os_mutex_exit(os_file_count_mutex);
2342 :
2343 : if (ret && len == n) {
2344 : return(TRUE);
2345 : }
2346 : #else
2347 : ibool retry;
2348 : ssize_t ret;
2349 :
2350 0 : os_bytes_read_since_printout += n;
2351 :
2352 0 : try_again:
2353 0 : ret = os_file_pread(file, buf, n, offset, offset_high);
2354 :
2355 0 : if ((ulint)ret == n) {
2356 :
2357 0 : return(TRUE);
2358 : }
2359 : #endif
2360 : #ifdef __WIN__
2361 : error_handling:
2362 : #endif
2363 0 : retry = os_file_handle_error_no_exit(NULL, "read");
2364 :
2365 0 : if (retry) {
2366 0 : goto try_again;
2367 : }
2368 :
2369 0 : return(FALSE);
2370 : }
2371 :
2372 : /***********************************************************************
2373 : Rewind file to its start, read at most size - 1 bytes from it to str, and
2374 : NUL-terminate str. All errors are silently ignored. This function is
2375 : mostly meant to be used with temporary files. */
2376 : UNIV_INTERN
2377 : void
2378 : os_file_read_string(
2379 : /*================*/
2380 : FILE* file, /* in: file to read from */
2381 : char* str, /* in: buffer where to read */
2382 : ulint size) /* in: size of buffer */
2383 21 : {
2384 : size_t flen;
2385 :
2386 21 : if (size == 0) {
2387 0 : return;
2388 : }
2389 :
2390 21 : rewind(file);
2391 21 : flen = fread(str, 1, size - 1, file);
2392 21 : str[flen] = '\0';
2393 : }
2394 :
2395 : /***********************************************************************
2396 : Requests a synchronous write operation. */
2397 : UNIV_INTERN
2398 : ibool
2399 : os_file_write(
2400 : /*==========*/
2401 : /* out: TRUE if request was
2402 : successful, FALSE if fail */
2403 : const char* name, /* in: name of the file or path as a
2404 : null-terminated string */
2405 : os_file_t file, /* in: handle to a file */
2406 : const void* buf, /* in: buffer from which to write */
2407 : ulint offset, /* in: least significant 32 bits of file
2408 : offset where to write */
2409 : ulint offset_high, /* in: most significant 32 bits of
2410 : offset */
2411 : ulint n) /* in: number of bytes to write */
2412 58284 : {
2413 : #ifdef __WIN__
2414 : BOOL ret;
2415 : DWORD len;
2416 : DWORD ret2;
2417 : DWORD low;
2418 : DWORD high;
2419 : ulint i;
2420 : ulint n_retries = 0;
2421 : ulint err;
2422 :
2423 : ut_a((offset & 0xFFFFFFFF) == offset);
2424 :
2425 : os_n_file_writes++;
2426 :
2427 : ut_ad(file);
2428 : ut_ad(buf);
2429 : ut_ad(n > 0);
2430 : retry:
2431 : low = (DWORD) offset;
2432 : high = (DWORD) offset_high;
2433 :
2434 : os_mutex_enter(os_file_count_mutex);
2435 : os_n_pending_writes++;
2436 : os_mutex_exit(os_file_count_mutex);
2437 :
2438 : /* Protect the seek / write operation with a mutex */
2439 : i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2440 :
2441 : os_mutex_enter(os_file_seek_mutexes[i]);
2442 :
2443 : ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2444 :
2445 : if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2446 :
2447 : os_mutex_exit(os_file_seek_mutexes[i]);
2448 :
2449 : os_mutex_enter(os_file_count_mutex);
2450 : os_n_pending_writes--;
2451 : os_mutex_exit(os_file_count_mutex);
2452 :
2453 : ut_print_timestamp(stderr);
2454 :
2455 : fprintf(stderr,
2456 : " InnoDB: Error: File pointer positioning to"
2457 : " file %s failed at\n"
2458 : "InnoDB: offset %lu %lu. Operating system"
2459 : " error number %lu.\n"
2460 : "InnoDB: Some operating system error numbers"
2461 : " are described at\n"
2462 : "InnoDB: "
2463 : "http://dev.mysql.com/doc/refman/5.1/en/"
2464 : "operating-system-error-codes.html\n",
2465 : name, (ulong) offset_high, (ulong) offset,
2466 : (ulong) GetLastError());
2467 :
2468 : return(FALSE);
2469 : }
2470 :
2471 : ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
2472 :
2473 : /* Always do fsync to reduce the probability that when the OS crashes,
2474 : a database page is only partially physically written to disk. */
2475 :
2476 : # ifdef UNIV_DO_FLUSH
2477 : if (!os_do_not_call_flush_at_each_write) {
2478 : ut_a(TRUE == os_file_flush(file));
2479 : }
2480 : # endif /* UNIV_DO_FLUSH */
2481 :
2482 : os_mutex_exit(os_file_seek_mutexes[i]);
2483 :
2484 : os_mutex_enter(os_file_count_mutex);
2485 : os_n_pending_writes--;
2486 : os_mutex_exit(os_file_count_mutex);
2487 :
2488 : if (ret && len == n) {
2489 :
2490 : return(TRUE);
2491 : }
2492 :
2493 : /* If some background file system backup tool is running, then, at
2494 : least in Windows 2000, we may get here a specific error. Let us
2495 : retry the operation 100 times, with 1 second waits. */
2496 :
2497 : if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
2498 :
2499 : os_thread_sleep(1000000);
2500 :
2501 : n_retries++;
2502 :
2503 : goto retry;
2504 : }
2505 :
2506 : if (!os_has_said_disk_full) {
2507 :
2508 : err = (ulint)GetLastError();
2509 :
2510 : ut_print_timestamp(stderr);
2511 :
2512 : fprintf(stderr,
2513 : " InnoDB: Error: Write to file %s failed"
2514 : " at offset %lu %lu.\n"
2515 : "InnoDB: %lu bytes should have been written,"
2516 : " only %lu were written.\n"
2517 : "InnoDB: Operating system error number %lu.\n"
2518 : "InnoDB: Check that your OS and file system"
2519 : " support files of this size.\n"
2520 : "InnoDB: Check also that the disk is not full"
2521 : " or a disk quota exceeded.\n",
2522 : name, (ulong) offset_high, (ulong) offset,
2523 : (ulong) n, (ulong) len, (ulong) err);
2524 :
2525 : if (strerror((int)err) != NULL) {
2526 : fprintf(stderr,
2527 : "InnoDB: Error number %lu means '%s'.\n",
2528 : (ulong) err, strerror((int)err));
2529 : }
2530 :
2531 : fprintf(stderr,
2532 : "InnoDB: Some operating system error numbers"
2533 : " are described at\n"
2534 : "InnoDB: "
2535 : "http://dev.mysql.com/doc/refman/5.1/en/"
2536 : "operating-system-error-codes.html\n");
2537 :
2538 : os_has_said_disk_full = TRUE;
2539 : }
2540 :
2541 : return(FALSE);
2542 : #else
2543 : ssize_t ret;
2544 :
2545 58284 : ret = os_file_pwrite(file, buf, n, offset, offset_high);
2546 :
2547 58284 : if ((ulint)ret == n) {
2548 :
2549 58284 : return(TRUE);
2550 : }
2551 :
2552 0 : if (!os_has_said_disk_full) {
2553 :
2554 0 : ut_print_timestamp(stderr);
2555 :
2556 0 : fprintf(stderr,
2557 : " InnoDB: Error: Write to file %s failed"
2558 : " at offset %lu %lu.\n"
2559 : "InnoDB: %lu bytes should have been written,"
2560 : " only %ld were written.\n"
2561 : "InnoDB: Operating system error number %lu.\n"
2562 : "InnoDB: Check that your OS and file system"
2563 : " support files of this size.\n"
2564 : "InnoDB: Check also that the disk is not full"
2565 : " or a disk quota exceeded.\n",
2566 : name, offset_high, offset, n, (long int)ret,
2567 : (ulint)errno);
2568 0 : if (strerror(errno) != NULL) {
2569 0 : fprintf(stderr,
2570 : "InnoDB: Error number %lu means '%s'.\n",
2571 : (ulint)errno, strerror(errno));
2572 : }
2573 :
2574 0 : fprintf(stderr,
2575 : "InnoDB: Some operating system error numbers"
2576 : " are described at\n"
2577 : "InnoDB: "
2578 : "http://dev.mysql.com/doc/refman/5.1/en/"
2579 : "operating-system-error-codes.html\n");
2580 :
2581 0 : os_has_said_disk_full = TRUE;
2582 : }
2583 :
2584 0 : return(FALSE);
2585 : #endif
2586 : }
2587 :
2588 : /***********************************************************************
2589 : Check the existence and type of the given file. */
2590 : UNIV_INTERN
2591 : ibool
2592 : os_file_status(
2593 : /*===========*/
2594 : /* out: TRUE if call succeeded */
2595 : const char* path, /* in: pathname of the file */
2596 : ibool* exists, /* out: TRUE if file exists */
2597 : os_file_type_t* type) /* out: type of the file (if it exists) */
2598 0 : {
2599 : #ifdef __WIN__
2600 : int ret;
2601 : struct _stat statinfo;
2602 :
2603 : ret = _stat(path, &statinfo);
2604 : if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2605 : /* file does not exist */
2606 : *exists = FALSE;
2607 : return(TRUE);
2608 : } else if (ret) {
2609 : /* file exists, but stat call failed */
2610 :
2611 : os_file_handle_error_no_exit(path, "stat");
2612 :
2613 : return(FALSE);
2614 : }
2615 :
2616 : if (_S_IFDIR & statinfo.st_mode) {
2617 : *type = OS_FILE_TYPE_DIR;
2618 : } else if (_S_IFREG & statinfo.st_mode) {
2619 : *type = OS_FILE_TYPE_FILE;
2620 : } else {
2621 : *type = OS_FILE_TYPE_UNKNOWN;
2622 : }
2623 :
2624 : *exists = TRUE;
2625 :
2626 : return(TRUE);
2627 : #else
2628 : int ret;
2629 : struct stat statinfo;
2630 :
2631 0 : ret = stat(path, &statinfo);
2632 0 : if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2633 : /* file does not exist */
2634 0 : *exists = FALSE;
2635 0 : return(TRUE);
2636 0 : } else if (ret) {
2637 : /* file exists, but stat call failed */
2638 :
2639 0 : os_file_handle_error_no_exit(path, "stat");
2640 :
2641 0 : return(FALSE);
2642 : }
2643 :
2644 0 : if (S_ISDIR(statinfo.st_mode)) {
2645 0 : *type = OS_FILE_TYPE_DIR;
2646 0 : } else if (S_ISLNK(statinfo.st_mode)) {
2647 0 : *type = OS_FILE_TYPE_LINK;
2648 0 : } else if (S_ISREG(statinfo.st_mode)) {
2649 0 : *type = OS_FILE_TYPE_FILE;
2650 : } else {
2651 0 : *type = OS_FILE_TYPE_UNKNOWN;
2652 : }
2653 :
2654 0 : *exists = TRUE;
2655 :
2656 0 : return(TRUE);
2657 : #endif
2658 : }
2659 :
2660 : /***********************************************************************
2661 : This function returns information about the specified file */
2662 : UNIV_INTERN
2663 : ibool
2664 : os_file_get_status(
2665 : /*===============*/
2666 : /* out: TRUE if stat
2667 : information found */
2668 : const char* path, /* in: pathname of the file */
2669 : os_file_stat_t* stat_info) /* information of a file in a
2670 : directory */
2671 107 : {
2672 : #ifdef __WIN__
2673 : int ret;
2674 : struct _stat statinfo;
2675 :
2676 : ret = _stat(path, &statinfo);
2677 : if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2678 : /* file does not exist */
2679 :
2680 : return(FALSE);
2681 : } else if (ret) {
2682 : /* file exists, but stat call failed */
2683 :
2684 : os_file_handle_error_no_exit(path, "stat");
2685 :
2686 : return(FALSE);
2687 : }
2688 : if (_S_IFDIR & statinfo.st_mode) {
2689 : stat_info->type = OS_FILE_TYPE_DIR;
2690 : } else if (_S_IFREG & statinfo.st_mode) {
2691 : stat_info->type = OS_FILE_TYPE_FILE;
2692 : } else {
2693 : stat_info->type = OS_FILE_TYPE_UNKNOWN;
2694 : }
2695 :
2696 : stat_info->ctime = statinfo.st_ctime;
2697 : stat_info->atime = statinfo.st_atime;
2698 : stat_info->mtime = statinfo.st_mtime;
2699 : stat_info->size = statinfo.st_size;
2700 :
2701 : return(TRUE);
2702 : #else
2703 : int ret;
2704 : struct stat statinfo;
2705 :
2706 107 : ret = stat(path, &statinfo);
2707 :
2708 107 : if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2709 : /* file does not exist */
2710 :
2711 3 : return(FALSE);
2712 104 : } else if (ret) {
2713 : /* file exists, but stat call failed */
2714 :
2715 0 : os_file_handle_error_no_exit(path, "stat");
2716 :
2717 0 : return(FALSE);
2718 : }
2719 :
2720 104 : if (S_ISDIR(statinfo.st_mode)) {
2721 0 : stat_info->type = OS_FILE_TYPE_DIR;
2722 104 : } else if (S_ISLNK(statinfo.st_mode)) {
2723 0 : stat_info->type = OS_FILE_TYPE_LINK;
2724 104 : } else if (S_ISREG(statinfo.st_mode)) {
2725 104 : stat_info->type = OS_FILE_TYPE_FILE;
2726 : } else {
2727 0 : stat_info->type = OS_FILE_TYPE_UNKNOWN;
2728 : }
2729 :
2730 104 : stat_info->ctime = statinfo.st_ctime;
2731 104 : stat_info->atime = statinfo.st_atime;
2732 104 : stat_info->mtime = statinfo.st_mtime;
2733 104 : stat_info->size = statinfo.st_size;
2734 :
2735 104 : return(TRUE);
2736 : #endif
2737 : }
2738 :
2739 : /* path name separator character */
2740 : #ifdef __WIN__
2741 : # define OS_FILE_PATH_SEPARATOR '\\'
2742 : #else
2743 : # define OS_FILE_PATH_SEPARATOR '/'
2744 : #endif
2745 :
2746 : /********************************************************************
2747 : The function os_file_dirname returns a directory component of a
2748 : null-terminated pathname string. In the usual case, dirname returns
2749 : the string up to, but not including, the final '/', and basename
2750 : is the component following the final '/'. Trailing '/' charac
2751 : ters are not counted as part of the pathname.
2752 :
2753 : If path does not contain a slash, dirname returns the string ".".
2754 :
2755 : Concatenating the string returned by dirname, a "/", and the basename
2756 : yields a complete pathname.
2757 :
2758 : The return value is a copy of the directory component of the pathname.
2759 : The copy is allocated from heap. It is the caller responsibility
2760 : to free it after it is no longer needed.
2761 :
2762 : The following list of examples (taken from SUSv2) shows the strings
2763 : returned by dirname and basename for different paths:
2764 :
2765 : path dirname basename
2766 : "/usr/lib" "/usr" "lib"
2767 : "/usr/" "/" "usr"
2768 : "usr" "." "usr"
2769 : "/" "/" "/"
2770 : "." "." "."
2771 : ".." "." ".."
2772 : */
2773 : UNIV_INTERN
2774 : char*
2775 : os_file_dirname(
2776 : /*============*/
2777 : /* out, own: directory component of the
2778 : pathname */
2779 : const char* path) /* in: pathname */
2780 0 : {
2781 : /* Find the offset of the last slash */
2782 0 : const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
2783 0 : if (!last_slash) {
2784 : /* No slash in the path, return "." */
2785 :
2786 0 : return(mem_strdup("."));
2787 : }
2788 :
2789 : /* Ok, there is a slash */
2790 :
2791 0 : if (last_slash == path) {
2792 : /* last slash is the first char of the path */
2793 :
2794 0 : return(mem_strdup("/"));
2795 : }
2796 :
2797 : /* Non-trivial directory component */
2798 :
2799 0 : return(mem_strdupl(path, last_slash - path));
2800 : }
2801 :
2802 : /********************************************************************
2803 : Creates all missing subdirectories along the given path. */
2804 : UNIV_INTERN
2805 : ibool
2806 : os_file_create_subdirs_if_needed(
2807 : /*=============================*/
2808 : /* out: TRUE if call succeeded
2809 : FALSE otherwise */
2810 : const char* path) /* in: path name */
2811 0 : {
2812 : char* subdir;
2813 : ibool success, subdir_exists;
2814 : os_file_type_t type;
2815 :
2816 0 : subdir = os_file_dirname(path);
2817 0 : if (strlen(subdir) == 1
2818 : && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
2819 : /* subdir is root or cwd, nothing to do */
2820 0 : mem_free(subdir);
2821 :
2822 0 : return(TRUE);
2823 : }
2824 :
2825 : /* Test if subdir exists */
2826 0 : success = os_file_status(subdir, &subdir_exists, &type);
2827 0 : if (success && !subdir_exists) {
2828 : /* subdir does not exist, create it */
2829 0 : success = os_file_create_subdirs_if_needed(subdir);
2830 0 : if (!success) {
2831 0 : mem_free(subdir);
2832 :
2833 0 : return(FALSE);
2834 : }
2835 0 : success = os_file_create_directory(subdir, FALSE);
2836 : }
2837 :
2838 0 : mem_free(subdir);
2839 :
2840 0 : return(success);
2841 : }
2842 :
2843 : /********************************************************************
2844 : Returns a pointer to the nth slot in the aio array. */
2845 : static
2846 : os_aio_slot_t*
2847 : os_aio_array_get_nth_slot(
2848 : /*======================*/
2849 : /* out: pointer to slot */
2850 : os_aio_array_t* array, /* in: aio array */
2851 : ulint index) /* in: index of the slot */
2852 2192154 : {
2853 2192154 : ut_a(index < array->n_slots);
2854 :
2855 2194127 : return((array->slots) + index);
2856 : }
2857 :
2858 : /****************************************************************************
2859 : Creates an aio wait array. */
2860 : static
2861 : os_aio_array_t*
2862 : os_aio_array_create(
2863 : /*================*/
2864 : /* out, own: aio array */
2865 : ulint n, /* in: maximum number of pending aio operations
2866 : allowed; n must be divisible by n_segments */
2867 : ulint n_segments) /* in: number of segments in the aio array */
2868 105 : {
2869 : os_aio_array_t* array;
2870 : ulint i;
2871 : os_aio_slot_t* slot;
2872 : #ifdef WIN_ASYNC_IO
2873 : OVERLAPPED* over;
2874 : #endif
2875 105 : ut_a(n > 0);
2876 105 : ut_a(n_segments > 0);
2877 :
2878 105 : array = ut_malloc(sizeof(os_aio_array_t));
2879 :
2880 105 : array->mutex = os_mutex_create(NULL);
2881 105 : array->not_full = os_event_create(NULL);
2882 105 : array->is_empty = os_event_create(NULL);
2883 :
2884 105 : os_event_set(array->is_empty);
2885 :
2886 105 : array->n_slots = n;
2887 105 : array->n_segments = n_segments;
2888 105 : array->n_reserved = 0;
2889 105 : array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
2890 : #ifdef __WIN__
2891 : array->native_events = ut_malloc(n * sizeof(os_native_event_t));
2892 : #endif
2893 23709 : for (i = 0; i < n; i++) {
2894 23604 : slot = os_aio_array_get_nth_slot(array, i);
2895 :
2896 23604 : slot->pos = i;
2897 23604 : slot->reserved = FALSE;
2898 : #ifdef WIN_ASYNC_IO
2899 : slot->event = os_event_create(NULL);
2900 :
2901 : over = &(slot->control);
2902 :
2903 : over->hEvent = slot->event->handle;
2904 :
2905 : *((array->native_events) + i) = over->hEvent;
2906 : #endif
2907 : }
2908 :
2909 105 : return(array);
2910 : }
2911 :
2912 : /****************************************************************************
2913 : Initializes the asynchronous io system. Calls also os_io_init_simple.
2914 : Creates a separate aio array for
2915 : non-ibuf read and write, a third aio array for the ibuf i/o, with just one
2916 : segment, two aio arrays for log reads and writes with one segment, and a
2917 : synchronous aio array of the specified size. The combined number of segments
2918 : in the three first aio arrays is the parameter n_segments given to the
2919 : function. The caller must create an i/o handler thread for each segment in
2920 : the four first arrays, but not for the sync aio array. */
2921 : UNIV_INTERN
2922 : void
2923 : os_aio_init(
2924 : /*========*/
2925 : ulint n, /* in: maximum number of pending aio operations
2926 : allowed; n must be divisible by n_segments */
2927 : ulint n_segments, /* in: combined number of segments in the four
2928 : first aio arrays; must be >= 4 */
2929 : ulint n_slots_sync) /* in: number of slots in the sync aio array */
2930 21 : {
2931 : ulint n_read_segs;
2932 : ulint n_write_segs;
2933 : ulint n_per_seg;
2934 : ulint i;
2935 :
2936 : ut_ad(n % n_segments == 0);
2937 : ut_ad(n_segments >= 4);
2938 :
2939 21 : os_io_init_simple();
2940 :
2941 105 : for (i = 0; i < n_segments; i++) {
2942 84 : srv_set_io_thread_op_info(i, "not started yet");
2943 : }
2944 :
2945 21 : n_per_seg = n / n_segments;
2946 21 : n_write_segs = (n_segments - 2) / 2;
2947 21 : n_read_segs = n_segments - 2 - n_write_segs;
2948 :
2949 : /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
2950 :
2951 21 : os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
2952 :
2953 21 : srv_io_thread_function[0] = "insert buffer thread";
2954 :
2955 21 : os_aio_log_array = os_aio_array_create(n_per_seg, 1);
2956 :
2957 21 : srv_io_thread_function[1] = "log thread";
2958 :
2959 21 : os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
2960 : n_read_segs);
2961 42 : for (i = 2; i < 2 + n_read_segs; i++) {
2962 21 : ut_a(i < SRV_MAX_N_IO_THREADS);
2963 21 : srv_io_thread_function[i] = "read thread";
2964 : }
2965 :
2966 21 : os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
2967 : n_write_segs);
2968 42 : for (i = 2 + n_read_segs; i < n_segments; i++) {
2969 21 : ut_a(i < SRV_MAX_N_IO_THREADS);
2970 21 : srv_io_thread_function[i] = "write thread";
2971 : }
2972 :
2973 21 : os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
2974 :
2975 21 : os_aio_n_segments = n_segments;
2976 :
2977 21 : os_aio_validate();
2978 :
2979 21 : os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
2980 :
2981 105 : for (i = 0; i < n_segments; i++) {
2982 84 : os_aio_segment_wait_events[i] = os_event_create(NULL);
2983 : }
2984 :
2985 21 : os_last_printout = time(NULL);
2986 :
2987 21 : }
2988 :
#ifdef WIN_ASYNC_IO
/****************************************************************************
Sets the event of every slot of the given aio array. Used at shutdown to
wake up the threads waiting for Windows async i/o in that array. */
static
void
os_aio_array_wake_win_aio_at_shutdown(
/*==================================*/
	os_aio_array_t*	array)	/* in: aio array */
{
	ulint	pos = 0;

	while (pos < array->n_slots) {

		os_event_set(array->slots[pos].event);

		pos++;
	}
}
#endif
3007 :
3008 : /****************************************************************************
3009 : Wakes up all async i/o threads so that they know to exit themselves in
3010 : shutdown. */
3011 : UNIV_INTERN
3012 : void
3013 : os_aio_wake_all_threads_at_shutdown(void)
3014 : /*=====================================*/
3015 41 : {
3016 : ulint i;
3017 :
3018 : #ifdef WIN_ASYNC_IO
3019 : /* This code wakes up all ai/o threads in Windows native aio */
3020 : os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
3021 : os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
3022 : os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
3023 : os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
3024 : #endif
3025 : /* This loop wakes up all simulated ai/o threads */
3026 :
3027 205 : for (i = 0; i < os_aio_n_segments; i++) {
3028 :
3029 164 : os_event_set(os_aio_segment_wait_events[i]);
3030 : }
3031 41 : }
3032 :
3033 : /****************************************************************************
3034 : Waits until there are no pending writes in os_aio_write_array. There can
3035 : be other, synchronous, pending writes. */
3036 : UNIV_INTERN
3037 : void
3038 : os_aio_wait_until_no_pending_writes(void)
3039 : /*=====================================*/
3040 139 : {
3041 139 : os_event_wait(os_aio_write_array->is_empty);
3042 139 : }
3043 :
3044 : /**************************************************************************
3045 : Calculates segment number for a slot. */
3046 : static
3047 : ulint
3048 : os_aio_get_segment_no_from_slot(
3049 : /*============================*/
3050 : /* out: segment number (which is the number
3051 : used by, for example, i/o-handler threads) */
3052 : os_aio_array_t* array, /* in: aio wait array */
3053 : os_aio_slot_t* slot) /* in: slot in this array */
3054 192 : {
3055 : ulint segment;
3056 : ulint seg_len;
3057 :
3058 192 : if (array == os_aio_ibuf_array) {
3059 4 : segment = 0;
3060 :
3061 188 : } else if (array == os_aio_log_array) {
3062 188 : segment = 1;
3063 :
3064 0 : } else if (array == os_aio_read_array) {
3065 0 : seg_len = os_aio_read_array->n_slots
3066 : / os_aio_read_array->n_segments;
3067 :
3068 0 : segment = 2 + slot->pos / seg_len;
3069 : } else {
3070 0 : ut_a(array == os_aio_write_array);
3071 0 : seg_len = os_aio_write_array->n_slots
3072 : / os_aio_write_array->n_segments;
3073 :
3074 0 : segment = os_aio_read_array->n_segments + 2
3075 : + slot->pos / seg_len;
3076 : }
3077 :
3078 192 : return(segment);
3079 : }
3080 :
3081 : /**************************************************************************
3082 : Calculates local segment number and aio array from global segment number. */
3083 : static
3084 : ulint
3085 : os_aio_get_array_and_local_segment(
3086 : /*===============================*/
3087 : /* out: local segment number within
3088 : the aio array */
3089 : os_aio_array_t** array, /* out: aio wait array */
3090 : ulint global_segment)/* in: global segment number */
3091 7004 : {
3092 : ulint segment;
3093 :
3094 7004 : ut_a(global_segment < os_aio_n_segments);
3095 :
3096 7004 : if (global_segment == 0) {
3097 221 : *array = os_aio_ibuf_array;
3098 221 : segment = 0;
3099 :
3100 6783 : } else if (global_segment == 1) {
3101 589 : *array = os_aio_log_array;
3102 589 : segment = 0;
3103 :
3104 6194 : } else if (global_segment < os_aio_read_array->n_segments + 2) {
3105 476 : *array = os_aio_read_array;
3106 :
3107 476 : segment = global_segment - 2;
3108 : } else {
3109 5718 : *array = os_aio_write_array;
3110 :
3111 5718 : segment = global_segment - (os_aio_read_array->n_segments + 2);
3112 : }
3113 :
3114 7004 : return(segment);
3115 : }
3116 :
3117 : /***********************************************************************
3118 : Requests for a slot in the aio array. If no slot is available, waits until
3119 : not_full-event becomes signaled. */
3120 : static
3121 : os_aio_slot_t*
3122 : os_aio_array_reserve_slot(
3123 : /*======================*/
3124 : /* out: pointer to slot */
3125 : ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
3126 : os_aio_array_t* array, /* in: aio array */
3127 : fil_node_t* message1,/* in: message to be passed along with
3128 : the aio operation */
3129 : void* message2,/* in: message to be passed along with
3130 : the aio operation */
3131 : os_file_t file, /* in: file handle */
3132 : const char* name, /* in: name of the file or path as a
3133 : null-terminated string */
3134 : void* buf, /* in: buffer where to read or from which
3135 : to write */
3136 : ulint offset, /* in: least significant 32 bits of file
3137 : offset */
3138 : ulint offset_high, /* in: most significant 32 bits of
3139 : offset */
3140 : ulint len) /* in: length of the block to read or write */
3141 5960 : {
3142 : os_aio_slot_t* slot;
3143 : #ifdef WIN_ASYNC_IO
3144 : OVERLAPPED* control;
3145 : #endif
3146 : ulint i;
3147 5960 : loop:
3148 5960 : os_mutex_enter(array->mutex);
3149 :
3150 5960 : if (array->n_reserved == array->n_slots) {
3151 0 : os_mutex_exit(array->mutex);
3152 :
3153 0 : if (!os_aio_use_native_aio) {
3154 : /* If the handler threads are suspended, wake them
3155 : so that we get more slots */
3156 :
3157 0 : os_aio_simulated_wake_handler_threads();
3158 : }
3159 :
3160 0 : os_event_wait(array->not_full);
3161 :
3162 0 : goto loop;
3163 : }
3164 :
3165 255084 : for (i = 0;; i++) {
3166 255084 : slot = os_aio_array_get_nth_slot(array, i);
3167 :
3168 255084 : if (slot->reserved == FALSE) {
3169 5960 : break;
3170 : }
3171 249124 : }
3172 :
3173 5960 : array->n_reserved++;
3174 :
3175 5960 : if (array->n_reserved == 1) {
3176 370 : os_event_reset(array->is_empty);
3177 : }
3178 :
3179 5960 : if (array->n_reserved == array->n_slots) {
3180 0 : os_event_reset(array->not_full);
3181 : }
3182 :
3183 5960 : slot->reserved = TRUE;
3184 5960 : slot->reservation_time = time(NULL);
3185 5960 : slot->message1 = message1;
3186 5960 : slot->message2 = message2;
3187 5960 : slot->file = file;
3188 5960 : slot->name = name;
3189 5960 : slot->len = len;
3190 5960 : slot->type = type;
3191 5960 : slot->buf = buf;
3192 5960 : slot->offset = offset;
3193 5960 : slot->offset_high = offset_high;
3194 5960 : slot->io_already_done = FALSE;
3195 :
3196 : #ifdef WIN_ASYNC_IO
3197 : control = &(slot->control);
3198 : control->Offset = (DWORD)offset;
3199 : control->OffsetHigh = (DWORD)offset_high;
3200 : os_event_reset(slot->event);
3201 : #endif
3202 :
3203 5960 : os_mutex_exit(array->mutex);
3204 :
3205 5960 : return(slot);
3206 : }
3207 :
3208 : /***********************************************************************
3209 : Frees a slot in the aio array. */
3210 : static
3211 : void
3212 : os_aio_array_free_slot(
3213 : /*===================*/
3214 : os_aio_array_t* array, /* in: aio array */
3215 : os_aio_slot_t* slot) /* in: pointer to slot */
3216 5960 : {
3217 : ut_ad(array);
3218 : ut_ad(slot);
3219 :
3220 5960 : os_mutex_enter(array->mutex);
3221 :
3222 : ut_ad(slot->reserved);
3223 :
3224 5960 : slot->reserved = FALSE;
3225 :
3226 5960 : array->n_reserved--;
3227 :
3228 5960 : if (array->n_reserved == array->n_slots - 1) {
3229 0 : os_event_set(array->not_full);
3230 : }
3231 :
3232 5960 : if (array->n_reserved == 0) {
3233 370 : os_event_set(array->is_empty);
3234 : }
3235 :
3236 : #ifdef WIN_ASYNC_IO
3237 : os_event_reset(slot->event);
3238 : #endif
3239 5960 : os_mutex_exit(array->mutex);
3240 5960 : }
3241 :
3242 : /**************************************************************************
3243 : Wakes up a simulated aio i/o-handler thread if it has something to do. */
3244 : static
3245 : void
3246 : os_aio_simulated_wake_handler_thread(
3247 : /*=================================*/
3248 : ulint global_segment) /* in: the number of the segment in the aio
3249 : arrays */
3250 920 : {
3251 : os_aio_array_t* array;
3252 : os_aio_slot_t* slot;
3253 : ulint segment;
3254 : ulint n;
3255 : ulint i;
3256 :
3257 : ut_ad(!os_aio_use_native_aio);
3258 :
3259 920 : segment = os_aio_get_array_and_local_segment(&array, global_segment);
3260 :
3261 920 : n = array->n_slots / array->n_segments;
3262 :
3263 : /* Look through n slots after the segment * n'th slot */
3264 :
3265 920 : os_mutex_enter(array->mutex);
3266 :
3267 141464 : for (i = 0; i < n; i++) {
3268 140915 : slot = os_aio_array_get_nth_slot(array, i + segment * n);
3269 :
3270 140915 : if (slot->reserved) {
3271 : /* Found an i/o request */
3272 :
3273 371 : break;
3274 : }
3275 : }
3276 :
3277 920 : os_mutex_exit(array->mutex);
3278 :
3279 920 : if (i < n) {
3280 371 : os_event_set(os_aio_segment_wait_events[global_segment]);
3281 : }
3282 920 : }
3283 :
3284 : /**************************************************************************
3285 : Wakes up simulated aio i/o-handler threads if they have something to do. */
3286 : UNIV_INTERN
3287 : void
3288 : os_aio_simulated_wake_handler_threads(void)
3289 : /*=======================================*/
3290 182 : {
3291 : ulint i;
3292 :
3293 182 : if (os_aio_use_native_aio) {
3294 : /* We do not use simulated aio: do nothing */
3295 :
3296 0 : return;
3297 : }
3298 :
3299 182 : os_aio_recommend_sleep_for_read_threads = FALSE;
3300 :
3301 910 : for (i = 0; i < os_aio_n_segments; i++) {
3302 728 : os_aio_simulated_wake_handler_thread(i);
3303 : }
3304 : }
3305 :
3306 : /**************************************************************************
3307 : This function can be called if one wants to post a batch of reads and
3308 : prefers an i/o-handler thread to handle them all at once later. You must
3309 : call os_aio_simulated_wake_handler_threads later to ensure the threads
3310 : are not left sleeping! */
3311 : UNIV_INTERN
3312 : void
3313 : os_aio_simulated_put_read_threads_to_sleep(void)
3314 : /*============================================*/
3315 10 : {
3316 : os_aio_array_t* array;
3317 : ulint g;
3318 :
3319 10 : os_aio_recommend_sleep_for_read_threads = TRUE;
3320 :
3321 50 : for (g = 0; g < os_aio_n_segments; g++) {
3322 40 : os_aio_get_array_and_local_segment(&array, g);
3323 :
3324 40 : if (array == os_aio_read_array) {
3325 :
3326 10 : os_event_reset(os_aio_segment_wait_events[g]);
3327 : }
3328 : }
3329 10 : }
3330 :
3331 : /***********************************************************************
3332 : Requests an asynchronous i/o operation. */
3333 : UNIV_INTERN
3334 : ibool
3335 : os_aio(
3336 : /*===*/
3337 : /* out: TRUE if request was queued
3338 : successfully, FALSE if fail */
3339 : ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
3340 : ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
3341 : to OS_AIO_SIMULATED_WAKE_LATER: the
3342 : last flag advises this function not to wake
3343 : i/o-handler threads, but the caller will
3344 : do the waking explicitly later, in this
3345 : way the caller can post several requests in
3346 : a batch; NOTE that the batch must not be
3347 : so big that it exhausts the slots in aio
3348 : arrays! NOTE that a simulated batch
3349 : may introduce hidden chances of deadlocks,
3350 : because i/os are not actually handled until
3351 : all have been posted: use with great
3352 : caution! */
3353 : const char* name, /* in: name of the file or path as a
3354 : null-terminated string */
3355 : os_file_t file, /* in: handle to a file */
3356 : void* buf, /* in: buffer where to read or from which
3357 : to write */
3358 : ulint offset, /* in: least significant 32 bits of file
3359 : offset where to read or write */
3360 : ulint offset_high, /* in: most significant 32 bits of
3361 : offset */
3362 : ulint n, /* in: number of bytes to read or write */
3363 : fil_node_t* message1,/* in: messages for the aio handler (these
3364 : can be used to identify a completed aio
3365 : operation); if mode is OS_AIO_SYNC, these
3366 : are ignored */
3367 : void* message2)
3368 64002 : {
3369 : os_aio_array_t* array;
3370 : os_aio_slot_t* slot;
3371 : #ifdef WIN_ASYNC_IO
3372 : ibool retval;
3373 : BOOL ret = TRUE;
3374 : DWORD len = (DWORD) n;
3375 : struct fil_node_struct * dummy_mess1;
3376 : void* dummy_mess2;
3377 : ulint dummy_type;
3378 : #endif
3379 64002 : ulint err = 0;
3380 : ibool retry;
3381 : ulint wake_later;
3382 :
3383 : ut_ad(file);
3384 : ut_ad(buf);
3385 : ut_ad(n > 0);
3386 : ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
3387 : ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
3388 : ut_ad(os_aio_validate());
3389 :
3390 64002 : wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
3391 64002 : mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
3392 :
3393 64002 : if (mode == OS_AIO_SYNC
3394 : #ifdef WIN_ASYNC_IO
3395 : && !os_aio_use_native_aio
3396 : #endif
3397 : ) {
3398 : /* This is actually an ordinary synchronous read or write:
3399 : no need to use an i/o-handler thread. NOTE that if we use
3400 : Windows async i/o, Windows does not allow us to use
3401 : ordinary synchronous os_file_read etc. on the same file,
3402 : therefore we have built a special mechanism for synchronous
3403 : wait in the Windows case. */
3404 :
3405 58042 : if (type == OS_FILE_READ) {
3406 610 : return(os_file_read(file, buf, offset,
3407 : offset_high, n));
3408 : }
3409 :
3410 57432 : ut_a(type == OS_FILE_WRITE);
3411 :
3412 57432 : return(os_file_write(name, file, buf, offset, offset_high, n));
3413 : }
3414 :
3415 5960 : try_again:
3416 5960 : if (mode == OS_AIO_NORMAL) {
3417 5768 : if (type == OS_FILE_READ) {
3418 263 : array = os_aio_read_array;
3419 : } else {
3420 5505 : array = os_aio_write_array;
3421 : }
3422 192 : } else if (mode == OS_AIO_IBUF) {
3423 : ut_ad(type == OS_FILE_READ);
3424 : /* Reduce probability of deadlock bugs in connection with ibuf:
3425 : do not let the ibuf i/o handler sleep */
3426 :
3427 4 : wake_later = FALSE;
3428 :
3429 4 : array = os_aio_ibuf_array;
3430 188 : } else if (mode == OS_AIO_LOG) {
3431 :
3432 188 : array = os_aio_log_array;
3433 0 : } else if (mode == OS_AIO_SYNC) {
3434 0 : array = os_aio_sync_array;
3435 : } else {
3436 0 : array = NULL; /* Eliminate compiler warning */
3437 0 : ut_error;
3438 : }
3439 :
3440 5960 : slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
3441 : name, buf, offset, offset_high, n);
3442 5960 : if (type == OS_FILE_READ) {
3443 267 : if (os_aio_use_native_aio) {
3444 : #ifdef WIN_ASYNC_IO
3445 : os_n_file_reads++;
3446 : os_bytes_read_since_printout += len;
3447 :
3448 : ret = ReadFile(file, buf, (DWORD)n, &len,
3449 : &(slot->control));
3450 : #endif
3451 : } else {
3452 267 : if (!wake_later) {
3453 4 : os_aio_simulated_wake_handler_thread(
3454 : os_aio_get_segment_no_from_slot(
3455 : array, slot));
3456 : }
3457 : }
3458 5693 : } else if (type == OS_FILE_WRITE) {
3459 5693 : if (os_aio_use_native_aio) {
3460 : #ifdef WIN_ASYNC_IO
3461 : os_n_file_writes++;
3462 : ret = WriteFile(file, buf, (DWORD)n, &len,
3463 : &(slot->control));
3464 : #endif
3465 : } else {
3466 5693 : if (!wake_later) {
3467 188 : os_aio_simulated_wake_handler_thread(
3468 : os_aio_get_segment_no_from_slot(
3469 : array, slot));
3470 : }
3471 : }
3472 : } else {
3473 0 : ut_error;
3474 : }
3475 :
3476 : #ifdef WIN_ASYNC_IO
3477 : if (os_aio_use_native_aio) {
3478 : if ((ret && len == n)
3479 : || (!ret && GetLastError() == ERROR_IO_PENDING)) {
3480 : /* aio was queued successfully! */
3481 :
3482 : if (mode == OS_AIO_SYNC) {
3483 : /* We want a synchronous i/o operation on a
3484 : file where we also use async i/o: in Windows
3485 : we must use the same wait mechanism as for
3486 : async i/o */
3487 :
3488 : retval = os_aio_windows_handle(ULINT_UNDEFINED,
3489 : slot->pos,
3490 : &dummy_mess1,
3491 : &dummy_mess2,
3492 : &dummy_type);
3493 :
3494 : return(retval);
3495 : }
3496 :
3497 : return(TRUE);
3498 : }
3499 :
3500 : err = 1; /* Fall through the next if */
3501 : }
3502 : #endif
3503 5960 : if (err == 0) {
3504 : /* aio was queued successfully! */
3505 :
3506 5960 : return(TRUE);
3507 : }
3508 :
3509 0 : os_aio_array_free_slot(array, slot);
3510 :
3511 0 : retry = os_file_handle_error(name,
3512 : type == OS_FILE_READ
3513 : ? "aio read" : "aio write");
3514 0 : if (retry) {
3515 :
3516 0 : goto try_again;
3517 : }
3518 :
3519 0 : return(FALSE);
3520 : }
3521 :
#ifdef WIN_ASYNC_IO
/**************************************************************************
This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. The aio array of pending requests
is divided into segments, and the calling thread specifies which segment
(or, for sync aio, which slot) it wants to wait for. NOTE: this function
also frees the aio slot of the completed request, therefore no other
thread is allowed to do the freeing! */
UNIV_INTERN
ibool
os_aio_windows_handle(
/*==================*/
				/* out: TRUE if the aio operation succeeded */
	ulint	segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads; if
				this is ULINT_UNDEFINED, then it means that
				sync aio is used, and this parameter is
				ignored */
	ulint	pos,		/* in: this parameter is used only in sync
				aio: wait for the aio slot at this position */
	fil_node_t**message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2,	/* out: second message passed with the aio
				request */
	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
{
	ulint		requested_seg	= segment;
	os_aio_array_t*	array;
	os_aio_slot_t*	done_slot;
	ulint		slots_per_seg;
	ulint		slot_idx;
	ibool		success;
	BOOL		got_result;
	DWORD		n_transferred;

	if (requested_seg != ULINT_UNDEFINED) {
		segment = os_aio_get_array_and_local_segment(&array,
							     segment);
	} else {
		/* Sync aio: the sync array is treated as one segment */
		array = os_aio_sync_array;
		segment = 0;
	}

	/* NOTE! We only access constant fields in os_aio_array. Therefore
	we do not have to acquire the protecting mutex yet */

	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	slots_per_seg = array->n_slots / array->n_segments;

	if (array != os_aio_sync_array) {
		/* Wait for any of the native events of this segment */

		srv_set_io_thread_op_info(requested_seg,
					  "wait Windows aio");

		slot_idx = os_event_wait_multiple(
			slots_per_seg,
			(array->native_events) + segment * slots_per_seg);
	} else {
		/* Wait for the one slot the caller named */

		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);

		slot_idx = pos;
	}

	os_mutex_enter(array->mutex);

	done_slot = os_aio_array_get_nth_slot(
		array, slot_idx + segment * slots_per_seg);

	ut_a(done_slot->reserved);

	if (requested_seg != ULINT_UNDEFINED) {
		srv_set_io_thread_op_info(requested_seg,
					  "get windows aio return value");
	}

	got_result = GetOverlappedResult(done_slot->file,
					 &(done_slot->control),
					 &n_transferred, TRUE);

	/* The messages and the type are handed back also on failure */

	*message1 = done_slot->message1;
	*message2 = done_slot->message2;

	*type = done_slot->type;

	if (!got_result || n_transferred != done_slot->len) {
		os_file_handle_error(done_slot->name, "Windows aio");

		success = FALSE;
	} else {
		success = TRUE;

#ifdef UNIV_DO_FLUSH
		if (done_slot->type == OS_FILE_WRITE
		    && !os_do_not_call_flush_at_each_write) {
			ut_a(TRUE == os_file_flush(done_slot->file));
		}
#endif /* UNIV_DO_FLUSH */
	}

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, done_slot);

	return(success);
}
#endif
3627 :
/**************************************************************************
Does simulated aio. This function should be called by an i/o-handler
thread.

Each call hands back the messages of exactly one slot and frees that slot.
If no request is available in the segment, the function waits on the wait
event of the segment and then starts over. When it picks a request it also
collects requests that directly follow it in the same file (up to
OS_AIO_MERGE_N_CONSECUTIVE in total), performs them as one synchronous
read or write, and marks all of them done; the remaining slots of such a
batch are returned by subsequent calls. */
UNIV_INTERN
ibool
os_aio_simulated_handle(
/*====================*/
				/* out: TRUE if the aio operation succeeded;
				note that the result of the file i/o is
				checked with ut_a() before it is returned */
	ulint	global_segment,	/* in: the number of the segment in the aio
				arrays to wait for; segment 0 is the ibuf
				i/o thread, segment 1 the log i/o thread,
				then follow the non-ibuf read threads, and as
				the last are the non-ibuf write threads */
	fil_node_t**message1,	/* out: the messages passed with the aio
				request; note that also in the case where
				the aio operation failed, these output
				parameters are valid and can be used to
				restart the operation, for example */
	void**	message2,	/* out: second message passed with the aio
				request */
	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
{
	os_aio_array_t*	array;
	ulint		segment;
	os_aio_slot_t*	slot;
	os_aio_slot_t*	slot2;
	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
	ulint		n_consecutive;
	ulint		total_len;
	ulint		offs;
	ulint		lowest_offset;
	ulint		biggest_age;
	ulint		age;
	byte*		combined_buf;
	byte*		combined_buf2;
	ibool		ret;
	ulint		n;
	ulint		i;

	segment = os_aio_get_array_and_local_segment(&array, global_segment);

restart:
	/* NOTE! We only access constant fields in os_aio_array. Therefore
	we do not have to acquire the protecting mutex yet */

	srv_set_io_thread_op_info(global_segment,
				  "looking for i/o requests (a)");
	ut_ad(os_aio_validate());
	ut_ad(segment < array->n_segments);

	/* n is the number of slots in one segment of this array */

	n = array->n_slots / array->n_segments;

	/* Look through n slots after the segment * n'th slot */

	if (array == os_aio_read_array
	    && os_aio_recommend_sleep_for_read_threads) {

		/* Give other threads chance to add several i/os to the array
		at once. */

		goto recommended_sleep;
	}

	os_mutex_enter(array->mutex);

	srv_set_io_thread_op_info(global_segment,
				  "looking for i/o requests (b)");

	/* Check if there is a slot for which the i/o has already been
	done; such a slot only needs to be reported back and freed */

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved && slot->io_already_done) {

			if (os_aio_print_debug) {
				fprintf(stderr,
					"InnoDB: i/o for slot %lu"
					" already done, returning\n",
					(ulong) i);
			}

			ret = TRUE;

			goto slot_io_done;
		}
	}

	n_consecutive = 0;

	/* If there are at least 2 seconds old requests, then pick the oldest
	one to prevent starvation. If several requests have the same age,
	then pick the one at the lowest offset. */

	biggest_age = 0;
	lowest_offset = ULINT_MAX;

	for (i = 0; i < n; i++) {
		slot = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot->reserved) {
			/* Age of the request in whole seconds */
			age = (ulint)difftime(time(NULL),
					      slot->reservation_time);

			if ((age >= 2 && age > biggest_age)
			    || (age >= 2 && age == biggest_age
				&& slot->offset < lowest_offset)) {

				/* Found an i/o request */
				consecutive_ios[0] = slot;

				n_consecutive = 1;

				biggest_age = age;
				lowest_offset = slot->offset;
			}
		}
	}

	if (n_consecutive == 0) {
		/* There were no old requests. Look for an i/o request at the
		lowest offset in the array (we ignore the high 32 bits of the
		offset in these heuristics) */

		lowest_offset = ULINT_MAX;

		for (i = 0; i < n; i++) {
			slot = os_aio_array_get_nth_slot(array,
							 i + segment * n);

			if (slot->reserved && slot->offset < lowest_offset) {

				/* Found an i/o request */
				consecutive_ios[0] = slot;

				n_consecutive = 1;

				lowest_offset = slot->offset;
			}
		}
	}

	if (n_consecutive == 0) {

		/* No i/o requested at the moment */

		goto wait_for_io;
	}

	slot = consecutive_ios[0];

	/* Check if there are several consecutive blocks to read or write.
	Every time a follower of the current tail (slot) is found, it
	becomes the new tail and the scan of the segment starts over. */

consecutive_loop:
	for (i = 0; i < n; i++) {
		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);

		if (slot2->reserved && slot2 != slot
		    && slot2->offset == slot->offset + slot->len
		    /* check that sum does not wrap over */
		    && slot->offset + slot->len > slot->offset
		    && slot2->offset_high == slot->offset_high
		    && slot2->type == slot->type
		    && slot2->file == slot->file) {

			/* Found a consecutive i/o request */

			consecutive_ios[n_consecutive] = slot2;
			n_consecutive++;

			slot = slot2;

			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {

				goto consecutive_loop;
			} else {
				/* consecutive_ios[] is full */
				break;
			}
		}
	}

	srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");

	/* We have now collected n_consecutive i/o requests in the array;
	allocate a single buffer which can hold all data, and perform the
	i/o */

	total_len = 0;
	slot = consecutive_ios[0];

	for (i = 0; i < n_consecutive; i++) {
		total_len += consecutive_ios[i]->len;
	}

	if (n_consecutive == 1) {
		/* We can use the buffer of the i/o request */
		combined_buf = slot->buf;
		combined_buf2 = NULL;
	} else {
		/* combined_buf2 is the raw allocation (kept for ut_free);
		combined_buf is the UNIV_PAGE_SIZE-aligned pointer into it
		that is actually used for the i/o */
		combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);

		ut_a(combined_buf2);

		combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE);
	}

	/* We release the array mutex for the time of the i/o: NOTE that
	this assumes that there is just one i/o-handler thread serving
	a single segment of slots! */

	os_mutex_exit(array->mutex);

	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
		/* Copy the buffers to the combined buffer */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
				  consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	srv_set_io_thread_op_info(global_segment, "doing file i/o");

	if (os_aio_print_debug) {
		fprintf(stderr,
			"InnoDB: doing i/o of type %lu at offset %lu %lu,"
			" length %lu\n",
			(ulong) slot->type, (ulong) slot->offset_high,
			(ulong) slot->offset, (ulong) total_len);
	}

	/* Do the i/o with ordinary, synchronous i/o functions: one call
	covers the whole batch, starting at the offset of the first slot */
	if (slot->type == OS_FILE_WRITE) {
		ret = os_file_write(slot->name, slot->file, combined_buf,
				    slot->offset, slot->offset_high,
				    total_len);
	} else {
		ret = os_file_read(slot->file, combined_buf,
				   slot->offset, slot->offset_high, total_len);
	}

	/* A failed read or write is treated as an assertion failure */
	ut_a(ret);
	srv_set_io_thread_op_info(global_segment, "file i/o done");

#if 0
	fprintf(stderr,
		"aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
		n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
#endif

	if (slot->type == OS_FILE_READ && n_consecutive > 1) {
		/* Copy the combined buffer to individual buffers */
		offs = 0;

		for (i = 0; i < n_consecutive; i++) {

			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
				  consecutive_ios[i]->len);
			offs += consecutive_ios[i]->len;
		}
	}

	if (combined_buf2) {
		ut_free(combined_buf2);
	}

	os_mutex_enter(array->mutex);

	/* Mark the i/os done in slots */

	for (i = 0; i < n_consecutive; i++) {
		consecutive_ios[i]->io_already_done = TRUE;
	}

	/* We return the messages for the first slot now, and if there were
	several slots, the messages will be returned with subsequent calls
	of this function */

slot_io_done:

	/* Reached with the array mutex held, either from the
	"already done" scan above or after a completed batch */

	ut_a(slot->reserved);

	*message1 = slot->message1;
	*message2 = slot->message2;

	*type = slot->type;

	os_mutex_exit(array->mutex);

	os_aio_array_free_slot(array, slot);

	return(ret);

wait_for_io:
	srv_set_io_thread_op_info(global_segment, "resetting wait event");

	/* We wait here until there again can be i/os in the segment
	of this thread. The event is reset while the array mutex is
	still held. */

	os_event_reset(os_aio_segment_wait_events[global_segment]);

	os_mutex_exit(array->mutex);

recommended_sleep:
	/* Reached without the array mutex held */
	srv_set_io_thread_op_info(global_segment, "waiting for i/o request");

	os_event_wait(os_aio_segment_wait_events[global_segment]);

	if (os_aio_print_debug) {
		fprintf(stderr,
			"InnoDB: i/o handler thread for i/o"
			" segment %lu wakes up\n",
			(ulong) global_segment);
	}

	goto restart;
}
3948 :
3949 : /**************************************************************************
3950 : Validates the consistency of an aio array. */
3951 : static
3952 : ibool
3953 : os_aio_array_validate(
3954 : /*==================*/
3955 : /* out: TRUE if ok */
3956 : os_aio_array_t* array) /* in: aio wait array */
3957 105 : {
3958 : os_aio_slot_t* slot;
3959 105 : ulint n_reserved = 0;
3960 : ulint i;
3961 :
3962 105 : ut_a(array);
3963 :
3964 105 : os_mutex_enter(array->mutex);
3965 :
3966 105 : ut_a(array->n_slots > 0);
3967 105 : ut_a(array->n_segments > 0);
3968 :
3969 23709 : for (i = 0; i < array->n_slots; i++) {
3970 23604 : slot = os_aio_array_get_nth_slot(array, i);
3971 :
3972 23604 : if (slot->reserved) {
3973 0 : n_reserved++;
3974 0 : ut_a(slot->len > 0);
3975 : }
3976 : }
3977 :
3978 105 : ut_a(array->n_reserved == n_reserved);
3979 :
3980 105 : os_mutex_exit(array->mutex);
3981 :
3982 105 : return(TRUE);
3983 : }
3984 :
3985 : /**************************************************************************
3986 : Validates the consistency the aio system. */
3987 : UNIV_INTERN
3988 : ibool
3989 : os_aio_validate(void)
3990 : /*=================*/
3991 : /* out: TRUE if ok */
3992 21 : {
3993 21 : os_aio_array_validate(os_aio_read_array);
3994 21 : os_aio_array_validate(os_aio_write_array);
3995 21 : os_aio_array_validate(os_aio_ibuf_array);
3996 21 : os_aio_array_validate(os_aio_log_array);
3997 21 : os_aio_array_validate(os_aio_sync_array);
3998 :
3999 21 : return(TRUE);
4000 : }
4001 :
4002 : /**************************************************************************
4003 : Prints info of the aio arrays. */
4004 : UNIV_INTERN
4005 : void
4006 : os_aio_print(
4007 : /*=========*/
4008 : FILE* file) /* in: file where to print */
4009 0 : {
4010 : os_aio_array_t* array;
4011 : os_aio_slot_t* slot;
4012 : ulint n_reserved;
4013 : time_t current_time;
4014 : double time_elapsed;
4015 : double avg_bytes_read;
4016 : ulint i;
4017 :
4018 0 : for (i = 0; i < srv_n_file_io_threads; i++) {
4019 0 : fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
4020 : srv_io_thread_op_info[i],
4021 : srv_io_thread_function[i]);
4022 :
4023 : #ifndef __WIN__
4024 0 : if (os_aio_segment_wait_events[i]->is_set) {
4025 0 : fprintf(file, " ev set");
4026 : }
4027 : #endif
4028 :
4029 0 : fprintf(file, "\n");
4030 : }
4031 :
4032 0 : fputs("Pending normal aio reads:", file);
4033 :
4034 0 : array = os_aio_read_array;
4035 0 : loop:
4036 0 : ut_a(array);
4037 :
4038 0 : os_mutex_enter(array->mutex);
4039 :
4040 0 : ut_a(array->n_slots > 0);
4041 0 : ut_a(array->n_segments > 0);
4042 :
4043 0 : n_reserved = 0;
4044 :
4045 0 : for (i = 0; i < array->n_slots; i++) {
4046 0 : slot = os_aio_array_get_nth_slot(array, i);
4047 :
4048 0 : if (slot->reserved) {
4049 0 : n_reserved++;
4050 : #if 0
4051 : fprintf(stderr, "Reserved slot, messages %p %p\n",
4052 : (void*) slot->message1,
4053 : (void*) slot->message2);
4054 : #endif
4055 0 : ut_a(slot->len > 0);
4056 : }
4057 : }
4058 :
4059 0 : ut_a(array->n_reserved == n_reserved);
4060 :
4061 0 : fprintf(file, " %lu", (ulong) n_reserved);
4062 :
4063 0 : os_mutex_exit(array->mutex);
4064 :
4065 0 : if (array == os_aio_read_array) {
4066 0 : fputs(", aio writes:", file);
4067 :
4068 0 : array = os_aio_write_array;
4069 :
4070 0 : goto loop;
4071 : }
4072 :
4073 0 : if (array == os_aio_write_array) {
4074 0 : fputs(",\n ibuf aio reads:", file);
4075 0 : array = os_aio_ibuf_array;
4076 :
4077 0 : goto loop;
4078 : }
4079 :
4080 0 : if (array == os_aio_ibuf_array) {
4081 0 : fputs(", log i/o's:", file);
4082 0 : array = os_aio_log_array;
4083 :
4084 0 : goto loop;
4085 : }
4086 :
4087 0 : if (array == os_aio_log_array) {
4088 0 : fputs(", sync i/o's:", file);
4089 0 : array = os_aio_sync_array;
4090 :
4091 0 : goto loop;
4092 : }
4093 :
4094 0 : putc('\n', file);
4095 0 : current_time = time(NULL);
4096 0 : time_elapsed = 0.001 + difftime(current_time, os_last_printout);
4097 :
4098 0 : fprintf(file,
4099 : "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
4100 : "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
4101 : (ulong) fil_n_pending_log_flushes,
4102 : (ulong) fil_n_pending_tablespace_flushes,
4103 : (ulong) os_n_file_reads, (ulong) os_n_file_writes,
4104 : (ulong) os_n_fsyncs);
4105 :
4106 0 : if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
4107 0 : fprintf(file,
4108 : "%lu pending preads, %lu pending pwrites\n",
4109 : (ulong) os_file_n_pending_preads,
4110 : (ulong) os_file_n_pending_pwrites);
4111 : }
4112 :
4113 0 : if (os_n_file_reads == os_n_file_reads_old) {
4114 0 : avg_bytes_read = 0.0;
4115 : } else {
4116 0 : avg_bytes_read = (double) os_bytes_read_since_printout
4117 : / (os_n_file_reads - os_n_file_reads_old);
4118 : }
4119 :
4120 0 : fprintf(file,
4121 : "%.2f reads/s, %lu avg bytes/read,"
4122 : " %.2f writes/s, %.2f fsyncs/s\n",
4123 : (os_n_file_reads - os_n_file_reads_old)
4124 : / time_elapsed,
4125 : (ulong)avg_bytes_read,
4126 : (os_n_file_writes - os_n_file_writes_old)
4127 : / time_elapsed,
4128 : (os_n_fsyncs - os_n_fsyncs_old)
4129 : / time_elapsed);
4130 :
4131 0 : os_n_file_reads_old = os_n_file_reads;
4132 0 : os_n_file_writes_old = os_n_file_writes;
4133 0 : os_n_fsyncs_old = os_n_fsyncs;
4134 0 : os_bytes_read_since_printout = 0;
4135 :
4136 0 : os_last_printout = current_time;
4137 0 : }
4138 :
4139 : /**************************************************************************
4140 : Refreshes the statistics used to print per-second averages. */
4141 : UNIV_INTERN
4142 : void
4143 : os_aio_refresh_stats(void)
4144 : /*======================*/
4145 8 : {
4146 8 : os_n_file_reads_old = os_n_file_reads;
4147 8 : os_n_file_writes_old = os_n_file_writes;
4148 8 : os_n_fsyncs_old = os_n_fsyncs;
4149 8 : os_bytes_read_since_printout = 0;
4150 :
4151 8 : os_last_printout = time(NULL);
4152 8 : }
4153 :
#ifdef UNIV_DEBUG
/**************************************************************************
Checks that there are no pending io operations, that is, that no slot is
reserved in any of the five aio arrays. The reservation count of each
array is read under that array's own mutex, one array at a time. */
UNIV_INTERN
ibool
os_aio_all_slots_free(void)
/*=======================*/
				/* out: TRUE if all free */
{
	os_aio_array_t*	all_arrays[5];
	ulint		total_reserved	= 0;
	ulint		k;

	all_arrays[0] = os_aio_read_array;
	all_arrays[1] = os_aio_write_array;
	all_arrays[2] = os_aio_ibuf_array;
	all_arrays[3] = os_aio_log_array;
	all_arrays[4] = os_aio_sync_array;

	for (k = 0; k < sizeof(all_arrays) / sizeof(all_arrays[0]); k++) {

		os_mutex_enter(all_arrays[k]->mutex);

		total_reserved += all_arrays[k]->n_reserved;

		os_mutex_exit(all_arrays[k]->mutex);
	}

	return(total_reserved == 0 ? TRUE : FALSE);
}
#endif /* UNIV_DEBUG */
|