LTP GCOV extension - code coverage report
Current view: directory - storage/innobase/os - os0file.c
Test: all.out
Date: 2009-03-04 Instrumented lines: 840
Code covered: 53.9 % Executed lines: 453

       1                 : /******************************************************
       2                 : The interface to the operating system file i/o primitives
       3                 : 
       4                 : (c) 1995 Innobase Oy
       5                 : 
       6                 : Created 10/21/1995 Heikki Tuuri
       7                 : *******************************************************/
       8                 : 
       9                 : #include "os0file.h"
      10                 : #include "os0sync.h"
      11                 : #include "os0thread.h"
      12                 : #include "ut0mem.h"
      13                 : #include "srv0srv.h"
      14                 : #include "srv0start.h"
      15                 : #include "fil0fil.h"
      16                 : #include "buf0buf.h"
      17                 : 
      18                 : #if defined(UNIV_HOTBACKUP) && defined(__WIN__)
      19                 : /* Add includes for the _stat() call to compile on Windows */
      20                 : #include <sys/types.h>
      21                 : #include <sys/stat.h>
      22                 : #include <errno.h>
      23                 : #endif /* UNIV_HOTBACKUP */
      24                 : 
      25                 : /* This specifies the file permissions InnoDB uses when it creates files in
      26                 : Unix (the default below grants read and write to the owner and the group);
      27                 : the value of os_innodb_umask is initialized in ha_innodb.cc to my_umask */
      28                 : 
      29                 : #ifndef __WIN__
      30                 : UNIV_INTERN ulint       os_innodb_umask
      31                 :                         = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
      32                 : #else
      33                 : UNIV_INTERN ulint       os_innodb_umask         = 0;
      34                 : #endif
      35                 : 
      36                 : #ifdef UNIV_DO_FLUSH
      37                 : /* If the following is set to TRUE, we do not call os_file_flush in every
      38                 : os_file_write. We can set this TRUE when the doublewrite buffer is used. */
      39                 : UNIV_INTERN ibool       os_do_not_call_flush_at_each_write      = FALSE;
      40                 : #else
      41                 : /* We do not call os_file_flush in every os_file_write. */
      42                 : #endif /* UNIV_DO_FLUSH */
      43                 : 
      44                 : /* We use these mutexes to protect lseek + file i/o operation, if the
      45                 : OS does not provide an atomic pread or pwrite, or similar. They are created in os_io_init_simple(). */
      46                 : #define OS_FILE_N_SEEK_MUTEXES  16
      47                 : UNIV_INTERN os_mutex_t  os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
      48                 : 
      49                 : /* In simulated aio, merge at most this many consecutive i/os */
      50                 : #define OS_AIO_MERGE_N_CONSECUTIVE      64
      51                 : 
      52                 : /* If this flag is TRUE, then we will use the native aio of the
      53                 : OS (provided we compiled Innobase with it in), otherwise we will
      54                 : use simulated aio we build below with threads */
      55                 : 
      56                 : UNIV_INTERN ibool       os_aio_use_native_aio   = FALSE;
      57                 : 
      58                 : UNIV_INTERN ibool       os_aio_print_debug      = FALSE; /* NOTE(review): presumably enables debug printing of aio activity; no use is visible in this part of the file -- confirm */
      59                 : 
      60                 : /* The aio array slot structure: describes one i/o request held in an aio array */
      61                 : typedef struct os_aio_slot_struct       os_aio_slot_t;
      62                 : 
      63                 : struct os_aio_slot_struct{
      64                 :         ibool           is_read;        /* TRUE if a read operation */
      65                 :         ulint           pos;            /* index of the slot in the aio
      66                 :                                         array */
      67                 :         ibool           reserved;       /* TRUE if this slot is reserved */
      68                 :         time_t          reservation_time;/* time when reserved */
      69                 :         ulint           len;            /* length of the block to read or
      70                 :                                         write */
      71                 :         byte*           buf;            /* buffer used in i/o */
      72                 :         ulint           type;           /* OS_FILE_READ or OS_FILE_WRITE */
      73                 :         ulint           offset;         /* 32 low bits of file offset in
      74                 :                                         bytes */
      75                 :         ulint           offset_high;    /* 32 high bits of file offset */
      76                 :         os_file_t       file;           /* file where to read or write */
      77                 :         const char*     name;           /* file name or path */
      78                 :         ibool           io_already_done;/* used only in simulated aio:
      79                 :                                         TRUE if the physical i/o already
      80                 :                                         made and only the slot message
      81                 :                                         needs to be passed to the caller
      82                 :                                         of os_aio_simulated_handle */
      83                 :         fil_node_t*     message1;       /* first message given by the requester */
      84                 :         void*           message2;       /* second message given by the requester
      85                 :                                         of an aio operation; the messages can
      86                 :                                         be used to identify which pending aio
      87                 :                                         operation was completed */
      88                 : #ifdef WIN_ASYNC_IO
      89                 :         os_event_t      event;          /* event object we need in the
      90                 :                                         OVERLAPPED struct */
      91                 :         OVERLAPPED      control;        /* Windows control block for the
      92                 :                                         aio request */
      93                 : #endif
      94                 : };
      95                 : 
      96                 : /* The aio array structure: a mutex-protected set of aio slots */
      97                 : typedef struct os_aio_array_struct      os_aio_array_t;
      98                 : 
      99                 : struct os_aio_array_struct{
     100                 :         os_mutex_t      mutex;    /* the mutex protecting the aio array */
     101                 :         os_event_t      not_full; /* The event which is set to the signaled
     102                 :                                   state when there is space in the aio
     103                 :                                   array outside the ibuf segment */
     104                 :         os_event_t      is_empty; /* The event which is set to the signaled
     105                 :                                   state when there are no pending i/os
     106                 :                                   in this array */
     107                 :         ulint           n_slots;  /* Total number of slots in the aio array.
     108                 :                                   This must be divisible by n_threads; NOTE(review): no n_threads field exists, presumably n_segments is meant -- confirm */
     109                 :         ulint           n_segments;/* Number of segments in the aio array of
     110                 :                                   pending aio requests. A thread can wait
     111                 :                                   separately for any one of the segments. */
     112                 :         ulint           n_reserved;/* Number of reserved slots in the
     113                 :                                   aio array outside the ibuf segment */
     114                 :         os_aio_slot_t*  slots;    /* Pointer to the slots in the array */
     115                 : #ifdef __WIN__
     116                 :         os_native_event_t* native_events;
     117                 :                                   /* Pointer to an array of OS native event
     118                 :                                   handles where we copied the handles from
     119                 :                                   slots, in the same order. This can be used
     120                 :                                   in WaitForMultipleObjects; used only in
     121                 :                                   Windows */
     122                 : #endif
     123                 : };
     124                 : 
     125                 : /* Array of events used in simulated aio */
     126                 : static os_event_t*      os_aio_segment_wait_events      = NULL;
     127                 : 
     128                 : /* The aio arrays for read, write, ibuf and log i/o, as well as sync aio. These
     129                 : are NULL when the module has not yet been initialized. */
     130                 : static os_aio_array_t*  os_aio_read_array       = NULL;
     131                 : static os_aio_array_t*  os_aio_write_array      = NULL;
     132                 : static os_aio_array_t*  os_aio_ibuf_array       = NULL;
     133                 : static os_aio_array_t*  os_aio_log_array        = NULL;
     134                 : static os_aio_array_t*  os_aio_sync_array       = NULL;
     135                 : 
     136                 : static ulint    os_aio_n_segments       = ULINT_UNDEFINED;
     137                 : 
     138                 : /* If the following is TRUE, read i/o handler threads try to
     139                 : wait until a batch of new read requests have been posted */
     140                 : static ibool    os_aio_recommend_sleep_for_read_threads = FALSE;
     141                 : 
     142                 : UNIV_INTERN ulint       os_n_file_reads         = 0;
     143                 : UNIV_INTERN ulint       os_bytes_read_since_printout = 0;
     144                 : UNIV_INTERN ulint       os_n_file_writes        = 0;
     145                 : UNIV_INTERN ulint       os_n_fsyncs             = 0;
     146                 : UNIV_INTERN ulint       os_n_file_reads_old     = 0; /* NOTE(review): the *_old counters presumably hold the values seen at the last printout -- confirm */
     147                 : UNIV_INTERN ulint       os_n_file_writes_old    = 0;
     148                 : UNIV_INTERN ulint       os_n_fsyncs_old         = 0;
     149                 : UNIV_INTERN time_t      os_last_printout;
     150                 : 
     151                 : UNIV_INTERN ibool       os_has_said_disk_full   = FALSE;
     152                 : 
     153                 : /* The mutex protecting the following counts of pending I/O operations; it is created in os_io_init_simple() */
     154                 : static os_mutex_t       os_file_count_mutex;
     155                 : UNIV_INTERN ulint       os_file_n_pending_preads  = 0;
     156                 : UNIV_INTERN ulint       os_file_n_pending_pwrites = 0;
     157                 : UNIV_INTERN ulint       os_n_pending_writes = 0;
     158                 : UNIV_INTERN ulint       os_n_pending_reads = 0;
     159                 : 
     160                 : /***************************************************************************
     161                 : Gets the operating system version. Currently works only on Windows. */
     162                 : UNIV_INTERN
     163                 : ulint
     164                 : os_get_os_version(void)
     165                 : /*===================*/
     166                 :                   /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
     167               0 : {
     168                 : #ifdef __WIN__
     169                 :         OSVERSIONINFO     os_info;
     170                 : 
     171                 :         os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
     172                 : 
     173                 :         ut_a(GetVersionEx(&os_info));
     174                 : 
     175                 :         if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
     176                 :                 return(OS_WIN31);
     177                 :         } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
     178                 :                 return(OS_WIN95);
     179                 :         } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
     180                 :                 if (os_info.dwMajorVersion <= 4) {
     181                 :                         return(OS_WINNT);
     182                 :                 } else {
     183                 :                         return(OS_WIN2000);
     184                 :                 }
     185                 :         } else {
     186                 :                 ut_error;
     187                 :                 return(0);
     188                 :         }
     189                 : #else
     190               0 :         ut_error;
     191                 : 
     192                 :         return(0);
     193                 : #endif
     194                 : }
     195                 : 
     196                 : /***************************************************************************
     197                 : Retrieves the last error number if an error occurs in a file io function.
     198                 : The number should be retrieved before any other OS calls (because they may
     199                 : overwrite the error number). If the number is not known to this program,
     200                 : the OS error number + 100 is returned. */
     201                 : UNIV_INTERN
     202                 : ulint
     203                 : os_file_get_last_error(
     204                 : /*===================*/
     205                 :                                         /* out: error number, or OS error
     206                 :                                         number + 100 */
     207                 :         ibool   report_all_errors)      /* in: TRUE if we want an error message
     208                 :                                         printed of all errors */
     209             114 : {
     210                 :         ulint   err;
     211                 : 
     212                 : #ifdef __WIN__
     213                 : 
     214                 :         err = (ulint) GetLastError();
     215                 : 
     216                 :         if (report_all_errors
     217                 :             || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
     218                 : 
     219                 :                 ut_print_timestamp(stderr);
     220                 :                 fprintf(stderr,
     221                 :                         "  InnoDB: Operating system error number %lu"
     222                 :                         " in a file operation.\n", (ulong) err);
     223                 : 
     224                 :                 if (err == ERROR_PATH_NOT_FOUND) {
     225                 :                         fprintf(stderr,
     226                 :                                 "InnoDB: The error means the system"
     227                 :                                 " cannot find the path specified.\n");
     228                 : 
     229                 :                         if (srv_is_being_started) {
     230                 :                                 fprintf(stderr,
     231                 :                                         "InnoDB: If you are installing InnoDB,"
     232                 :                                         " remember that you must create\n"
     233                 :                                         "InnoDB: directories yourself, InnoDB"
     234                 :                                         " does not create them.\n");
     235                 :                         }
     236                 :                 } else if (err == ERROR_ACCESS_DENIED) {
     237                 :                         fprintf(stderr,
     238                 :                                 "InnoDB: The error means mysqld does not have"
     239                 :                                 " the access rights to\n"
     240                 :                                 "InnoDB: the directory. It may also be"
     241                 :                                 " you have created a subdirectory\n"
     242                 :                                 "InnoDB: of the same name as a data file.\n");
     243                 :                 } else if (err == ERROR_SHARING_VIOLATION
     244                 :                            || err == ERROR_LOCK_VIOLATION) {
     245                 :                         fprintf(stderr,
     246                 :                                 "InnoDB: The error means that another program"
     247                 :                                 " is using InnoDB's files.\n"
     248                 :                                 "InnoDB: This might be a backup or antivirus"
     249                 :                                 " software or another instance\n"
     250                 :                                 "InnoDB: of MySQL."
     251                 :                                 " Please close it to get rid of this error.\n");
     252                 :                 } else {
     253                 :                         fprintf(stderr,
     254                 :                                 "InnoDB: Some operating system error numbers"
     255                 :                                 " are described at\n"
     256                 :                                 "InnoDB: "
     257                 :                                 "http://dev.mysql.com/doc/refman/5.1/en/"
     258                 :                                 "operating-system-error-codes.html\n");
     259                 :                 }
     260                 :         }
     261                 : 
     262                 :         fflush(stderr);
     263                 : 
     264                 :         if (err == ERROR_FILE_NOT_FOUND) {
     265                 :                 return(OS_FILE_NOT_FOUND);
     266                 :         } else if (err == ERROR_DISK_FULL) {
     267                 :                 return(OS_FILE_DISK_FULL);
     268                 :         } else if (err == ERROR_FILE_EXISTS) {
     269                 :                 return(OS_FILE_ALREADY_EXISTS);
     270                 :         } else if (err == ERROR_SHARING_VIOLATION
     271                 :                    || err == ERROR_LOCK_VIOLATION) {
     272                 :                 return(OS_FILE_SHARING_VIOLATION);
     273                 :         } else {
     274                 :                 return(100 + err);
     275                 :         }
     276                 : #else
     277             114 :         err = (ulint) errno;
     278                 : 
     279             114 :         if (report_all_errors
     280                 :             || (err != ENOSPC && err != EEXIST)) {
     281                 : 
     282               0 :                 ut_print_timestamp(stderr);
     283               0 :                 fprintf(stderr,
     284                 :                         "  InnoDB: Operating system error number %lu"
     285                 :                         " in a file operation.\n", (ulong) err);
     286                 : 
     287               0 :                 if (err == ENOENT) {
     288               0 :                         fprintf(stderr,
     289                 :                                 "InnoDB: The error means the system"
     290                 :                                 " cannot find the path specified.\n");
     291                 : 
     292               0 :                         if (srv_is_being_started) {
     293               0 :                                 fprintf(stderr,
     294                 :                                         "InnoDB: If you are installing InnoDB,"
     295                 :                                         " remember that you must create\n"
     296                 :                                         "InnoDB: directories yourself, InnoDB"
     297                 :                                         " does not create them.\n");
     298                 :                         }
     299               0 :                 } else if (err == EACCES) {
     300               0 :                         fprintf(stderr,
     301                 :                                 "InnoDB: The error means mysqld does not have"
     302                 :                                 " the access rights to\n"
     303                 :                                 "InnoDB: the directory.\n");
     304                 :                 } else {
     305               0 :                         if (strerror((int)err) != NULL) {
     306               0 :                                 fprintf(stderr,
     307                 :                                         "InnoDB: Error number %lu"
     308                 :                                         " means '%s'.\n",
     309                 :                                         err, strerror((int)err));
     310                 :                         }
     311                 : 
     312               0 :                         fprintf(stderr,
     313                 :                                 "InnoDB: Some operating system"
     314                 :                                 " error numbers are described at\n"
     315                 :                                 "InnoDB: "
     316                 :                                 "http://dev.mysql.com/doc/refman/5.1/en/"
     317                 :                                 "operating-system-error-codes.html\n");
     318                 :                 }
     319                 :         }
     320                 : 
     321             114 :         fflush(stderr);
     322                 : 
     323             114 :         if (err == ENOSPC) {
     324               0 :                 return(OS_FILE_DISK_FULL);
     325             114 :         } else if (err == ENOENT) {
     326               0 :                 return(OS_FILE_NOT_FOUND);
     327             114 :         } else if (err == EEXIST) {
     328             114 :                 return(OS_FILE_ALREADY_EXISTS);
     329               0 :         } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
     330               0 :                 return(OS_FILE_PATH_ERROR);
     331                 :         } else {
     332               0 :                 return(100 + err);
     333                 :         }
     334                 : #endif
     335                 : }
     336                 : 
     337                 : /********************************************************************
     338                 : Does error handling when a file operation fails.
     339                 : Conditionally exits (calling exit(3)) based on should_exit value and the
     340                 : error type */
     341                 : static
     342                 : ibool
     343                 : os_file_handle_error_cond_exit(
     344                 : /*===========================*/
     345                 :                                         /* out: TRUE if we should retry the
     346                 :                                         operation */
     347                 :         const char*     name,           /* in: name of a file or NULL */
     348                 :         const char*     operation,      /* in: operation */
     349                 :         ibool           should_exit)    /* in: call exit(3) if unknown error
     350                 :                                         and this parameter is TRUE */
     351              57 : {
     352                 :         ulint   err;
     353                 : 
     354              57 :         err = os_file_get_last_error(FALSE);
     355                 : 
     356              57 :         if (err == OS_FILE_DISK_FULL) {
     357                 :                 /* We only print a warning about disk full once */
     358                 : 
     359               0 :                 if (os_has_said_disk_full) {
     360                 : 
     361               0 :                         return(FALSE);
     362                 :                 }
     363                 : 
     364               0 :                 if (name) {
     365               0 :                         ut_print_timestamp(stderr);
     366               0 :                         fprintf(stderr,
     367                 :                                 "  InnoDB: Encountered a problem with"
     368                 :                                 " file %s\n", name);
     369                 :                 }
     370                 : 
     371               0 :                 ut_print_timestamp(stderr);
     372               0 :                 fprintf(stderr,
     373                 :                         "  InnoDB: Disk is full. Try to clean the disk"
     374                 :                         " to free space.\n");
     375                 : 
     376               0 :                 os_has_said_disk_full = TRUE;
     377                 : 
     378               0 :                 fflush(stderr);
     379                 : 
     380               0 :                 return(FALSE);
     381              57 :         } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
     382                 : 
     383               0 :                 return(TRUE);
     384              57 :         } else if (err == OS_FILE_ALREADY_EXISTS
     385                 :                    || err == OS_FILE_PATH_ERROR) {
     386                 : 
     387              57 :                 return(FALSE);
     388               0 :         } else if (err == OS_FILE_SHARING_VIOLATION) {
     389                 : 
     390               0 :                 os_thread_sleep(10000000);  /* 10 sec */
     391               0 :                 return(TRUE);
     392                 :         } else {
     393               0 :                 if (name) {
     394               0 :                         fprintf(stderr, "InnoDB: File name %s\n", name);
     395                 :                 }
     396                 : 
     397               0 :                 fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
     398                 :                         operation);
     399                 : 
     400               0 :                 if (should_exit) {
     401               0 :                         fprintf(stderr, "InnoDB: Cannot continue operation.\n");
     402                 : 
     403               0 :                         fflush(stderr);
     404                 : 
     405               0 :                         exit(1);
     406                 :                 }
     407                 :         }
     408                 : 
     409               0 :         return(FALSE);
     410                 : }
     411                 : 
     412                 : /********************************************************************
     413                 : Does error handling when a file operation fails. */
     414                 : static
     415                 : ibool
     416                 : os_file_handle_error(
     417                 : /*=================*/
     418                 :                                 /* out: TRUE if we should retry the
     419                 :                                 operation */
     420                 :         const char*     name,   /* in: name of a file or NULL */
     421                 :         const char*     operation)/* in: operation */
     422              57 : {
     423                 :         /* exit in case of unknown error */
     424              57 :         return(os_file_handle_error_cond_exit(name, operation, TRUE));
     425                 : }
     426                 : 
     427                 : /********************************************************************
     428                 : Does error handling when a file operation fails. */
     429                 : static
     430                 : ibool
     431                 : os_file_handle_error_no_exit(
     432                 : /*=========================*/
     433                 :                                 /* out: TRUE if we should retry the
     434                 :                                 operation */
     435                 :         const char*     name,   /* in: name of a file or NULL */
     436                 :         const char*     operation)/* in: operation */
     437               0 : {
     438                 :         /* don't exit in case of unknown error */
     439               0 :         return(os_file_handle_error_cond_exit(name, operation, FALSE));
     440                 : }
     441                 : 
     442                 : #undef USE_FILE_LOCK
     443                 : #define USE_FILE_LOCK
     444                 : #if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
     445                 : /* InnoDB Hot Backup does not lock the data files.
     446                 :  * On Windows, mandatory locking is used.
     447                 :  */
     448                 : # undef USE_FILE_LOCK
     449                 : #endif
     450                 : #ifdef USE_FILE_LOCK
     451                 : /********************************************************************
     452                 : Obtain an exclusive lock on a file. */
     453                 : static
     454                 : int
     455                 : os_file_lock(
     456                 : /*=========*/
     457                 :                                 /* out: 0 on success */
     458                 :         int             fd,     /* in: file descriptor */
     459                 :         const char*     name)   /* in: file name */
     460             126 : {
     461                 :         struct flock lk;
     462             126 :         lk.l_type = F_WRLCK;
     463             126 :         lk.l_whence = SEEK_SET;
     464             126 :         lk.l_start = lk.l_len = 0;
     465             126 :         if (fcntl(fd, F_SETLK, &lk) == -1) {
     466               0 :                 fprintf(stderr,
     467                 :                         "InnoDB: Unable to lock %s, error: %d\n", name, errno);
     468                 : 
     469               0 :                 if (errno == EAGAIN || errno == EACCES) {
     470               0 :                         fprintf(stderr,
     471                 :                                 "InnoDB: Check that you do not already have"
     472                 :                                 " another mysqld process\n"
     473                 :                                 "InnoDB: using the same InnoDB data"
     474                 :                                 " or log files.\n");
     475                 :                 }
     476                 : 
     477               0 :                 return(-1);
     478                 :         }
     479                 : 
     480             126 :         return(0);
     481                 : }
     482                 : #endif /* USE_FILE_LOCK */
     483                 : 
     484                 : /********************************************************************
     485                 : Creates the seek mutexes used in positioned reads and writes. */
     486                 : UNIV_INTERN
     487                 : void
     488                 : os_io_init_simple(void)
     489                 : /*===================*/
     490              21 : {
     491                 :         ulint   i;
     492                 : 
     493              21 :         os_file_count_mutex = os_mutex_create(NULL);
     494                 : 
     495             357 :         for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
     496             336 :                 os_file_seek_mutexes[i] = os_mutex_create(NULL);
     497                 :         }
     498              21 : }
     499                 : 
     500                 : /***************************************************************************
     501                 : Creates a temporary file.  This function is like tmpfile(3), but
     502                 : the temporary file is created in the MySQL temporary directory.
     503                 : On Netware, this function is like tmpfile(3), because the C run-time
     504                 : library of Netware does not expose the delete-on-close flag. */
     505                 : UNIV_INTERN
     506                 : FILE*
     507                 : os_file_create_tmpfile(void)
     508                 : /*========================*/
     509                 :                         /* out: temporary file handle, or NULL on error */
     510             105 : {
     511                 : #ifdef UNIV_HOTBACKUP
     512                 :         ut_error;
     513                 : 
     514                 :         return(NULL);
     515                 : #else
     516                 : # ifdef __NETWARE__
     517                 :         FILE*   file    = tmpfile();
     518                 : # else /* __NETWARE__ */
     519             105 :         FILE*   file    = NULL;
     520             105 :         int     fd      = innobase_mysql_tmpfile();
     521                 : 
     522             105 :         if (fd >= 0) {
     523             105 :                 file = fdopen(fd, "w+b");
     524                 :         }
     525                 : # endif /* __NETWARE__ */
     526                 : 
     527             105 :         if (!file) {
     528               0 :                 ut_print_timestamp(stderr);
     529               0 :                 fprintf(stderr,
     530                 :                         "  InnoDB: Error: unable to create temporary file;"
     531                 :                         " errno: %d\n", errno);
     532                 : # ifndef __NETWARE__
     533               0 :                 if (fd >= 0) {
     534               0 :                         close(fd);
     535                 :                 }
     536                 : # endif /* !__NETWARE__ */
     537                 :         }
     538                 : 
     539             105 :         return(file);
     540                 : #endif /* UNIV_HOTBACKUP */
     541                 : }
     542                 : 
     543                 : /***************************************************************************
     544                 : The os_file_opendir() function opens a directory stream corresponding to the
     545                 : directory named by the dirname argument. The directory stream is positioned
     546                 : at the first entry. In both Unix and Windows we automatically skip the '.'
     547                 : and '..' items at the start of the directory listing. */
     548                 : UNIV_INTERN
     549                 : os_file_dir_t
     550                 : os_file_opendir(
     551                 : /*============*/
     552                 :                                         /* out: directory stream, NULL if
     553                 :                                         error */
     554                 :         const char*     dirname,        /* in: directory name; it must not
     555                 :                                         contain a trailing '\' or '/' */
     556                 :         ibool           error_is_fatal) /* in: TRUE if we should treat an
     557                 :                                         error as a fatal error; if we try to
     558                 :                                         open symlinks then we do not wish a
     559                 :                                         fatal error if it happens not to be
     560                 :                                         a directory */
     561               0 : {
     562                 :         os_file_dir_t           dir;
     563                 : #ifdef __WIN__
     564                 :         LPWIN32_FIND_DATA       lpFindFileData;
     565                 :         char                    path[OS_FILE_MAX_PATH + 3];
     566                 : 
     567                 :         ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
     568                 : 
     569                 :         strcpy(path, dirname);
     570                 :         strcpy(path + strlen(path), "\\*");
     571                 : 
     572                 :         /* Note that in Windows opening the 'directory stream' also retrieves
     573                 :         the first entry in the directory. Since it is '.', that is no problem,
     574                 :         as we will skip over the '.' and '..' entries anyway. */
     575                 : 
     576                 :         lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
     577                 : 
     578                 :         dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
     579                 : 
     580                 :         ut_free(lpFindFileData);
     581                 : 
     582                 :         if (dir == INVALID_HANDLE_VALUE) {
     583                 : 
     584                 :                 if (error_is_fatal) {
     585                 :                         os_file_handle_error(dirname, "opendir");
     586                 :                 }
     587                 : 
     588                 :                 return(NULL);
     589                 :         }
     590                 : 
     591                 :         return(dir);
     592                 : #else
     593               0 :         dir = opendir(dirname);
     594                 : 
     595               0 :         if (dir == NULL && error_is_fatal) {
     596               0 :                 os_file_handle_error(dirname, "opendir");
     597                 :         }
     598                 : 
     599               0 :         return(dir);
     600                 : #endif
     601                 : }
     602                 : 
     603                 : /***************************************************************************
     604                 : Closes a directory stream. */
     605                 : UNIV_INTERN
     606                 : int
     607                 : os_file_closedir(
     608                 : /*=============*/
     609                 :                                 /* out: 0 if success, -1 if failure */
     610                 :         os_file_dir_t   dir)    /* in: directory stream */
     611               0 : {
     612                 : #ifdef __WIN__
     613                 :         BOOL            ret;
     614                 : 
     615                 :         ret = FindClose(dir);
     616                 : 
     617                 :         if (!ret) {
     618                 :                 os_file_handle_error_no_exit(NULL, "closedir");
     619                 : 
     620                 :                 return(-1);
     621                 :         }
     622                 : 
     623                 :         return(0);
     624                 : #else
     625                 :         int     ret;
     626                 : 
     627               0 :         ret = closedir(dir);
     628                 : 
     629               0 :         if (ret) {
     630               0 :                 os_file_handle_error_no_exit(NULL, "closedir");
     631                 :         }
     632                 : 
     633               0 :         return(ret);
     634                 : #endif
     635                 : }
     636                 : 
     637                 : /***************************************************************************
     638                 : This function returns information of the next file in the directory. We jump
     639                 : over the '.' and '..' entries in the directory. */
     640                 : UNIV_INTERN
     641                 : int
     642                 : os_file_readdir_next_file(
     643                 : /*======================*/
     644                 :                                 /* out: 0 if ok, -1 if error, 1 if at the end
     645                 :                                 of the directory */
     646                 :         const char*     dirname,/* in: directory name or path */
     647                 :         os_file_dir_t   dir,    /* in: directory stream */
     648                 :         os_file_stat_t* info)   /* in/out: buffer where the info is returned */
     649               0 : {
     650                 : #ifdef __WIN__
     651                 :         LPWIN32_FIND_DATA       lpFindFileData;
     652                 :         BOOL                    ret;
     653                 : 
     654                 :         lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
     655                 : next_file:
     656                 :         ret = FindNextFile(dir, lpFindFileData);
     657                 : 
     658                 :         if (ret) {
     659                 :                 ut_a(strlen((char *) lpFindFileData->cFileName)
     660                 :                      < OS_FILE_MAX_PATH);
     661                 : 
     662                 :                 if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
     663                 :                     || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
     664                 : 
     665                 :                         goto next_file;
     666                 :                 }
     667                 : 
     668                 :                 strcpy(info->name, (char *) lpFindFileData->cFileName);
     669                 : 
     670                 :                 info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
     671                 :                         + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
     672                 :                            << 32);
     673                 : 
     674                 :                 if (lpFindFileData->dwFileAttributes
     675                 :                     & FILE_ATTRIBUTE_REPARSE_POINT) {
     676                 :                         /* TODO: test Windows symlinks */
     677                 :                         /* TODO: MySQL has apparently its own symlink
     678                 :                         implementation in Windows, dbname.sym can
     679                 :                         redirect a database directory:
     680                 :                         http://dev.mysql.com/doc/refman/5.1/en/
     681                 :                         windows-symbolic-links.html */
     682                 :                         info->type = OS_FILE_TYPE_LINK;
     683                 :                 } else if (lpFindFileData->dwFileAttributes
     684                 :                            & FILE_ATTRIBUTE_DIRECTORY) {
     685                 :                         info->type = OS_FILE_TYPE_DIR;
     686                 :                 } else {
     687                 :                         /* It is probably safest to assume that all other
     688                 :                         file types are normal. Better to check them rather
     689                 :                         than blindly skip them. */
     690                 : 
     691                 :                         info->type = OS_FILE_TYPE_FILE;
     692                 :                 }
     693                 :         }
     694                 : 
     695                 :         ut_free(lpFindFileData);
     696                 : 
     697                 :         if (ret) {
     698                 :                 return(0);
     699                 :         } else if (GetLastError() == ERROR_NO_MORE_FILES) {
     700                 : 
     701                 :                 return(1);
     702                 :         } else {
     703                 :                 os_file_handle_error_no_exit(dirname,
     704                 :                                              "readdir_next_file");
     705                 :                 return(-1);
     706                 :         }
     707                 : #else
     708                 :         struct dirent*  ent;
     709                 :         char*           full_path;
     710                 :         int             ret;
     711                 :         struct stat     statinfo;
     712                 : #ifdef HAVE_READDIR_R
     713                 :         char            dirent_buf[sizeof(struct dirent)
     714                 :                                    + _POSIX_PATH_MAX + 100];
     715                 :         /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
     716                 :         the max file name len; but in most standards, the
     717                 :         length is NAME_MAX; we add 100 to be even safer */
     718                 : #endif
     719                 : 
     720               0 : next_file:
     721                 : 
     722                 : #ifdef HAVE_READDIR_R
     723               0 :         ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
     724                 : 
     725               0 :         if (ret != 0) {
     726               0 :                 fprintf(stderr,
     727                 :                         "InnoDB: cannot read directory %s, error %lu\n",
     728                 :                         dirname, (ulong)ret);
     729                 : 
     730               0 :                 return(-1);
     731                 :         }
     732                 : 
     733               0 :         if (ent == NULL) {
     734                 :                 /* End of directory */
     735                 : 
     736               0 :                 return(1);
     737                 :         }
     738                 : 
     739               0 :         ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
     740                 : #else
     741                 :         ent = readdir(dir);
     742                 : 
     743                 :         if (ent == NULL) {
     744                 : 
     745                 :                 return(1);
     746                 :         }
     747                 : #endif
     748               0 :         ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
     749                 : 
     750               0 :         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
     751                 : 
     752                 :                 goto next_file;
     753                 :         }
     754                 : 
     755               0 :         strcpy(info->name, ent->d_name);
     756                 : 
     757               0 :         full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
     758                 : 
     759               0 :         sprintf(full_path, "%s/%s", dirname, ent->d_name);
     760                 : 
     761               0 :         ret = stat(full_path, &statinfo);
     762                 : 
     763               0 :         if (ret) {
     764               0 :                 os_file_handle_error_no_exit(full_path, "stat");
     765                 : 
     766               0 :                 ut_free(full_path);
     767                 : 
     768               0 :                 return(-1);
     769                 :         }
     770                 : 
     771               0 :         info->size = (ib_int64_t)statinfo.st_size;
     772                 : 
     773               0 :         if (S_ISDIR(statinfo.st_mode)) {
     774               0 :                 info->type = OS_FILE_TYPE_DIR;
     775               0 :         } else if (S_ISLNK(statinfo.st_mode)) {
     776               0 :                 info->type = OS_FILE_TYPE_LINK;
     777               0 :         } else if (S_ISREG(statinfo.st_mode)) {
     778               0 :                 info->type = OS_FILE_TYPE_FILE;
     779                 :         } else {
     780               0 :                 info->type = OS_FILE_TYPE_UNKNOWN;
     781                 :         }
     782                 : 
     783               0 :         ut_free(full_path);
     784                 : 
     785               0 :         return(0);
     786                 : #endif
     787                 : }
     788                 : 
     789                 : /*********************************************************************
     790                 : This function attempts to create a directory named pathname. The new directory
     791                 : gets default permissions. On Unix the permissions are (0770 & ~umask). If the
     792                 : directory exists already, nothing is done and the call succeeds, unless the
     793                 : fail_if_exists arguments is true. */
     794                 : UNIV_INTERN
     795                 : ibool
     796                 : os_file_create_directory(
     797                 : /*=====================*/
     798                 :                                         /* out: TRUE if call succeeds,
     799                 :                                         FALSE on error */
     800                 :         const char*     pathname,       /* in: directory name as
     801                 :                                         null-terminated string */
     802                 :         ibool           fail_if_exists) /* in: if TRUE, pre-existing directory
     803                 :                                         is treated as an error. */
     804               0 : {
     805                 : #ifdef __WIN__
     806                 :         BOOL    rcode;
     807                 : 
     808                 :         rcode = CreateDirectory((LPCTSTR) pathname, NULL);
     809                 :         if (!(rcode != 0
     810                 :               || (GetLastError() == ERROR_ALREADY_EXISTS
     811                 :                   && !fail_if_exists))) {
     812                 :                 /* failure */
     813                 :                 os_file_handle_error(pathname, "CreateDirectory");
     814                 : 
     815                 :                 return(FALSE);
     816                 :         }
     817                 : 
     818                 :         return (TRUE);
     819                 : #else
     820                 :         int     rcode;
     821                 : 
     822               0 :         rcode = mkdir(pathname, 0770);
     823                 : 
     824               0 :         if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
     825                 :                 /* failure */
     826               0 :                 os_file_handle_error(pathname, "mkdir");
     827                 : 
     828               0 :                 return(FALSE);
     829                 :         }
     830                 : 
     831               0 :         return (TRUE);
     832                 : #endif
     833                 : }
     834                 : 
     835                 : /********************************************************************
     836                 : A simple function to open or create a file. */
     837                 : UNIV_INTERN
     838                 : os_file_t
     839                 : os_file_create_simple(
     840                 : /*==================*/
     841                 :                                 /* out, own: handle to the file, not defined
     842                 :                                 if error, error number can be retrieved with
     843                 :                                 os_file_get_last_error */
     844                 :         const char*     name,   /* in: name of the file or path as a
     845                 :                                 null-terminated string */
     846                 :         ulint           create_mode,/* in: OS_FILE_OPEN if an existing file is
     847                 :                                 opened (if does not exist, error), or
     848                 :                                 OS_FILE_CREATE if a new file is created
     849                 :                                 (if exists, error), or
     850                 :                                 OS_FILE_CREATE_PATH if new file
     851                 :                                 (if exists, error) and subdirectories along
     852                 :                                 its path are created (if needed)*/
     853                 :         ulint           access_type,/* in: OS_FILE_READ_ONLY or
     854                 :                                 OS_FILE_READ_WRITE */
     855                 :         ibool*          success)/* out: TRUE if succeed, FALSE if error */
     856               0 : {
     857                 : #ifdef __WIN__
     858                 :         os_file_t       file;
     859                 :         DWORD           create_flag;
     860                 :         DWORD           access;
     861                 :         DWORD           attributes      = 0;
     862                 :         ibool           retry;
     863                 : 
     864                 : try_again:
     865                 :         ut_a(name);
     866                 : 
     867                 :         if (create_mode == OS_FILE_OPEN) {
     868                 :                 create_flag = OPEN_EXISTING;
     869                 :         } else if (create_mode == OS_FILE_CREATE) {
     870                 :                 create_flag = CREATE_NEW;
     871                 :         } else if (create_mode == OS_FILE_CREATE_PATH) {
     872                 :                 /* create subdirs along the path if needed  */
     873                 :                 *success = os_file_create_subdirs_if_needed(name);
     874                 :                 if (!*success) {
     875                 :                         ut_error;
     876                 :                 }
     877                 :                 create_flag = CREATE_NEW;
     878                 :                 create_mode = OS_FILE_CREATE;
     879                 :         } else {
     880                 :                 create_flag = 0;
     881                 :                 ut_error;
     882                 :         }
     883                 : 
     884                 :         if (access_type == OS_FILE_READ_ONLY) {
     885                 :                 access = GENERIC_READ;
     886                 :         } else if (access_type == OS_FILE_READ_WRITE) {
     887                 :                 access = GENERIC_READ | GENERIC_WRITE;
     888                 :         } else {
     889                 :                 access = 0;
     890                 :                 ut_error;
     891                 :         }
     892                 : 
     893                 :         file = CreateFile((LPCTSTR) name,
     894                 :                           access,
     895                 :                           FILE_SHARE_READ | FILE_SHARE_WRITE,
     896                 :                           /* file can be read and written also
     897                 :                           by other processes */
     898                 :                           NULL, /* default security attributes */
     899                 :                           create_flag,
     900                 :                           attributes,
     901                 :                           NULL);        /* no template file */
     902                 : 
     903                 :         if (file == INVALID_HANDLE_VALUE) {
     904                 :                 *success = FALSE;
     905                 : 
     906                 :                 retry = os_file_handle_error(name,
     907                 :                                              create_mode == OS_FILE_OPEN ?
     908                 :                                              "open" : "create");
     909                 :                 if (retry) {
     910                 :                         goto try_again;
     911                 :                 }
     912                 :         } else {
     913                 :                 *success = TRUE;
     914                 :         }
     915                 : 
     916                 :         return(file);
     917                 : #else /* __WIN__ */
     918                 :         os_file_t       file;
     919                 :         int             create_flag;
     920                 :         ibool           retry;
     921                 : 
     922               0 : try_again:
     923               0 :         ut_a(name);
     924                 : 
     925               0 :         if (create_mode == OS_FILE_OPEN) {
     926               0 :                 if (access_type == OS_FILE_READ_ONLY) {
     927               0 :                         create_flag = O_RDONLY;
     928                 :                 } else {
     929               0 :                         create_flag = O_RDWR;
     930                 :                 }
     931               0 :         } else if (create_mode == OS_FILE_CREATE) {
     932               0 :                 create_flag = O_RDWR | O_CREAT | O_EXCL;
     933               0 :         } else if (create_mode == OS_FILE_CREATE_PATH) {
     934                 :                 /* create subdirs along the path if needed  */
     935               0 :                 *success = os_file_create_subdirs_if_needed(name);
     936               0 :                 if (!*success) {
     937               0 :                         return (-1);
     938                 :                 }
     939               0 :                 create_flag = O_RDWR | O_CREAT | O_EXCL;
     940               0 :                 create_mode = OS_FILE_CREATE;
     941                 :         } else {
     942               0 :                 create_flag = 0;
     943               0 :                 ut_error;
     944                 :         }
     945                 : 
     946               0 :         if (create_mode == OS_FILE_CREATE) {
     947               0 :                 file = open(name, create_flag, S_IRUSR | S_IWUSR
     948                 :                             | S_IRGRP | S_IWGRP);
     949                 :         } else {
     950               0 :                 file = open(name, create_flag);
     951                 :         }
     952                 : 
     953               0 :         if (file == -1) {
     954               0 :                 *success = FALSE;
     955                 : 
     956               0 :                 retry = os_file_handle_error(name,
     957                 :                                              create_mode == OS_FILE_OPEN ?
     958                 :                                              "open" : "create");
     959               0 :                 if (retry) {
     960               0 :                         goto try_again;
     961                 :                 }
     962                 : #ifdef USE_FILE_LOCK
     963               0 :         } else if (access_type == OS_FILE_READ_WRITE
     964                 :                    && os_file_lock(file, name)) {
     965               0 :                 *success = FALSE;
     966               0 :                 close(file);
     967               0 :                 file = -1;
     968                 : #endif
     969                 :         } else {
     970               0 :                 *success = TRUE;
     971                 :         }
     972                 : 
     973               0 :         return(file);
     974                 : #endif /* __WIN__ */
     975                 : }
     976                 : 
     977                 : /********************************************************************
     978                 : A simple function to open or create a file. */
     979                 : UNIV_INTERN
     980                 : os_file_t
     981                 : os_file_create_simple_no_error_handling(
     982                 : /*====================================*/
     983                 :                                 /* out, own: handle to the file, not defined
     984                 :                                 if error, error number can be retrieved with
     985                 :                                 os_file_get_last_error */
     986                 :         const char*     name,   /* in: name of the file or path as a
     987                 :                                 null-terminated string */
     988                 :         ulint           create_mode,/* in: OS_FILE_OPEN if an existing file
     989                 :                                 is opened (if does not exist, error), or
     990                 :                                 OS_FILE_CREATE if a new file is created
     991                 :                                 (if exists, error) */
     992                 :         ulint           access_type,/* in: OS_FILE_READ_ONLY,
     993                 :                                 OS_FILE_READ_WRITE, or
     994                 :                                 OS_FILE_READ_ALLOW_DELETE; the last option is
     995                 :                                 used by a backup program reading the file */
     996                 :         ibool*          success)/* out: TRUE if succeed, FALSE if error */
     997               0 : {
     998                 : #ifdef __WIN__
     999                 :         os_file_t       file;
    1000                 :         DWORD           create_flag;
    1001                 :         DWORD           access;
    1002                 :         DWORD           attributes      = 0;
    1003                 :         DWORD           share_mode      = FILE_SHARE_READ | FILE_SHARE_WRITE;
    1004                 : 
    1005                 :         ut_a(name);
    1006                 : 
    1007                 :         if (create_mode == OS_FILE_OPEN) {
    1008                 :                 create_flag = OPEN_EXISTING;
    1009                 :         } else if (create_mode == OS_FILE_CREATE) {
    1010                 :                 create_flag = CREATE_NEW;
    1011                 :         } else {
    1012                 :                 create_flag = 0;
    1013                 :                 ut_error;
    1014                 :         }
    1015                 : 
    1016                 :         if (access_type == OS_FILE_READ_ONLY) {
    1017                 :                 access = GENERIC_READ;
    1018                 :         } else if (access_type == OS_FILE_READ_WRITE) {
    1019                 :                 access = GENERIC_READ | GENERIC_WRITE;
    1020                 :         } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
    1021                 :                 access = GENERIC_READ;
    1022                 :                 share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
    1023                 :                         | FILE_SHARE_WRITE;     /* A backup program has to give
    1024                 :                                                 mysqld the maximum freedom to
    1025                 :                                                 do what it likes with the
    1026                 :                                                 file */
    1027                 :         } else {
    1028                 :                 access = 0;
    1029                 :                 ut_error;
    1030                 :         }
    1031                 : 
    1032                 :         file = CreateFile((LPCTSTR) name,
    1033                 :                           access,
    1034                 :                           share_mode,
    1035                 :                           NULL, /* default security attributes */
    1036                 :                           create_flag,
    1037                 :                           attributes,
    1038                 :                           NULL);        /* no template file */
    1039                 : 
    1040                 :         if (file == INVALID_HANDLE_VALUE) {
    1041                 :                 *success = FALSE;
    1042                 :         } else {
    1043                 :                 *success = TRUE;
    1044                 :         }
    1045                 : 
    1046                 :         return(file);
    1047                 : #else /* __WIN__ */
    1048                 :         os_file_t       file;
    1049                 :         int             create_flag;
    1050                 : 
    1051               0 :         ut_a(name);
    1052                 : 
    1053               0 :         if (create_mode == OS_FILE_OPEN) {
    1054               0 :                 if (access_type == OS_FILE_READ_ONLY) {
    1055               0 :                         create_flag = O_RDONLY;
    1056                 :                 } else {
    1057               0 :                         create_flag = O_RDWR;
    1058                 :                 }
    1059               0 :         } else if (create_mode == OS_FILE_CREATE) {
    1060               0 :                 create_flag = O_RDWR | O_CREAT | O_EXCL;
    1061                 :         } else {
    1062               0 :                 create_flag = 0;
    1063               0 :                 ut_error;
    1064                 :         }
    1065                 : 
    1066               0 :         if (create_mode == OS_FILE_CREATE) {
    1067               0 :                 file = open(name, create_flag, S_IRUSR | S_IWUSR
    1068                 :                             | S_IRGRP | S_IWGRP);
    1069                 :         } else {
    1070               0 :                 file = open(name, create_flag);
    1071                 :         }
    1072                 : 
    1073               0 :         if (file == -1) {
    1074               0 :                 *success = FALSE;
    1075                 : #ifdef USE_FILE_LOCK
    1076               0 :         } else if (access_type == OS_FILE_READ_WRITE
    1077                 :                    && os_file_lock(file, name)) {
    1078               0 :                 *success = FALSE;
    1079               0 :                 close(file);
    1080               0 :                 file = -1;
    1081                 : #endif
    1082                 :         } else {
    1083               0 :                 *success = TRUE;
    1084                 :         }
    1085                 : 
    1086               0 :         return(file);
    1087                 : #endif /* __WIN__ */
    1088                 : }
    1089                 : 
    1090                 : /********************************************************************
    1091                 : Tries to disable OS caching on an opened file descriptor. */
    1092                 : UNIV_INTERN
    1093                 : void
    1094                 : os_file_set_nocache(
    1095                 : /*================*/
    1096                 :         int             fd,             /* in: file descriptor to alter */
    1097                 :         const char*     file_name,      /* in: file name, used in the
    1098                 :                                         diagnostic message */
    1099                 :         const char*     operation_name) /* in: "open" or "create"; used in the
    1100                 :                                         diagnostic message */
    1101               0 : {
    1102                 :         /* some versions of Solaris may not have DIRECTIO_ON */
    1103                 : #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
    1104                 :         if (directio(fd, DIRECTIO_ON) == -1) {
    1105                 :                 int     errno_save;
    1106                 :                 errno_save = (int)errno;
    1107                 :                 ut_print_timestamp(stderr);
    1108                 :                 fprintf(stderr,
    1109                 :                         "  InnoDB: Failed to set DIRECTIO_ON "
    1110                 :                         "on file %s: %s: %s, continuing anyway\n",
    1111                 :                         file_name, operation_name, strerror(errno_save));
    1112                 :         }
    1113                 : #elif defined(O_DIRECT)
    1114               0 :         if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
    1115                 :                 int     errno_save;
    1116               0 :                 errno_save = (int)errno;
    1117               0 :                 ut_print_timestamp(stderr);
    1118               0 :                 fprintf(stderr,
    1119                 :                         "  InnoDB: Failed to set O_DIRECT "
    1120                 :                         "on file %s: %s: %s, continuing anyway\n",
    1121                 :                         file_name, operation_name, strerror(errno_save));
    1122               0 :                 if (errno_save == EINVAL) {
    1123               0 :                         ut_print_timestamp(stderr);
    1124               0 :                         fprintf(stderr,
    1125                 :                                 "  InnoDB: O_DIRECT is known to result in "
    1126                 :                                 "'Invalid argument' on Linux on tmpfs, "
    1127                 :                                 "see MySQL Bug#26662\n");
    1128                 :                 }
    1129                 :         }
    1130                 : #else /* Required for OSX */
    1131                 :         (void)fd;
    1132                 :         (void)file_name;
    1133                 :         (void)operation_name;
    1134                 : #endif
    1135               0 : }
    1136                 : 
    1137                 : /********************************************************************
    1138                 : Opens an existing file or creates a new. */
    1139                 : UNIV_INTERN
    1140                 : os_file_t
    1141                 : os_file_create(
    1142                 : /*===========*/
    1143                 :                                 /* out, own: handle to the file, not defined
    1144                 :                                 if error, error number can be retrieved with
    1145                 :                                 os_file_get_last_error */
    1146                 :         const char*     name,   /* in: name of the file or path as a
    1147                 :                                 null-terminated string */
    1148                 :         ulint           create_mode,/* in: OS_FILE_OPEN if an existing file
    1149                 :                                 is opened (if does not exist, error), or
    1150                 :                                 OS_FILE_CREATE if a new file is created
    1151                 :                                 (if exists, error),
    1152                 :                                 OS_FILE_OVERWRITE if a new file is created
    1153                 :                                 or an old overwritten;
    1154                 :                                 OS_FILE_OPEN_RAW, if a raw device or disk
    1155                 :                                 partition should be opened */
    1156                 :         ulint           purpose,/* in: OS_FILE_AIO, if asynchronous,
    1157                 :                                 non-buffered i/o is desired,
    1158                 :                                 OS_FILE_NORMAL, if any normal file;
    1159                 :                                 NOTE that it also depends on type, os_aio_..
    1160                 :                                 and srv_.. variables whether we really use
    1161                 :                                 async i/o or unbuffered i/o: look in the
    1162                 :                                 function source code for the exact rules */
    1163                 :         ulint           type,   /* in: OS_DATA_FILE or OS_LOG_FILE */
    1164                 :         ibool*          success)/* out: TRUE if succeed, FALSE if error */
    1165             183 : {
    1166                 : #ifdef __WIN__
    1167                 :         os_file_t       file;
    1168                 :         DWORD           share_mode      = FILE_SHARE_READ;
    1169                 :         DWORD           create_flag;
    1170                 :         DWORD           attributes;
    1171                 :         ibool           retry;
    1172                 : try_again:
    1173                 :         ut_a(name);
    1174                 : 
    1175                 :         if (create_mode == OS_FILE_OPEN_RAW) {
    1176                 :                 create_flag = OPEN_EXISTING;
    1177                 :                 share_mode = FILE_SHARE_WRITE;
    1178                 :         } else if (create_mode == OS_FILE_OPEN
    1179                 :                    || create_mode == OS_FILE_OPEN_RETRY) {
    1180                 :                 create_flag = OPEN_EXISTING;
    1181                 :         } else if (create_mode == OS_FILE_CREATE) {
    1182                 :                 create_flag = CREATE_NEW;
    1183                 :         } else if (create_mode == OS_FILE_OVERWRITE) {
    1184                 :                 create_flag = CREATE_ALWAYS;
    1185                 :         } else {
    1186                 :                 create_flag = 0;
    1187                 :                 ut_error;
    1188                 :         }
    1189                 : 
    1190                 :         if (purpose == OS_FILE_AIO) {
    1191                 :                 /* If specified, use asynchronous (overlapped) io and no
    1192                 :                 buffering of writes in the OS */
    1193                 :                 attributes = 0;
    1194                 : #ifdef WIN_ASYNC_IO
    1195                 :                 if (os_aio_use_native_aio) {
    1196                 :                         attributes = attributes | FILE_FLAG_OVERLAPPED;
    1197                 :                 }
    1198                 : #endif
    1199                 : #ifdef UNIV_NON_BUFFERED_IO
    1200                 :                 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
    1201                 :                         /* Do not use unbuffered i/o to log files because
    1202                 :                         value 2 denotes that we do not flush the log at every
    1203                 :                         commit, but only once per second */
    1204                 :                 } else if (srv_win_file_flush_method
    1205                 :                            == SRV_WIN_IO_UNBUFFERED) {
    1206                 :                         attributes = attributes | FILE_FLAG_NO_BUFFERING;
    1207                 :                 }
    1208                 : #endif
    1209                 :         } else if (purpose == OS_FILE_NORMAL) {
    1210                 :                 attributes = 0;
    1211                 : #ifdef UNIV_NON_BUFFERED_IO
    1212                 :                 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
    1213                 :                         /* Do not use unbuffered i/o to log files because
    1214                 :                         value 2 denotes that we do not flush the log at every
    1215                 :                         commit, but only once per second */
    1216                 :                 } else if (srv_win_file_flush_method
    1217                 :                            == SRV_WIN_IO_UNBUFFERED) {
    1218                 :                         attributes = attributes | FILE_FLAG_NO_BUFFERING;
    1219                 :                 }
    1220                 : #endif
    1221                 :         } else {
    1222                 :                 attributes = 0;
    1223                 :                 ut_error;
    1224                 :         }
    1225                 : 
    1226                 :         file = CreateFile((LPCTSTR) name,
    1227                 :                           GENERIC_READ | GENERIC_WRITE, /* read and write
    1228                 :                                                         access */
    1229                 :                           share_mode,   /* File can be read also by other
    1230                 :                                         processes; we must give the read
    1231                 :                                         permission because of ibbackup. We do
    1232                 :                                         not give the write permission to
    1233                 :                                         others because if one would succeed to
    1234                 :                                         start 2 instances of mysqld on the
    1235                 :                                         SAME files, that could cause severe
    1236                 :                                         database corruption! When opening
    1237                 :                                         raw disk partitions, Microsoft manuals
    1238                 :                                         say that we must give also the write
    1239                 :                                         permission. */
    1240                 :                           NULL, /* default security attributes */
    1241                 :                           create_flag,
    1242                 :                           attributes,
    1243                 :                           NULL);        /* no template file */
    1244                 : 
    1245                 :         if (file == INVALID_HANDLE_VALUE) {
    1246                 :                 *success = FALSE;
    1247                 : 
    1248                 :                 /* When srv_file_per_table is on, file creation failure may not
    1249                 :                 be critical to the whole instance. Do not crash the server in
    1250                 :                 case of unknown errors. */
    1251                 :                 if (srv_file_per_table) {
    1252                 :                         retry = os_file_handle_error_no_exit(name,
    1253                 :                                                 create_mode == OS_FILE_CREATE ?
    1254                 :                                                 "create" : "open");
    1255                 :                 } else {
    1256                 :                         retry = os_file_handle_error(name,
    1257                 :                                                 create_mode == OS_FILE_CREATE ?
    1258                 :                                                 "create" : "open");
    1259                 :                 }
    1260                 : 
    1261                 :                 if (retry) {
    1262                 :                         goto try_again;
    1263                 :                 }
    1264                 :         } else {
    1265                 :                 *success = TRUE;
    1266                 :         }
    1267                 : 
    1268                 :         return(file);
    1269                 : #else /* __WIN__ */
    1270                 :         os_file_t       file;
    1271                 :         int             create_flag;
    1272                 :         ibool           retry;
    1273             183 :         const char*     mode_str        = NULL;
    1274             183 :         const char*     type_str        = NULL;
    1275             183 :         const char*     purpose_str     = NULL;
    1276                 : 
    1277             183 : try_again:
    1278             183 :         ut_a(name);
    1279                 : 
    1280             303 :         if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
    1281                 :             || create_mode == OS_FILE_OPEN_RETRY) {
    1282             120 :                 mode_str = "OPEN";
    1283             120 :                 create_flag = O_RDWR;
    1284              63 :         } else if (create_mode == OS_FILE_CREATE) {
    1285              63 :                 mode_str = "CREATE";
    1286              63 :                 create_flag = O_RDWR | O_CREAT | O_EXCL;
    1287               0 :         } else if (create_mode == OS_FILE_OVERWRITE) {
    1288               0 :                 mode_str = "OVERWRITE";
    1289               0 :                 create_flag = O_RDWR | O_CREAT | O_TRUNC;
    1290                 :         } else {
    1291               0 :                 create_flag = 0;
    1292               0 :                 ut_error;
    1293                 :         }
    1294                 : 
    1295             183 :         if (type == OS_LOG_FILE) {
    1296             122 :                 type_str = "LOG";
    1297              61 :         } else if (type == OS_DATA_FILE) {
    1298              61 :                 type_str = "DATA";
    1299                 :         } else {
    1300               0 :                 ut_error;
    1301                 :         }
    1302                 : 
    1303             183 :         if (purpose == OS_FILE_AIO) {
    1304             101 :                 purpose_str = "AIO";
    1305              82 :         } else if (purpose == OS_FILE_NORMAL) {
    1306              82 :                 purpose_str = "NORMAL";
    1307                 :         } else {
    1308               0 :                 ut_error;
    1309                 :         }
    1310                 : 
    1311                 : #if 0
    1312                 :         fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
    1313                 :                 name, mode_str, type_str, purpose_str);
    1314                 : #endif
    1315                 : #ifdef O_SYNC
    1316                 :         /* We let O_SYNC only affect log files; note that we map O_DSYNC to
    1317                 :         O_SYNC because the datasync options seemed to corrupt files in 2001
    1318                 :         in both Linux and Solaris */
    1319             183 :         if (type == OS_LOG_FILE
    1320                 :             && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
    1321                 : 
    1322                 : # if 0
    1323                 :                 fprintf(stderr, "Using O_SYNC for file %s\n", name);
    1324                 : # endif
    1325                 : 
    1326               0 :                 create_flag = create_flag | O_SYNC;
    1327                 :         }
    1328                 : #endif /* O_SYNC */
    1329                 : 
    1330             183 :         file = open(name, create_flag, os_innodb_umask);
    1331                 : 
    1332             183 :         if (file == -1) {
    1333              57 :                 *success = FALSE;
    1334                 : 
    1335                 :                 /* When srv_file_per_table is on, file creation failure may not
    1336                 :                 be critical to the whole instance. Do not crash the server in
    1337                 :                 case of unknown errors. */
    1338              57 :                 if (srv_file_per_table) {
    1339               0 :                         retry = os_file_handle_error_no_exit(name,
    1340                 :                                                 create_mode == OS_FILE_CREATE ?
    1341                 :                                                 "create" : "open");
    1342                 :                 } else {
    1343              57 :                         retry = os_file_handle_error(name,
    1344                 :                                                 create_mode == OS_FILE_CREATE ?
    1345                 :                                                 "create" : "open");
    1346                 :                 }
    1347                 : 
    1348              57 :                 if (retry) {
    1349               0 :                         goto try_again;
    1350                 :                 } else {
    1351              57 :                         return(file /* -1 */);
    1352                 :                 }
    1353                 :         }
    1354                 :         /* else */
    1355                 : 
    1356             126 :         *success = TRUE;
    1357                 : 
    1358                 :         /* We disable OS caching (O_DIRECT) only on data files */
    1359             126 :         if (type != OS_LOG_FILE
    1360                 :             && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
    1361                 :                 
    1362               0 :                 os_file_set_nocache(file, name, mode_str);
    1363                 :         }
    1364                 : 
    1365                 : #ifdef USE_FILE_LOCK
    1366             126 :         if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
    1367                 : 
    1368               0 :                 if (create_mode == OS_FILE_OPEN_RETRY) {
    1369                 :                         int i;
    1370               0 :                         ut_print_timestamp(stderr);
    1371               0 :                         fputs("  InnoDB: Retrying to lock"
    1372                 :                               " the first data file\n",
    1373                 :                               stderr);
    1374               0 :                         for (i = 0; i < 100; i++) {
    1375               0 :                                 os_thread_sleep(1000000);
    1376               0 :                                 if (!os_file_lock(file, name)) {
    1377               0 :                                         *success = TRUE;
    1378               0 :                                         return(file);
    1379                 :                                 }
    1380                 :                         }
    1381               0 :                         ut_print_timestamp(stderr);
    1382               0 :                         fputs("  InnoDB: Unable to open the first data file\n",
    1383                 :                               stderr);
    1384                 :                 }
    1385                 : 
    1386               0 :                 *success = FALSE;
    1387               0 :                 close(file);
    1388               0 :                 file = -1;
    1389                 :         }
    1390                 : #endif /* USE_FILE_LOCK */
    1391                 : 
    1392             126 :         return(file);
    1393                 : #endif /* __WIN__ */
    1394                 : }
    1395                 : 
    1396                 : /***************************************************************************
    1397                 : Deletes a file if it exists. The file has to be closed before calling this. */
    1398                 : UNIV_INTERN
    1399                 : ibool
    1400                 : os_file_delete_if_exists(
    1401                 : /*=====================*/
    1402                 :                                 /* out: TRUE if success */
    1403                 :         const char*     name)   /* in: file path as a null-terminated string */
    1404               0 : {
    1405                 : #ifdef __WIN__
    1406                 :         BOOL    ret;
    1407                 :         ulint   count   = 0;
    1408                 : loop:
    1409                 :         /* In Windows, deleting an .ibd file may fail if ibbackup is copying
    1410                 :         it */
    1411                 : 
    1412                 :         ret = DeleteFile((LPCTSTR)name);
    1413                 : 
    1414                 :         if (ret) {
    1415                 :                 return(TRUE);
    1416                 :         }
    1417                 : 
    1418                 :         if (GetLastError() == ERROR_FILE_NOT_FOUND) {
    1419                 :                 /* the file does not exist, this not an error */
    1420                 : 
    1421                 :                 return(TRUE);
    1422                 :         }
    1423                 : 
    1424                 :         count++;
    1425                 : 
    1426                 :         if (count > 100 && 0 == (count % 10)) {
    1427                 :                 fprintf(stderr,
    1428                 :                         "InnoDB: Warning: cannot delete file %s\n"
    1429                 :                         "InnoDB: Are you running ibbackup"
    1430                 :                         " to back up the file?\n", name);
    1431                 : 
    1432                 :                 os_file_get_last_error(TRUE); /* print error information */
    1433                 :         }
    1434                 : 
    1435                 :         os_thread_sleep(1000000);       /* sleep for a second */
    1436                 : 
    1437                 :         if (count > 2000) {
    1438                 : 
    1439                 :                 return(FALSE);
    1440                 :         }
    1441                 : 
    1442                 :         goto loop;
    1443                 : #else
    1444                 :         int     ret;
    1445                 : 
    1446               0 :         ret = unlink(name);
    1447                 : 
    1448               0 :         if (ret != 0 && errno != ENOENT) {
    1449               0 :                 os_file_handle_error_no_exit(name, "delete");
    1450                 : 
    1451               0 :                 return(FALSE);
    1452                 :         }
    1453                 : 
    1454               0 :         return(TRUE);
    1455                 : #endif
    1456                 : }
    1457                 : 
    1458                 : /***************************************************************************
    1459                 : Deletes a file. The file has to be closed before calling this. */
    1460                 : UNIV_INTERN
    1461                 : ibool
    1462                 : os_file_delete(
    1463                 : /*===========*/
    1464                 :                                 /* out: TRUE if success */
    1465                 :         const char*     name)   /* in: file path as a null-terminated string */
    1466               0 : {
    1467                 : #ifdef __WIN__
    1468                 :         BOOL    ret;
    1469                 :         ulint   count   = 0;
    1470                 : loop:
    1471                 :         /* In Windows, deleting an .ibd file may fail if ibbackup is copying
    1472                 :         it */
    1473                 : 
    1474                 :         ret = DeleteFile((LPCTSTR)name);
    1475                 : 
    1476                 :         if (ret) {
    1477                 :                 return(TRUE);
    1478                 :         }
    1479                 : 
    1480                 :         if (GetLastError() == ERROR_FILE_NOT_FOUND) {
    1481                 :                 /* If the file does not exist, we classify this as a 'mild'
    1482                 :                 error and return */
    1483                 : 
    1484                 :                 return(FALSE);
    1485                 :         }
    1486                 : 
    1487                 :         count++;
    1488                 : 
    1489                 :         if (count > 100 && 0 == (count % 10)) {
    1490                 :                 fprintf(stderr,
    1491                 :                         "InnoDB: Warning: cannot delete file %s\n"
    1492                 :                         "InnoDB: Are you running ibbackup"
    1493                 :                         " to back up the file?\n", name);
    1494                 : 
    1495                 :                 os_file_get_last_error(TRUE); /* print error information */
    1496                 :         }
    1497                 : 
    1498                 :         os_thread_sleep(1000000);       /* sleep for a second */
    1499                 : 
    1500                 :         if (count > 2000) {
    1501                 : 
    1502                 :                 return(FALSE);
    1503                 :         }
    1504                 : 
    1505                 :         goto loop;
    1506                 : #else
    1507                 :         int     ret;
    1508                 : 
    1509               0 :         ret = unlink(name);
    1510                 : 
    1511               0 :         if (ret != 0) {
    1512               0 :                 os_file_handle_error_no_exit(name, "delete");
    1513                 : 
    1514               0 :                 return(FALSE);
    1515                 :         }
    1516                 : 
    1517               0 :         return(TRUE);
    1518                 : #endif
    1519                 : }
    1520                 : 
    1521                 : /***************************************************************************
    1522                 : Renames a file (can also move it to another directory). It is safest to close
    1523                 : the file before calling this function. Uses MoveFile() on Windows and rename() elsewhere; on failure the error is reported through os_file_handle_error_no_exit() with oldpath and the operation name rename. */
    1524                 : UNIV_INTERN
    1525                 : ibool
    1526                 : os_file_rename(
    1527                 : /*===========*/
    1528                 :                                 /* out: TRUE if success, FALSE if the rename call failed */
    1529                 :         const char*     oldpath,/* in: old file path as a null-terminated
    1530                 :                                 string */
    1531                 :         const char*     newpath)/* in: new file path as a null-terminated string */
    1532               0 : {
    1533                 : #ifdef __WIN__
    1534                 :         BOOL    ret;
    1535                 : 
    1536                 :         ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
    1537                 : 
    1538                 :         if (ret) {
    1539                 :                 return(TRUE);
    1540                 :         }
    1541                 : 
    1542                 :         os_file_handle_error_no_exit(oldpath, "rename");
    1543                 : 
    1544                 :         return(FALSE);
    1545                 : #else
    1546                 :         int     ret;
    1547                 : 
    1548               0 :         ret = rename(oldpath, newpath);
    1549                 : 
    1550               0 :         if (ret != 0) {
    1551               0 :                 os_file_handle_error_no_exit(oldpath, "rename");
    1552                 : 
    1553               0 :                 return(FALSE);
    1554                 :         }
    1555                 : 
    1556               0 :         return(TRUE);
    1557                 : #endif
    1558                 : }
    1559                 : 
    1560                 : /***************************************************************************
    1561                 : Closes a file handle with CloseHandle() on Windows or close() elsewhere. On failure os_file_handle_error() is called with a NULL name and the operation name close; the error number can be retrieved with
    1562                 : os_file_get_last_error. On Windows a null handle fails the ut_a() assertion before any close is attempted. */
    1563                 : UNIV_INTERN
    1564                 : ibool
    1565                 : os_file_close(
    1566                 : /*==========*/
    1567                 :                                 /* out: TRUE if success, FALSE if the handle could not be closed */
    1568                 :         os_file_t       file)   /* in, own: handle to a file */
    1569             126 : {
    1570                 : #ifdef __WIN__
    1571                 :         BOOL    ret;
    1572                 : 
    1573                 :         ut_a(file);
    1574                 : 
    1575                 :         ret = CloseHandle(file);
    1576                 : 
    1577                 :         if (ret) {
    1578                 :                 return(TRUE);
    1579                 :         }
    1580                 : 
    1581                 :         os_file_handle_error(NULL, "close");
    1582                 : 
    1583                 :         return(FALSE);
    1584                 : #else
    1585                 :         int     ret;
    1586                 : 
    1587             126 :         ret = close(file);
    1588                 : 
    1589             126 :         if (ret == -1) {
    1590               0 :                 os_file_handle_error(NULL, "close");
    1591                 : 
    1592               0 :                 return(FALSE);
    1593                 :         }
    1594                 : 
    1595             126 :         return(TRUE);
    1596                 : #endif
    1597                 : }
    1598                 : 
    1599                 : /***************************************************************************
    1600                 : Closes a file handle without reporting errors: a failed CloseHandle() (Windows) or close() (elsewhere) only results in FALSE being returned, no error handler is called. On Windows a null handle fails the ut_a() assertion. */
    1601                 : UNIV_INTERN
    1602                 : ibool
    1603                 : os_file_close_no_error_handling(
    1604                 : /*============================*/
    1605                 :                                 /* out: TRUE if success, FALSE if the close call failed */
    1606                 :         os_file_t       file)   /* in, own: handle to a file */
    1607               0 : {
    1608                 : #ifdef __WIN__
    1609                 :         BOOL    ret;
    1610                 : 
    1611                 :         ut_a(file);
    1612                 : 
    1613                 :         ret = CloseHandle(file);
    1614                 : 
    1615                 :         if (ret) {
    1616                 :                 return(TRUE);
    1617                 :         }
    1618                 : 
    1619                 :         return(FALSE);
    1620                 : #else
    1621                 :         int     ret;
    1622                 : 
    1623               0 :         ret = close(file);
    1624                 : 
    1625               0 :         if (ret == -1) {
    1626                 : 
    1627               0 :                 return(FALSE);
    1628                 :         }
    1629                 : 
    1630               0 :         return(TRUE);
    1631                 : #endif
    1632                 : }
    1633                 : 
    1634                 : /***************************************************************************
    1635                 : Gets a file size, returned as two 32-bit halves. Windows uses GetFileSize(); elsewhere the size is obtained with lseek(file, 0, SEEK_END), which leaves the file offset of the descriptor at the end of the file. On failure *size and *size_high are not written. */
    1636                 : UNIV_INTERN
    1637                 : ibool
    1638                 : os_file_get_size(
    1639                 : /*=============*/
    1640                 :                                 /* out: TRUE if success, FALSE if the size could not be determined */
    1641                 :         os_file_t       file,   /* in: handle to a file */
    1642                 :         ulint*          size,   /* out: least significant 32 bits of file
    1643                 :                                 size */
    1644                 :         ulint*          size_high)/* out: most significant 32 bits of size; set to 0 when off_t is at most 4 bytes wide */
    1645              57 : {
    1646                 : #ifdef __WIN__
    1647                 :         DWORD   high;
    1648                 :         DWORD   low;
    1649                 : 
    1650                 :         low = GetFileSize(file, &high);
    1651                 : 
    1652                 :         if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
    1653                 :                 return(FALSE);
    1654                 :         }
    1655                 : 
    1656                 :         *size = low;
    1657                 :         *size_high = high;
    1658                 : 
    1659                 :         return(TRUE);
    1660                 : #else
    1661                 :         off_t   offs;
    1662                 : 
    1663              57 :         offs = lseek(file, 0, SEEK_END);
    1664                 : 
    1665              57 :         if (offs == ((off_t)-1)) {
    1666                 : 
    1667               0 :                 return(FALSE);
    1668                 :         }
    1669                 : 
    1670                 :         if (sizeof(off_t) > 4) {
    1671              57 :                 *size = (ulint)(offs & 0xFFFFFFFFUL);
    1672              57 :                 *size_high = (ulint)(offs >> 32);
    1673                 :         } else {
    1674                 :                 *size = (ulint) offs;
    1675                 :                 *size_high = 0;
    1676                 :         }
    1677                 : 
    1678              57 :         return(TRUE);
    1679                 : #endif
    1680                 : }
    1681                 : 
    1682                 : /***************************************************************************
    1683                 : Gets file size as a 64-bit integer ib_int64_t. The value is assembled from the two halves returned by os_file_get_size as (size_high << 32) + size. */
    1684                 : UNIV_INTERN
    1685                 : ib_int64_t
    1686                 : os_file_get_size_as_iblonglong(
    1687                 : /*===========================*/
    1688                 :                                 /* out: size in bytes, -1 if os_file_get_size reported an error */
    1689                 :         os_file_t       file)   /* in: handle to a file */
    1690               0 : {
    1691                 :         ulint   size;
    1692                 :         ulint   size_high;
    1693                 :         ibool   success;
    1694                 : 
    1695               0 :         success = os_file_get_size(file, &size, &size_high);
    1696                 : 
    1697               0 :         if (!success) {
    1698                 : 
    1699               0 :                 return(-1);
    1700                 :         }
    1701                 : 
    1702               0 :         return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
    1703                 : }
    1704                 : 
    1705                 : /***************************************************************************
    1706                 : Writes the specified number of zero bytes to a newly created file, starting at offset 0 and writing one chunk per os_file_write call, then flushes the file with os_file_flush. Progress is printed to stderr when the requested size is at least 100 MB. */
    1707                 : UNIV_INTERN
    1708                 : ibool
    1709                 : os_file_set_size(
    1710                 : /*=============*/
    1711                 :                                 /* out: TRUE if all writes and the final flush succeeded, FALSE otherwise */
    1712                 :         const char*     name,   /* in: name of the file or path as a
    1713                 :                                 null-terminated string */
    1714                 :         os_file_t       file,   /* in: handle to a file */
    1715                 :         ulint           size,   /* in: least significant 32 bits of file
    1716                 :                                 size */
    1717                 :         ulint           size_high)/* in: most significant 32 bits of size */
    1718               6 : {
    1719                 :         ib_int64_t      current_size;
    1720                 :         ib_int64_t      desired_size;
    1721                 :         ibool           ret;
    1722                 :         byte*           buf;
    1723                 :         byte*           buf2;
    1724                 :         ulint           buf_size;
    1725                 : 
    1726               6 :         ut_a(size == (size & 0xFFFFFFFF));
    1727                 : 
    1728               6 :         current_size = 0;
    1729               6 :         desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
    1730                 : 
    1731                 :         /* Write up to 64 pages at a time (1 megabyte if UNIV_PAGE_SIZE is 16 kB). NOTE(review): if desired_size is smaller than UNIV_PAGE_SIZE, buf_size becomes 0 and every chunk below has n_bytes == 0, so current_size never advances -- confirm that callers never request such a size. */
    1732               6 :         buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
    1733                 :                 * UNIV_PAGE_SIZE;
    1734               6 :         buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
    1735                 : 
    1736                 :         /* Align the buffer for possible raw i/o; the extra UNIV_PAGE_SIZE bytes allocated above leave room for the alignment. buf2 keeps the pointer that must be passed to ut_free. */
    1737               6 :         buf = ut_align(buf2, UNIV_PAGE_SIZE);
    1738                 : 
    1739                 :         /* Write buffer full of zeros */
    1740               6 :         memset(buf, 0, buf_size);
    1741                 : 
    1742               6 :         if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
    1743                 : 
    1744               0 :                 fprintf(stderr, "InnoDB: Progress in MB:");
    1745                 :         }
    1746                 : 
    1747              52 :         while (current_size < desired_size) {
    1748                 :                 ulint   n_bytes;
    1749                 : 
    1750              40 :                 if (desired_size - current_size < (ib_int64_t) buf_size) {
    1751               0 :                         n_bytes = (ulint) (desired_size - current_size);
    1752                 :                 } else {
    1753              40 :                         n_bytes = buf_size;
    1754                 :                 }
    1755                 : 
    1756              40 :                 ret = os_file_write(name, file, buf,
    1757                 :                                     (ulint)(current_size & 0xFFFFFFFF),
    1758                 :                                     (ulint)(current_size >> 32),
    1759                 :                                     n_bytes);
    1760              40 :                 if (!ret) {
    1761               0 :                         ut_free(buf2);
    1762               0 :                         goto error_handling;
    1763                 :                 }
    1764                 : 
    1765                 :                 /* Print progress each time this chunk crosses a 100 MB boundary; the printed number is the count of completed 100 MB units followed by 00, i.e. the size in MB */
    1766              40 :                 if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
    1767                 :                     != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
    1768                 : 
    1769               0 :                         fprintf(stderr, " %lu00",
    1770                 :                                 (ulong) ((current_size + n_bytes)
    1771                 :                                          / (ib_int64_t)(100 * 1024 * 1024)));
    1772                 :                 }
    1773                 : 
    1774              40 :                 current_size += n_bytes;
    1775                 :         }
    1776                 : 
    1777               6 :         if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
    1778                 : 
    1779               0 :                 fprintf(stderr, "\n");
    1780                 :         }
    1781                 : 
    1782               6 :         ut_free(buf2);
    1783                 : 
    1784               6 :         ret = os_file_flush(file);
    1785                 : 
    1786               6 :         if (ret) {
    1787               6 :                 return(TRUE);
    1788                 :         }
    1789                 : 
    1790               0 : error_handling:
    1791               0 :         return(FALSE);
    1792                 : }
    1793                 : 
    1794                 : /***************************************************************************
    1795                 : Truncates a file at its current position. On Windows this calls SetEndOfFile() on the OS handle behind the stream; elsewhere it calls ftruncate() with the position reported by ftell(). NOTE(review): the stream is not flushed here, so handling of buffered but unwritten data is presumably left to the caller -- confirm. */
    1796                 : UNIV_INTERN
    1797                 : ibool
    1798                 : os_file_set_eof(
    1799                 : /*============*/
    1800                 :                                 /* out: TRUE if success (ftruncate returned 0, or SetEndOfFile returned nonzero) */
    1801                 :         FILE*           file)   /* in: file to be truncated */
    1802              21 : {
    1803                 : #ifdef __WIN__
    1804                 :         HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
    1805                 :         return(SetEndOfFile(h));
    1806                 : #else /* __WIN__ */
    1807              21 :         return(!ftruncate(fileno(file), ftell(file)));
    1808                 : #endif /* __WIN__ */
    1809                 : }
    1810                 : 
    1811                 : #ifndef __WIN__
    1812                 : /***************************************************************************
    1813                 : Wrapper to fsync(2) that retries the call while it fails with ENOLCK, sleeping 0.2 seconds between attempts with no upper bound on the number of attempts. A message is printed on the first failure and on every 100th one after it. Every fsync() attempt increments os_n_fsyncs.
    1814                 : Returns the value 0 if successful; otherwise the value -1 is returned and
    1815                 : the global variable errno is set to indicate the error. */
    1816                 : 
    1817                 : static
    1818                 : int
    1819                 : os_file_fsync(
    1820                 : /*==========*/
    1821                 :                                 /* out: 0 if success, -1 otherwise */
    1822                 :         os_file_t       file)   /* in: handle to a file */
    1823           57662 : {
    1824                 :         int     ret;
    1825                 :         int     failures;
    1826                 :         ibool   retry;
    1827                 : 
    1828           57662 :         failures = 0;
    1829                 : 
    1830                 :         do {
    1831           57662 :                 ret = fsync(file);
    1832                 : 
    1833           57662 :                 os_n_fsyncs++;
    1834                 : 
    1835           57662 :                 if (ret == -1 && errno == ENOLCK) {
    1836                 : 
    1837               0 :                         if (failures % 100 == 0) {
    1838                 : 
    1839               0 :                                 ut_print_timestamp(stderr);
    1840               0 :                                 fprintf(stderr,
    1841                 :                                         "  InnoDB: fsync(): "
    1842                 :                                         "No locks available; retrying\n");
    1843                 :                         }
    1844                 : 
    1845               0 :                         os_thread_sleep(200000 /* 0.2 sec */);
    1846                 : 
    1847               0 :                         failures++;
    1848                 : 
    1849               0 :                         retry = TRUE;
    1850                 :                 } else {
    1851                 : 
    1852           57662 :                         retry = FALSE;
    1853                 :                 }
    1854           57662 :         } while (retry);
    1855                 : 
    1856           57662 :         return(ret);
    1857                 : }
    1858                 : #endif /* !__WIN__ */
    1859                 : 
    1860                 : /***************************************************************************
    1861                 : Flushes the write buffers of a given file to the disk. Uses FlushFileBuffers() on Windows, fcntl(F_FULLFSYNC) with a fallback to os_file_fsync when HAVE_DARWIN_THREADS is defined, and os_file_fsync otherwise. A failure is treated as fatal (ut_error), except for the raw-device error codes that are ignored when srv_start_raw_disk_in_use is set. */
    1862                 : UNIV_INTERN
    1863                 : ibool
    1864                 : os_file_flush(
    1865                 : /*==========*/
    1866                 :                                 /* out: TRUE if success */
    1867                 :         os_file_t       file)   /* in: handle to a file */
    1868           57662 : {
    1869                 : #ifdef __WIN__
    1870                 :         BOOL    ret;
    1871                 : 
    1872                 :         ut_a(file);
    1873                 : 
    1874                 :         os_n_fsyncs++;
    1875                 : 
    1876                 :         ret = FlushFileBuffers(file);
    1877                 : 
    1878                 :         if (ret) {
    1879                 :                 return(TRUE);
    1880                 :         }
    1881                 : 
    1882                 :         /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
    1883                 :         actually a raw device, we choose to ignore that error if we are using
    1884                 :         raw disks */
    1885                 : 
    1886                 :         if (srv_start_raw_disk_in_use && GetLastError()
    1887                 :             == ERROR_INVALID_FUNCTION) {
    1888                 :                 return(TRUE);
    1889                 :         }
    1890                 : 
    1891                 :         os_file_handle_error(NULL, "flush");
    1892                 : 
    1893                 :         /* It is a fatal error if a file flush does not succeed, because then
    1894                 :         the database can get corrupt on disk */
    1895                 :         ut_error;
    1896                 : 
    1897                 :         return(FALSE);
    1898                 : #else
    1899                 :         int     ret;
    1900                 : 
    1901                 : #if defined(HAVE_DARWIN_THREADS)
    1902                 : # ifndef F_FULLFSYNC
    1903                 :         /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
    1904                 : #  define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
    1905                 : # elif F_FULLFSYNC != 51
    1906                 : #  error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
    1907                 : # endif
    1908                 :         /* Apple has disabled fsync() for internal disk drives in OS X. That
    1909                 :         caused corruption for a user when he tested a power outage. On OS X
    1910                 :         we therefore use a nonstandard flush method recommended by an Apple
    1911                 :         engineer. */
    1912                 : 
    1913                 :         if (!srv_have_fullfsync) {
    1914                 :                 /* If we are not on an operating system that supports this,
    1915                 :                 then fall back to a plain fsync. */
    1916                 : 
    1917                 :                 ret = os_file_fsync(file);
    1918                 :         } else {
    1919                 :                 ret = fcntl(file, F_FULLFSYNC, NULL);
    1920                 : 
    1921                 :                 if (ret) {
    1922                 :                         /* If we are not on a file system that supports this,
    1923                 :                         then fall back to a plain fsync. */
    1924                 :                         ret = os_file_fsync(file);
    1925                 :                 }
    1926                 :         }
    1927                 : #else
    1928           57662 :         ret = os_file_fsync(file);
    1929                 : #endif
    1930                 : 
    1931           57662 :         if (ret == 0) {
    1932           57662 :                 return(TRUE);
    1933                 :         }
    1934                 : 
    1935                 :         /* Since Linux returns EINVAL if the 'file' is actually a raw device,
    1936                 :         we choose to ignore that error if we are using raw disks */
    1937                 : 
    1938               0 :         if (srv_start_raw_disk_in_use && errno == EINVAL) {
    1939                 : 
    1940               0 :                 return(TRUE);
    1941                 :         }
    1942                 : 
    1943               0 :         ut_print_timestamp(stderr);
    1944                 : 
    1945               0 :         fprintf(stderr,
    1946                 :                 "  InnoDB: Error: the OS said file flush did not succeed\n");
    1947                 : 
    1948               0 :         os_file_handle_error(NULL, "flush");
    1949                 : 
    1950                 :         /* It is a fatal error if a file flush does not succeed, because then
    1951                 :         the database can get corrupt on disk. The return below is presumably never reached once ut_error has fired -- TODO confirm against the ut_error definition. */
    1952               0 :         ut_error;
    1953                 : 
    1954                 :         return(FALSE);
    1955                 : #endif
    1956                 : }
    1957                 : 
    1958                 : #ifndef __WIN__
    1959                 : /***********************************************************************
    1960                 : Does a synchronous read operation in Posix. Uses pread() when HAVE_PREAD is defined and HAVE_BROKEN_PREAD is not; otherwise it does lseek() followed by read() while holding one of the os_file_seek_mutexes. The pending-read counters are updated under os_file_count_mutex before and after the read, and os_n_file_reads is incremented on every call. */
    1961                 : static
    1962                 : ssize_t
    1963                 : os_file_pread(
    1964                 : /*==========*/
    1965                 :                                 /* out: number of bytes read, -1 if error */
    1966                 :         os_file_t       file,   /* in: handle to a file */
    1967                 :         void*           buf,    /* in: buffer where to read */
    1968                 :         ulint           n,      /* in: number of bytes to read */
    1969                 :         ulint           offset, /* in: least significant 32 bits of file
    1970                 :                                 offset from where to read */
    1971                 :         ulint           offset_high) /* in: most significant 32 bits of
    1972                 :                                 offset */
    1973             726 : {
    1974                 :         off_t   offs;
    1975                 :         ssize_t n_bytes;
    1976                 : 
    1977             726 :         ut_a((offset & 0xFFFFFFFFUL) == offset);
    1978                 : 
    1979                 :         /* If off_t is > 4 bytes in size, then we assume we can pass a
    1980                 :         64-bit offset. NOTE(review): otherwise a nonzero offset_high is only reported on stderr and the read still proceeds using the low 32 bits of the offset -- confirm this is intended. */
    1981                 : 
    1982                 :         if (sizeof(off_t) > 4) {
    1983             726 :                 offs = (off_t)offset + (((off_t)offset_high) << 32);
    1984                 : 
    1985                 :         } else {
    1986                 :                 offs = (off_t)offset;
    1987                 : 
    1988                 :                 if (offset_high > 0) {
    1989                 :                         fprintf(stderr,
    1990                 :                                 "InnoDB: Error: file read at offset > 4 GB\n");
    1991                 :                 }
    1992                 :         }
    1993                 : 
    1994             726 :         os_n_file_reads++;
    1995                 : 
    1996                 : #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
    1997             726 :         os_mutex_enter(os_file_count_mutex);
    1998             726 :         os_file_n_pending_preads++;
    1999             726 :         os_n_pending_reads++;
    2000             726 :         os_mutex_exit(os_file_count_mutex);
    2001                 : 
    2002             726 :         n_bytes = pread(file, buf, (ssize_t)n, offs);
    2003                 : 
    2004             726 :         os_mutex_enter(os_file_count_mutex);
    2005             726 :         os_file_n_pending_preads--;
    2006             726 :         os_n_pending_reads--;
    2007             726 :         os_mutex_exit(os_file_count_mutex);
    2008                 : 
    2009             726 :         return(n_bytes);
    2010                 : #else
    2011                 :         {
    2012                 :                 off_t   ret_offset;
    2013                 :                 ssize_t ret;
    2014                 :                 ulint   i;
    2015                 : 
    2016                 :                 os_mutex_enter(os_file_count_mutex);
    2017                 :                 os_n_pending_reads++;
    2018                 :                 os_mutex_exit(os_file_count_mutex);
    2019                 : 
    2020                 :                 /* Protect the seek / read operation with a mutex, chosen by the file handle value modulo OS_FILE_N_SEEK_MUTEXES */
    2021                 :                 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
    2022                 : 
    2023                 :                 os_mutex_enter(os_file_seek_mutexes[i]);
    2024                 : 
    2025                 :                 ret_offset = lseek(file, offs, SEEK_SET);
    2026                 : 
    2027                 :                 if (ret_offset < 0) {
    2028                 :                         ret = -1;
    2029                 :                 } else {
    2030                 :                         ret = read(file, buf, (ssize_t)n);
    2031                 :                 }
    2032                 : 
    2033                 :                 os_mutex_exit(os_file_seek_mutexes[i]);
    2034                 : 
    2035                 :                 os_mutex_enter(os_file_count_mutex);
    2036                 :                 os_n_pending_reads--;
    2037                 :                 os_mutex_exit(os_file_count_mutex);
    2038                 : 
    2039                 :                 return(ret);
    2040                 :         }
    2041                 : #endif
    2042                 : }
    2043                 : 
    2044                 : /***********************************************************************
    2045                 : Does a synchronous write operation in Posix. */
    2046                 : static
    2047                 : ssize_t
    2048                 : os_file_pwrite(
    2049                 : /*===========*/
    2050                 :                                 /* out: number of bytes written, -1 if error */
    2051                 :         os_file_t       file,   /* in: handle to a file */
    2052                 :         const void*     buf,    /* in: buffer from where to write */
    2053                 :         ulint           n,      /* in: number of bytes to write */
    2054                 :         ulint           offset, /* in: least significant 32 bits of file
    2055                 :                                 offset where to write */
    2056                 :         ulint           offset_high) /* in: most significant 32 bits of
    2057                 :                                 offset */
    2058           58284 : {
    2059                 :         ssize_t ret;
    2060                 :         off_t   offs;
    2061                 : 
    2062           58284 :         ut_a((offset & 0xFFFFFFFFUL) == offset);
    2063                 : 
    2064                 :         /* If off_t is > 4 bytes in size, then we assume we can pass a
    2065                 :         64-bit address */
    2066                 : 
    2067                 :         if (sizeof(off_t) > 4) {
    2068           58284 :                 offs = (off_t)offset + (((off_t)offset_high) << 32);
    2069                 :         } else {
    2070                 :                 offs = (off_t)offset;
    2071                 : 
    2072                 :                 if (offset_high > 0) {
    2073                 :                         fprintf(stderr,
    2074                 :                                 "InnoDB: Error: file write"
    2075                 :                                 " at offset > 4 GB\n");
    2076                 :                 }
    2077                 :         }
    2078                 : 
    2079           58284 :         os_n_file_writes++;
    2080                 : 
    2081                 : #if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
    2082           58284 :         os_mutex_enter(os_file_count_mutex);
    2083           58284 :         os_file_n_pending_pwrites++;
    2084           58284 :         os_n_pending_writes++;
    2085           58284 :         os_mutex_exit(os_file_count_mutex);
    2086                 : 
    2087           58284 :         ret = pwrite(file, buf, (ssize_t)n, offs);
    2088                 : 
    2089           58284 :         os_mutex_enter(os_file_count_mutex);
    2090           58284 :         os_file_n_pending_pwrites--;
    2091           58284 :         os_n_pending_writes--;
    2092           58284 :         os_mutex_exit(os_file_count_mutex);
    2093                 : 
    2094                 : # ifdef UNIV_DO_FLUSH
    2095                 :         if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
    2096                 :             && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
    2097                 :             && !os_do_not_call_flush_at_each_write) {
    2098                 : 
    2099                 :                 /* Always do fsync to reduce the probability that when
    2100                 :                 the OS crashes, a database page is only partially
    2101                 :                 physically written to disk. */
    2102                 : 
    2103                 :                 ut_a(TRUE == os_file_flush(file));
    2104                 :         }
    2105                 : # endif /* UNIV_DO_FLUSH */
    2106                 : 
    2107           58284 :         return(ret);
    2108                 : #else
    2109                 :         {
    2110                 :                 off_t   ret_offset;
    2111                 :                 ulint   i;
    2112                 : 
    2113                 :                 os_mutex_enter(os_file_count_mutex);
    2114                 :                 os_n_pending_writes++;
    2115                 :                 os_mutex_exit(os_file_count_mutex);
    2116                 : 
    2117                 :                 /* Protect the seek / write operation with a mutex */
    2118                 :                 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
    2119                 : 
    2120                 :                 os_mutex_enter(os_file_seek_mutexes[i]);
    2121                 : 
    2122                 :                 ret_offset = lseek(file, offs, SEEK_SET);
    2123                 : 
    2124                 :                 if (ret_offset < 0) {
    2125                 :                         ret = -1;
    2126                 : 
    2127                 :                         goto func_exit;
    2128                 :                 }
    2129                 : 
    2130                 :                 ret = write(file, buf, (ssize_t)n);
    2131                 : 
    2132                 : # ifdef UNIV_DO_FLUSH
    2133                 :                 if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
    2134                 :                     && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
    2135                 :                     && !os_do_not_call_flush_at_each_write) {
    2136                 : 
    2137                 :                         /* Always do fsync to reduce the probability that when
    2138                 :                         the OS crashes, a database page is only partially
    2139                 :                         physically written to disk. */
    2140                 : 
    2141                 :                         ut_a(TRUE == os_file_flush(file));
    2142                 :                 }
    2143                 : # endif /* UNIV_DO_FLUSH */
    2144                 : 
    2145                 : func_exit:
    2146                 :                 os_mutex_exit(os_file_seek_mutexes[i]);
    2147                 : 
    2148                 :                 os_mutex_enter(os_file_count_mutex);
    2149                 :                 os_n_pending_writes--;
    2150                 :                 os_mutex_exit(os_file_count_mutex);
    2151                 : 
    2152                 :                 return(ret);
    2153                 :         }
    2154                 : #endif
    2155                 : }
    2156                 : #endif
    2157                 : 
    2158                 : /***********************************************************************
    2159                 : Requests a synchronous positioned read operation. */
    2160                 : UNIV_INTERN
    2161                 : ibool
    2162                 : os_file_read(
    2163                 : /*=========*/
    2164                 :                                 /* out: TRUE if request was
    2165                 :                                 successful, FALSE if fail */
    2166                 :         os_file_t       file,   /* in: handle to a file */
    2167                 :         void*           buf,    /* in: buffer where to read */
    2168                 :         ulint           offset, /* in: least significant 32 bits of file
    2169                 :                                 offset where to read */
    2170                 :         ulint           offset_high, /* in: most significant 32 bits of
    2171                 :                                 offset */
    2172                 :         ulint           n)      /* in: number of bytes to read */
    2173             726 : {
    2174                 : #ifdef __WIN__
    2175                 :         BOOL            ret;
    2176                 :         DWORD           len;
    2177                 :         DWORD           ret2;
    2178                 :         DWORD           low;
    2179                 :         DWORD           high;
    2180                 :         ibool           retry;
    2181                 :         ulint           i;
    2182                 : 
    2183                 :         ut_a((offset & 0xFFFFFFFFUL) == offset);
    2184                 : 
    2185                 :         os_n_file_reads++;
    2186                 :         os_bytes_read_since_printout += n;
    2187                 : 
    2188                 : try_again:
    2189                 :         ut_ad(file);
    2190                 :         ut_ad(buf);
    2191                 :         ut_ad(n > 0);
    2192                 : 
    2193                 :         low = (DWORD) offset;
    2194                 :         high = (DWORD) offset_high;
    2195                 : 
    2196                 :         os_mutex_enter(os_file_count_mutex);
    2197                 :         os_n_pending_reads++;
    2198                 :         os_mutex_exit(os_file_count_mutex);
    2199                 : 
    2200                 :         /* Protect the seek / read operation with a mutex */
    2201                 :         i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
    2202                 : 
    2203                 :         os_mutex_enter(os_file_seek_mutexes[i]);
    2204                 : 
    2205                 :         ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
    2206                 : 
    2207                 :         if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
    2208                 : 
    2209                 :                 os_mutex_exit(os_file_seek_mutexes[i]);
    2210                 : 
    2211                 :                 os_mutex_enter(os_file_count_mutex);
    2212                 :                 os_n_pending_reads--;
    2213                 :                 os_mutex_exit(os_file_count_mutex);
    2214                 : 
    2215                 :                 goto error_handling;
    2216                 :         }
    2217                 : 
    2218                 :         ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
    2219                 : 
    2220                 :         os_mutex_exit(os_file_seek_mutexes[i]);
    2221                 : 
    2222                 :         os_mutex_enter(os_file_count_mutex);
    2223                 :         os_n_pending_reads--;
    2224                 :         os_mutex_exit(os_file_count_mutex);
    2225                 : 
    2226                 :         if (ret && len == n) {
    2227                 :                 return(TRUE);
    2228                 :         }
    2229                 : #else
    2230                 :         ibool   retry;
    2231                 :         ssize_t ret;
    2232                 : 
    2233             726 :         os_bytes_read_since_printout += n;
    2234                 : 
    2235             726 : try_again:
    2236             726 :         ret = os_file_pread(file, buf, n, offset, offset_high);
    2237                 : 
    2238             726 :         if ((ulint)ret == n) {
    2239                 : 
    2240             726 :                 return(TRUE);
    2241                 :         }
    2242                 : 
    2243               0 :         fprintf(stderr,
    2244                 :                 "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
    2245                 :                 "InnoDB: Was only able to read %ld.\n",
    2246                 :                 (ulong)n, (ulong)offset_high,
    2247                 :                 (ulong)offset, (long)ret);
    2248                 : #endif
    2249                 : #ifdef __WIN__
    2250                 : error_handling:
    2251                 : #endif
    2252               0 :         retry = os_file_handle_error(NULL, "read");
    2253                 : 
    2254               0 :         if (retry) {
    2255               0 :                 goto try_again;
    2256                 :         }
    2257                 : 
    2258               0 :         fprintf(stderr,
    2259                 :                 "InnoDB: Fatal error: cannot read from file."
    2260                 :                 " OS error number %lu.\n",
    2261                 : #ifdef __WIN__
    2262                 :                 (ulong) GetLastError()
    2263                 : #else
    2264                 :                 (ulong) errno
    2265                 : #endif
    2266                 :                 );
    2267               0 :         fflush(stderr);
    2268                 : 
    2269               0 :         ut_error;
    2270                 : 
    2271                 :         return(FALSE);
    2272                 : }
    2273                 : 
    2274                 : /***********************************************************************
    2275                 : Requests a synchronous positioned read operation. This function does not do
    2276                 : any error handling. In case of error it returns FALSE. */
    2277                 : UNIV_INTERN
    2278                 : ibool
    2279                 : os_file_read_no_error_handling(
    2280                 : /*===========================*/
    2281                 :                                 /* out: TRUE if request was
    2282                 :                                 successful, FALSE if fail */
    2283                 :         os_file_t       file,   /* in: handle to a file */
    2284                 :         void*           buf,    /* in: buffer where to read */
    2285                 :         ulint           offset, /* in: least significant 32 bits of file
    2286                 :                                 offset where to read */
    2287                 :         ulint           offset_high, /* in: most significant 32 bits of
    2288                 :                                 offset */
    2289                 :         ulint           n)      /* in: number of bytes to read */
    2290               0 : {
    2291                 : #ifdef __WIN__
    2292                 :         BOOL            ret;
    2293                 :         DWORD           len;
    2294                 :         DWORD           ret2;
    2295                 :         DWORD           low;
    2296                 :         DWORD           high;
    2297                 :         ibool           retry;
    2298                 :         ulint           i;
    2299                 : 
    2300                 :         ut_a((offset & 0xFFFFFFFFUL) == offset);
    2301                 : 
    2302                 :         os_n_file_reads++;
    2303                 :         os_bytes_read_since_printout += n;
    2304                 : 
    2305                 : try_again:
    2306                 :         ut_ad(file);
    2307                 :         ut_ad(buf);
    2308                 :         ut_ad(n > 0);
    2309                 : 
    2310                 :         low = (DWORD) offset;
    2311                 :         high = (DWORD) offset_high;
    2312                 : 
    2313                 :         os_mutex_enter(os_file_count_mutex);
    2314                 :         os_n_pending_reads++;
    2315                 :         os_mutex_exit(os_file_count_mutex);
    2316                 : 
    2317                 :         /* Protect the seek / read operation with a mutex */
    2318                 :         i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
    2319                 : 
    2320                 :         os_mutex_enter(os_file_seek_mutexes[i]);
    2321                 : 
    2322                 :         ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
    2323                 : 
    2324                 :         if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
    2325                 : 
    2326                 :                 os_mutex_exit(os_file_seek_mutexes[i]);
    2327                 : 
    2328                 :                 os_mutex_enter(os_file_count_mutex);
    2329                 :                 os_n_pending_reads--;
    2330                 :                 os_mutex_exit(os_file_count_mutex);
    2331                 : 
    2332                 :                 goto error_handling;
    2333                 :         }
    2334                 : 
    2335                 :         ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
    2336                 : 
    2337                 :         os_mutex_exit(os_file_seek_mutexes[i]);
    2338                 : 
    2339                 :         os_mutex_enter(os_file_count_mutex);
    2340                 :         os_n_pending_reads--;
    2341                 :         os_mutex_exit(os_file_count_mutex);
    2342                 : 
    2343                 :         if (ret && len == n) {
    2344                 :                 return(TRUE);
    2345                 :         }
    2346                 : #else
    2347                 :         ibool   retry;
    2348                 :         ssize_t ret;
    2349                 : 
    2350               0 :         os_bytes_read_since_printout += n;
    2351                 : 
    2352               0 : try_again:
    2353               0 :         ret = os_file_pread(file, buf, n, offset, offset_high);
    2354                 : 
    2355               0 :         if ((ulint)ret == n) {
    2356                 : 
    2357               0 :                 return(TRUE);
    2358                 :         }
    2359                 : #endif
    2360                 : #ifdef __WIN__
    2361                 : error_handling:
    2362                 : #endif
    2363               0 :         retry = os_file_handle_error_no_exit(NULL, "read");
    2364                 : 
    2365               0 :         if (retry) {
    2366               0 :                 goto try_again;
    2367                 :         }
    2368                 : 
    2369               0 :         return(FALSE);
    2370                 : }
    2371                 : 
    2372                 : /***********************************************************************
    2373                 : Rewind file to its start, read at most size - 1 bytes from it to str, and
    2374                 : NUL-terminate str. All errors are silently ignored. This function is
    2375                 : mostly meant to be used with temporary files. */
    2376                 : UNIV_INTERN
    2377                 : void
    2378                 : os_file_read_string(
    2379                 : /*================*/
    2380                 :         FILE*   file,   /* in: file to read from */
    2381                 :         char*   str,    /* in: buffer where to read */
    2382                 :         ulint   size)   /* in: size of buffer */
    2383              21 : {
    2384                 :         size_t  flen;
    2385                 : 
    2386              21 :         if (size == 0) {
    2387               0 :                 return;
    2388                 :         }
    2389                 : 
    2390              21 :         rewind(file);
    2391              21 :         flen = fread(str, 1, size - 1, file);
    2392              21 :         str[flen] = '\0';
    2393                 : }
    2394                 : 
    2395                 : /***********************************************************************
    2396                 : Requests a synchronous write operation. */
    2397                 : UNIV_INTERN
    2398                 : ibool
    2399                 : os_file_write(
    2400                 : /*==========*/
    2401                 :                                 /* out: TRUE if request was
    2402                 :                                 successful, FALSE if fail */
    2403                 :         const char*     name,   /* in: name of the file or path as a
    2404                 :                                 null-terminated string */
    2405                 :         os_file_t       file,   /* in: handle to a file */
    2406                 :         const void*     buf,    /* in: buffer from which to write */
    2407                 :         ulint           offset, /* in: least significant 32 bits of file
    2408                 :                                 offset where to write */
    2409                 :         ulint           offset_high, /* in: most significant 32 bits of
    2410                 :                                 offset */
    2411                 :         ulint           n)      /* in: number of bytes to write */
    2412           58284 : {
    2413                 : #ifdef __WIN__
    2414                 :         BOOL            ret;
    2415                 :         DWORD           len;
    2416                 :         DWORD           ret2;
    2417                 :         DWORD           low;
    2418                 :         DWORD           high;
    2419                 :         ulint           i;
    2420                 :         ulint           n_retries       = 0;
    2421                 :         ulint           err;
    2422                 : 
    2423                 :         ut_a((offset & 0xFFFFFFFF) == offset);
    2424                 : 
    2425                 :         os_n_file_writes++;
    2426                 : 
    2427                 :         ut_ad(file);
    2428                 :         ut_ad(buf);
    2429                 :         ut_ad(n > 0);
    2430                 : retry:
    2431                 :         low = (DWORD) offset;
    2432                 :         high = (DWORD) offset_high;
    2433                 : 
    2434                 :         os_mutex_enter(os_file_count_mutex);
    2435                 :         os_n_pending_writes++;
    2436                 :         os_mutex_exit(os_file_count_mutex);
    2437                 : 
    2438                 :         /* Protect the seek / write operation with a mutex */
    2439                 :         i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
    2440                 : 
    2441                 :         os_mutex_enter(os_file_seek_mutexes[i]);
    2442                 : 
    2443                 :         ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
    2444                 : 
    2445                 :         if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
    2446                 : 
    2447                 :                 os_mutex_exit(os_file_seek_mutexes[i]);
    2448                 : 
    2449                 :                 os_mutex_enter(os_file_count_mutex);
    2450                 :                 os_n_pending_writes--;
    2451                 :                 os_mutex_exit(os_file_count_mutex);
    2452                 : 
    2453                 :                 ut_print_timestamp(stderr);
    2454                 : 
    2455                 :                 fprintf(stderr,
    2456                 :                         "  InnoDB: Error: File pointer positioning to"
    2457                 :                         " file %s failed at\n"
    2458                 :                         "InnoDB: offset %lu %lu. Operating system"
    2459                 :                         " error number %lu.\n"
    2460                 :                         "InnoDB: Some operating system error numbers"
    2461                 :                         " are described at\n"
    2462                 :                         "InnoDB: "
    2463                 :                         "http://dev.mysql.com/doc/refman/5.1/en/"
    2464                 :                         "operating-system-error-codes.html\n",
    2465                 :                         name, (ulong) offset_high, (ulong) offset,
    2466                 :                         (ulong) GetLastError());
    2467                 : 
    2468                 :                 return(FALSE);
    2469                 :         }
    2470                 : 
    2471                 :         ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
    2472                 : 
    2473                 :         /* Always do fsync to reduce the probability that when the OS crashes,
    2474                 :         a database page is only partially physically written to disk. */
    2475                 : 
    2476                 : # ifdef UNIV_DO_FLUSH
    2477                 :         if (!os_do_not_call_flush_at_each_write) {
    2478                 :                 ut_a(TRUE == os_file_flush(file));
    2479                 :         }
    2480                 : # endif /* UNIV_DO_FLUSH */
    2481                 : 
    2482                 :         os_mutex_exit(os_file_seek_mutexes[i]);
    2483                 : 
    2484                 :         os_mutex_enter(os_file_count_mutex);
    2485                 :         os_n_pending_writes--;
    2486                 :         os_mutex_exit(os_file_count_mutex);
    2487                 : 
    2488                 :         if (ret && len == n) {
    2489                 : 
    2490                 :                 return(TRUE);
    2491                 :         }
    2492                 : 
    2493                 :         /* If some background file system backup tool is running, then, at
    2494                 :         least in Windows 2000, we may get here a specific error. Let us
    2495                 :         retry the operation 100 times, with 1 second waits. */
    2496                 : 
    2497                 :         if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
    2498                 : 
    2499                 :                 os_thread_sleep(1000000);
    2500                 : 
    2501                 :                 n_retries++;
    2502                 : 
    2503                 :                 goto retry;
    2504                 :         }
    2505                 : 
    2506                 :         if (!os_has_said_disk_full) {
    2507                 : 
    2508                 :                 err = (ulint)GetLastError();
    2509                 : 
    2510                 :                 ut_print_timestamp(stderr);
    2511                 : 
    2512                 :                 fprintf(stderr,
    2513                 :                         "  InnoDB: Error: Write to file %s failed"
    2514                 :                         " at offset %lu %lu.\n"
    2515                 :                         "InnoDB: %lu bytes should have been written,"
    2516                 :                         " only %lu were written.\n"
    2517                 :                         "InnoDB: Operating system error number %lu.\n"
    2518                 :                         "InnoDB: Check that your OS and file system"
    2519                 :                         " support files of this size.\n"
    2520                 :                         "InnoDB: Check also that the disk is not full"
    2521                 :                         " or a disk quota exceeded.\n",
    2522                 :                         name, (ulong) offset_high, (ulong) offset,
    2523                 :                         (ulong) n, (ulong) len, (ulong) err);
    2524                 : 
    2525                 :                 if (strerror((int)err) != NULL) {
    2526                 :                         fprintf(stderr,
    2527                 :                                 "InnoDB: Error number %lu means '%s'.\n",
    2528                 :                                 (ulong) err, strerror((int)err));
    2529                 :                 }
    2530                 : 
    2531                 :                 fprintf(stderr,
    2532                 :                         "InnoDB: Some operating system error numbers"
    2533                 :                         " are described at\n"
    2534                 :                         "InnoDB: "
    2535                 :                         "http://dev.mysql.com/doc/refman/5.1/en/"
    2536                 :                         "operating-system-error-codes.html\n");
    2537                 : 
    2538                 :                 os_has_said_disk_full = TRUE;
    2539                 :         }
    2540                 : 
    2541                 :         return(FALSE);
    2542                 : #else
    2543                 :         ssize_t ret;
    2544                 : 
    2545           58284 :         ret = os_file_pwrite(file, buf, n, offset, offset_high);
    2546                 : 
    2547           58284 :         if ((ulint)ret == n) {
    2548                 : 
    2549           58284 :                 return(TRUE);
    2550                 :         }
    2551                 : 
    2552               0 :         if (!os_has_said_disk_full) {
    2553                 : 
    2554               0 :                 ut_print_timestamp(stderr);
    2555                 : 
    2556               0 :                 fprintf(stderr,
    2557                 :                         "  InnoDB: Error: Write to file %s failed"
    2558                 :                         " at offset %lu %lu.\n"
    2559                 :                         "InnoDB: %lu bytes should have been written,"
    2560                 :                         " only %ld were written.\n"
    2561                 :                         "InnoDB: Operating system error number %lu.\n"
    2562                 :                         "InnoDB: Check that your OS and file system"
    2563                 :                         " support files of this size.\n"
    2564                 :                         "InnoDB: Check also that the disk is not full"
    2565                 :                         " or a disk quota exceeded.\n",
    2566                 :                         name, offset_high, offset, n, (long int)ret,
    2567                 :                         (ulint)errno);
    2568               0 :                 if (strerror(errno) != NULL) {
    2569               0 :                         fprintf(stderr,
    2570                 :                                 "InnoDB: Error number %lu means '%s'.\n",
    2571                 :                                 (ulint)errno, strerror(errno));
    2572                 :                 }
    2573                 : 
    2574               0 :                 fprintf(stderr,
    2575                 :                         "InnoDB: Some operating system error numbers"
    2576                 :                         " are described at\n"
    2577                 :                         "InnoDB: "
    2578                 :                         "http://dev.mysql.com/doc/refman/5.1/en/"
    2579                 :                         "operating-system-error-codes.html\n");
    2580                 : 
    2581               0 :                 os_has_said_disk_full = TRUE;
    2582                 :         }
    2583                 : 
    2584               0 :         return(FALSE);
    2585                 : #endif
    2586                 : }
    2587                 : 
    2588                 : /***********************************************************************
    2589                 : Check the existence and type of the given file. */
    2590                 : UNIV_INTERN
    2591                 : ibool
    2592                 : os_file_status(
    2593                 : /*===========*/
    2594                 :                                 /* out: TRUE if call succeeded */
    2595                 :         const char*     path,   /* in:  pathname of the file */
    2596                 :         ibool*          exists, /* out: TRUE if file exists */
    2597                 :         os_file_type_t* type)   /* out: type of the file (if it exists) */
    2598               0 : {
    2599                 : #ifdef __WIN__
    2600                 :         int             ret;
    2601                 :         struct _stat    statinfo;
    2602                 : 
    2603                 :         ret = _stat(path, &statinfo);
    2604                 :         if (ret && (errno == ENOENT || errno == ENOTDIR)) {
    2605                 :                 /* file does not exist */
    2606                 :                 *exists = FALSE;
    2607                 :                 return(TRUE);
    2608                 :         } else if (ret) {
    2609                 :                 /* file exists, but stat call failed */
    2610                 : 
    2611                 :                 os_file_handle_error_no_exit(path, "stat");
    2612                 : 
    2613                 :                 return(FALSE);
    2614                 :         }
    2615                 : 
    2616                 :         if (_S_IFDIR & statinfo.st_mode) {
    2617                 :                 *type = OS_FILE_TYPE_DIR;
    2618                 :         } else if (_S_IFREG & statinfo.st_mode) {
    2619                 :                 *type = OS_FILE_TYPE_FILE;
    2620                 :         } else {
    2621                 :                 *type = OS_FILE_TYPE_UNKNOWN;
    2622                 :         }
    2623                 : 
    2624                 :         *exists = TRUE;
    2625                 : 
    2626                 :         return(TRUE);
    2627                 : #else
    2628                 :         int             ret;
    2629                 :         struct stat     statinfo;
    2630                 : 
    2631               0 :         ret = stat(path, &statinfo);
    2632               0 :         if (ret && (errno == ENOENT || errno == ENOTDIR)) {
    2633                 :                 /* file does not exist */
    2634               0 :                 *exists = FALSE;
    2635               0 :                 return(TRUE);
    2636               0 :         } else if (ret) {
    2637                 :                 /* file exists, but stat call failed */
    2638                 : 
    2639               0 :                 os_file_handle_error_no_exit(path, "stat");
    2640                 : 
    2641               0 :                 return(FALSE);
    2642                 :         }
    2643                 : 
    2644               0 :         if (S_ISDIR(statinfo.st_mode)) {
    2645               0 :                 *type = OS_FILE_TYPE_DIR;
    2646               0 :         } else if (S_ISLNK(statinfo.st_mode)) {
    2647               0 :                 *type = OS_FILE_TYPE_LINK;
    2648               0 :         } else if (S_ISREG(statinfo.st_mode)) {
    2649               0 :                 *type = OS_FILE_TYPE_FILE;
    2650                 :         } else {
    2651               0 :                 *type = OS_FILE_TYPE_UNKNOWN;
    2652                 :         }
    2653                 : 
    2654               0 :         *exists = TRUE;
    2655                 : 
    2656               0 :         return(TRUE);
    2657                 : #endif
    2658                 : }
    2659                 : 
    2660                 : /***********************************************************************
    2661                 : Returns the type, timestamps and size of a file, using stat() (_stat() on Windows). */
    2662                 : UNIV_INTERN
    2663                 : ibool
    2664                 : os_file_get_status(
    2665                 : /*===============*/
    2666                 :                                         /* out: TRUE if stat_info was filled in; FALSE
    2667                 :                                         (stat_info untouched) if the file is missing or stat failed */
    2668                 :         const char*     path,           /* in:  pathname of the file */
    2669                 :         os_file_stat_t* stat_info)      /* out: type, ctime, atime, mtime and size of the file;
    2670                 :                                         NOTE(review): stat() follows symlinks, so the S_ISLNK branch looks unreachable - confirm */
    2671             107 : {
    2672                 : #ifdef __WIN__
    2673                 :         int             ret;
    2674                 :         struct _stat    statinfo;
    2675                 : 
    2676                 :         ret = _stat(path, &statinfo);
    2677                 :         if (ret && (errno == ENOENT || errno == ENOTDIR)) {
    2678                 :                 /* file does not exist: return FALSE without reporting an error */
    2679                 : 
    2680                 :                 return(FALSE);
    2681                 :         } else if (ret) {
    2682                 :                 /* stat call failed for some other reason: report it */
    2683                 : 
    2684                 :                 os_file_handle_error_no_exit(path, "stat");
    2685                 : 
    2686                 :                 return(FALSE);
    2687                 :         }
    2688                 :         if (_S_IFDIR & statinfo.st_mode) {
    2689                 :                 stat_info->type = OS_FILE_TYPE_DIR;
    2690                 :         } else if (_S_IFREG & statinfo.st_mode) {
    2691                 :                 stat_info->type = OS_FILE_TYPE_FILE;
    2692                 :         } else {
    2693                 :                 stat_info->type = OS_FILE_TYPE_UNKNOWN;
    2694                 :         }
    2695                 : 
    2696                 :         stat_info->ctime = statinfo.st_ctime;
    2697                 :         stat_info->atime = statinfo.st_atime;
    2698                 :         stat_info->mtime = statinfo.st_mtime;
    2699                 :         stat_info->size       = statinfo.st_size;
    2700                 : 
    2701                 :         return(TRUE);
    2702                 : #else
    2703                 :         int             ret;
    2704                 :         struct stat     statinfo;
    2705                 : 
    2706             107 :         ret = stat(path, &statinfo);
    2707                 : 
    2708             107 :         if (ret && (errno == ENOENT || errno == ENOTDIR)) {
    2709                 :                 /* file does not exist: return FALSE without reporting an error */
    2710                 : 
    2711               3 :                 return(FALSE);
    2712             104 :         } else if (ret) {
    2713                 :                 /* stat call failed for some other reason: report it */
    2714                 : 
    2715               0 :                 os_file_handle_error_no_exit(path, "stat");
    2716                 : 
    2717               0 :                 return(FALSE);
    2718                 :         }
    2719                 : 
    2720             104 :         if (S_ISDIR(statinfo.st_mode)) {
    2721               0 :                 stat_info->type = OS_FILE_TYPE_DIR;
    2722             104 :         } else if (S_ISLNK(statinfo.st_mode)) {
    2723               0 :                 stat_info->type = OS_FILE_TYPE_LINK;
    2724             104 :         } else if (S_ISREG(statinfo.st_mode)) {
    2725             104 :                 stat_info->type = OS_FILE_TYPE_FILE;
    2726                 :         } else {
    2727               0 :                 stat_info->type = OS_FILE_TYPE_UNKNOWN;
    2728                 :         }
    2729                 : 
    2730             104 :         stat_info->ctime = statinfo.st_ctime;
    2731             104 :         stat_info->atime = statinfo.st_atime;
    2732             104 :         stat_info->mtime = statinfo.st_mtime;
    2733             104 :         stat_info->size       = statinfo.st_size;
    2734                 : 
    2735             104 :         return(TRUE);
    2736                 : #endif
    2737                 : }
    2738                 : 
    2739                 : /* Path name separator character: backslash on Windows (__WIN__), forward slash elsewhere */
    2740                 : #ifdef __WIN__
    2741                 : #  define OS_FILE_PATH_SEPARATOR        '\\'
    2742                 : #else
    2743                 : #  define OS_FILE_PATH_SEPARATOR        '/'
    2744                 : #endif
    2745                 : 
    2746                 : /********************************************************************
    2747                 : The function os_file_dirname returns the directory component of a
    2748                 : null-terminated pathname string.  In the usual case, dirname returns
    2749                 : the string up to, but not including, the final '/', and basename
    2750                 : is the component following the final '/'.  Trailing '/' characters
    2751                 : are not counted as part of the pathname.
    2752                 : 
    2753                 : If path does not contain a slash, dirname returns the string ".".
    2754                 : Concatenating the string returned by dirname, a "/", and the basename
    2755                 : yields a complete pathname.
    2756                 : 
    2757                 : The return value is a copy of the directory component of the pathname.
    2758                 : The copy is allocated from heap. It is the caller's responsibility
    2759                 : to free it after it is no longer needed.
    2760                 : 
    2761                 : NOTE(review): the code below cuts at the last separator without first
    2762                 : stripping trailing separators, so "/usr/" seems to yield "/usr" rather
    2763                 : than the "/" listed below - confirm against mem_strdupl and the callers.
    2764                 : Examples of dirname and basename for different paths (taken from SUSv2):
    2765                 :        path           dirname        basename
    2766                 :        "/usr/lib"     "/usr"         "lib"
    2767                 :        "/usr/"        "/"            "usr"
    2768                 :        "usr"          "."            "usr"
    2769                 :        "/"            "/"            "/"
    2770                 :        "."            "."            "."
    2771                 :        ".."           "."            ".."
    2772                 : */
    2773                 : UNIV_INTERN
    2774                 : char*
    2775                 : os_file_dirname(
    2776                 : /*============*/
    2777                 :                                 /* out, own: directory component of the
    2778                 :                                 pathname; the caller must free it */
    2779                 :         const char*     path)   /* in: pathname */
    2780               0 : {
    2781                 :         /* Find the last occurrence of the path separator */
    2782               0 :         const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
    2783               0 :         if (!last_slash) {
    2784                 :                 /* No separator in the path, return "." */
    2785                 : 
    2786               0 :                 return(mem_strdup("."));
    2787                 :         }
    2788                 : 
    2789                 :         /* Ok, there is a separator */
    2790                 : 
    2791               0 :         if (last_slash == path) {
    2792                 :                 /* the only separator is the first char of the path; "/" is returned even when OS_FILE_PATH_SEPARATOR is a backslash */
    2793                 : 
    2794               0 :                 return(mem_strdup("/"));
    2795                 :         }
    2796                 : 
    2797                 :         /* Non-trivial directory component: presumably copies the bytes before the last separator - confirm mem_strdupl */
    2798                 : 
    2799               0 :         return(mem_strdupl(path, last_slash - path));
    2800                 : }
    2801                 : 
    2802                 : /********************************************************************
    2803                 : Creates all missing directories in the directory part of the given path, parents first (recursively). */
    2804                 : UNIV_INTERN
    2805                 : ibool
    2806                 : os_file_create_subdirs_if_needed(
    2807                 : /*=============================*/
    2808                 :                                 /* out: TRUE if call succeeded
    2809                 :                                    FALSE otherwise */
    2810                 :         const char*     path)   /* in: path name; only os_file_dirname(path) is created, not the last component */
    2811               0 : {
    2812                 :         char*           subdir;
    2813                 :         ibool           success, subdir_exists;
    2814                 :         os_file_type_t  type;
    2815                 : 
    2816               0 :         subdir = os_file_dirname(path);
    2817               0 :         if (strlen(subdir) == 1
    2818                 :             && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
    2819                 :                 /* subdir is root or cwd, nothing to do; this also ends the recursion */
    2820               0 :                 mem_free(subdir);
    2821                 : 
    2822               0 :                 return(TRUE);
    2823                 :         }
    2824                 : 
    2825                 :         /* Test if subdir exists; the reported type is not examined afterwards */
    2826               0 :         success = os_file_status(subdir, &subdir_exists, &type);
    2827               0 :         if (success && !subdir_exists) {
    2828                 :                 /* subdir does not exist: create its own parents first, then subdir itself */
    2829               0 :                 success = os_file_create_subdirs_if_needed(subdir);
    2830               0 :                 if (!success) {
    2831               0 :                         mem_free(subdir);
    2832                 : 
    2833               0 :                         return(FALSE);
    2834                 :                 }
    2835               0 :                 success = os_file_create_directory(subdir, FALSE);
    2836                 :         }
    2837                 : 
    2838               0 :         mem_free(subdir);
    2839                 : 
    2840               0 :         return(success);
    2841                 : }
    2842                 : 
    2843                 : /********************************************************************
    2844                 : Returns a pointer to the nth slot in the aio array; asserts that the index is in range. */
    2845                 : static
    2846                 : os_aio_slot_t*
    2847                 : os_aio_array_get_nth_slot(
    2848                 : /*======================*/
    2849                 :                                         /* out: pointer to slot */
    2850                 :         os_aio_array_t*         array,  /* in: aio array */
    2851                 :         ulint                   index)  /* in: index of the slot; must be < array->n_slots */
    2852         2192154 : {
    2853         2192154 :         ut_a(index < array->n_slots);
    2854                 : 
    2855         2194127 :         return((array->slots) + index);
    2856                 : }
    2857                 : 
    2858                 : /****************************************************************************
    2859                 : Creates an aio wait array whose n slots all start unreserved; its is_empty event starts out set. */
    2860                 : static
    2861                 : os_aio_array_t*
    2862                 : os_aio_array_create(
    2863                 : /*================*/
    2864                 :                                 /* out, own: aio array */
    2865                 :         ulint   n,              /* in: maximum number of pending aio operations
    2866                 :                                 allowed; must be > 0 (asserted) and should be divisible by n_segments (not checked here) */
    2867                 :         ulint   n_segments)     /* in: number of segments in the aio array; must be > 0 (asserted) */
    2868             105 : {
    2869                 :         os_aio_array_t* array;
    2870                 :         ulint           i;
    2871                 :         os_aio_slot_t*  slot;
    2872                 : #ifdef WIN_ASYNC_IO
    2873                 :         OVERLAPPED*     over;
    2874                 : #endif
    2875             105 :         ut_a(n > 0);
    2876             105 :         ut_a(n_segments > 0);
    2877                 : 
    2878             105 :         array = ut_malloc(sizeof(os_aio_array_t));
    2879                 : 
    2880             105 :         array->mutex         = os_mutex_create(NULL);
    2881             105 :         array->not_full              = os_event_create(NULL);
    2882             105 :         array->is_empty              = os_event_create(NULL);
    2883                 : 
    2884             105 :         os_event_set(array->is_empty);
    2885                 : 
    2886             105 :         array->n_slots               = n;
    2887             105 :         array->n_segments    = n_segments;
    2888             105 :         array->n_reserved    = 0;
    2889             105 :         array->slots         = ut_malloc(n * sizeof(os_aio_slot_t));
    2890                 : #ifdef __WIN__
    2891                 :         array->native_events = ut_malloc(n * sizeof(os_native_event_t));
    2892                 : #endif
    2893           23709 :         for (i = 0; i < n; i++) {
    2894           23604 :                 slot = os_aio_array_get_nth_slot(array, i);
    2895                 : 
    2896           23604 :                 slot->pos = i;
    2897           23604 :                 slot->reserved = FALSE;
    2898                 : #ifdef WIN_ASYNC_IO
    2899                 :                 slot->event = os_event_create(NULL);
    2900                 : 
    2901                 :                 over = &(slot->control);
    2902                 : 
    2903                 :                 over->hEvent = slot->event->handle;
    2904                 : 
    2905                 :                 *((array->native_events) + i) = over->hEvent;
    2906                 : #endif
    2907                 :         }
    2908                 : 
    2909             105 :         return(array);
    2910                 : }
    2911                 : 
    2912                 : /****************************************************************************
    2913                 : Initializes the asynchronous io system. Calls also os_io_init_simple.
    2914                 : Creates one aio array for non-ibuf reads and one for non-ibuf writes,
    2915                 : an aio array for the ibuf i/o with just one segment, one aio array for
    2916                 : log reads and writes with one segment, and a synchronous aio array of the
    2917                 : specified size. The combined number of segments in the four first aio
    2918                 : arrays is the parameter n_segments given to the function. The caller must
    2919                 : create an i/o handler thread for each segment in the four first arrays,
    2920                 : but not for the sync aio array. */
    2921                 : UNIV_INTERN
    2922                 : void
    2923                 : os_aio_init(
    2924                 : /*========*/
    2925                 :         ulint   n,              /* in: maximum number of pending aio operations
    2926                 :                                 allowed; n must be divisible by n_segments */
    2927                 :         ulint   n_segments,     /* in: combined number of segments in the four
    2928                 :                                 first aio arrays; must be >= 4; ibuf and log get one each, read gets the extra one if the rest is odd */
    2929                 :         ulint   n_slots_sync)   /* in: number of slots in the sync aio array */
    2930              21 : {
    2931                 :         ulint   n_read_segs;
    2932                 :         ulint   n_write_segs;
    2933                 :         ulint   n_per_seg;
    2934                 :         ulint   i;
    2935                 : 
    2936                 :         ut_ad(n % n_segments == 0);
    2937                 :         ut_ad(n_segments >= 4);
    2938                 : 
    2939              21 :         os_io_init_simple();
    2940                 : 
    2941             105 :         for (i = 0; i < n_segments; i++) {
    2942              84 :                 srv_set_io_thread_op_info(i, "not started yet");
    2943                 :         }
    2944                 : 
    2945              21 :         n_per_seg = n / n_segments;
    2946              21 :         n_write_segs = (n_segments - 2) / 2;
    2947              21 :         n_read_segs = n_segments - 2 - n_write_segs;
    2948                 : 
    2949                 :         /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
    2950                 : 
    2951              21 :         os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
    2952                 : 
    2953              21 :         srv_io_thread_function[0] = "insert buffer thread";
    2954                 : 
    2955              21 :         os_aio_log_array = os_aio_array_create(n_per_seg, 1);
    2956                 : 
    2957              21 :         srv_io_thread_function[1] = "log thread";
    2958                 : 
    2959              21 :         os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
    2960                 :                                                 n_read_segs);
    2961              42 :         for (i = 2; i < 2 + n_read_segs; i++) {
    2962              21 :                 ut_a(i < SRV_MAX_N_IO_THREADS);
    2963              21 :                 srv_io_thread_function[i] = "read thread";
    2964                 :         }
    2965                 : 
    2966              21 :         os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
    2967                 :                                                  n_write_segs);
    2968              42 :         for (i = 2 + n_read_segs; i < n_segments; i++) {
    2969              21 :                 ut_a(i < SRV_MAX_N_IO_THREADS);
    2970              21 :                 srv_io_thread_function[i] = "write thread";
    2971                 :         }
    2972                 : 
    2973              21 :         os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
    2974                 : 
    2975              21 :         os_aio_n_segments = n_segments;
    2976                 : 
    2977              21 :         os_aio_validate();
    2978                 : 
    2979              21 :         os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
    2980                 : 
    2981             105 :         for (i = 0; i < n_segments; i++) {
    2982              84 :                 os_aio_segment_wait_events[i] = os_event_create(NULL);
    2983                 :         }
    2984                 : 
    2985              21 :         os_last_printout = time(NULL);
    2986                 : 
    2987              21 : }
    2988                 : 
    2989                 : #ifdef WIN_ASYNC_IO
    2990                 : /****************************************************************************
    2991                 : Wakes up all async i/o threads in the array in Windows async i/o at
    2992                 : shutdown, by setting the event of every slot in the array. */
    2993                 : static
    2994                 : void
    2995                 : os_aio_array_wake_win_aio_at_shutdown(
    2996                 : /*==================================*/
    2997                 :         os_aio_array_t* array)  /* in: aio array */
    2998                 : {
    2999                 :         ulint   i;
    3000                 : 
    3001                 :         for (i = 0; i < array->n_slots; i++) {
    3002                 : 
    3003                 :                 os_event_set((array->slots + i)->event);
    3004                 :         }
    3005                 : }
    3006                 : #endif
    3007                 : 
    3008                 : /****************************************************************************
    3009                 : Wakes up all async i/o threads so that they know to exit themselves in
    3010                 : shutdown. Sets the wait event of each of the os_aio_n_segments segments. */
    3011                 : UNIV_INTERN
    3012                 : void
    3013                 : os_aio_wake_all_threads_at_shutdown(void)
    3014                 : /*=====================================*/
    3015              41 : {
    3016                 :         ulint   i;
    3017                 : 
    3018                 : #ifdef WIN_ASYNC_IO
    3019                 :         /* This code wakes up all async i/o threads in Windows native aio (the sync array is not touched) */
    3020                 :         os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
    3021                 :         os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
    3022                 :         os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
    3023                 :         os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
    3024                 : #endif
    3025                 :         /* This loop wakes up all simulated async i/o threads */
    3026                 : 
    3027             205 :         for (i = 0; i < os_aio_n_segments; i++) {
    3028                 : 
    3029             164 :                 os_event_set(os_aio_segment_wait_events[i]);
    3030                 :         }
    3031              41 : }
    3032                 : 
    3033                 : /****************************************************************************
    3034                 : Waits on the is_empty event of os_aio_write_array, i.e. until there are no
    3035                 : pending writes in that array. There can be other, synchronous, pending writes. */
    3036                 : UNIV_INTERN
    3037                 : void
    3038                 : os_aio_wait_until_no_pending_writes(void)
    3039                 : /*=====================================*/
    3040             139 : {
    3041             139 :         os_event_wait(os_aio_write_array->is_empty);
    3042             139 : }
    3043                 : 
    3044                 : /**************************************************************************
    3045                 : Calculates the global segment number for a slot of the ibuf, log, read or write array. */
    3046                 : static
    3047                 : ulint
    3048                 : os_aio_get_segment_no_from_slot(
    3049                 : /*============================*/
    3050                 :                                 /* out: segment number (which is the number
    3051                 :                                 used by, for example, i/o-handler threads): 0 = ibuf, 1 = log, then read segments, then write segments */
    3052                 :         os_aio_array_t* array,  /* in: aio wait array; anything other than ibuf, log or read must be the write array (asserted) */
    3053                 :         os_aio_slot_t*  slot)   /* in: slot in this array */
    3054             192 : {
    3055                 :         ulint   segment;
    3056                 :         ulint   seg_len;
    3057                 : 
    3058             192 :         if (array == os_aio_ibuf_array) {
    3059               4 :                 segment = 0;
    3060                 : 
    3061             188 :         } else if (array == os_aio_log_array) {
    3062             188 :                 segment = 1;
    3063                 : 
    3064               0 :         } else if (array == os_aio_read_array) {
    3065               0 :                 seg_len = os_aio_read_array->n_slots
    3066                 :                         / os_aio_read_array->n_segments;
    3067                 : 
    3068               0 :                 segment = 2 + slot->pos / seg_len;
    3069                 :         } else {
    3070               0 :                 ut_a(array == os_aio_write_array);
    3071               0 :                 seg_len = os_aio_write_array->n_slots
    3072                 :                         / os_aio_write_array->n_segments;
    3073                 : 
    3074               0 :                 segment = os_aio_read_array->n_segments + 2
    3075                 :                         + slot->pos / seg_len;
    3076                 :         }
    3077                 : 
    3078             192 :         return(segment);
    3079                 : }
    3080                 : 
    3081                 : /**************************************************************************
    3082                 : Calculates local segment number and aio array from global segment number. */
    3083                 : static
    3084                 : ulint
    3085                 : os_aio_get_array_and_local_segment(
    3086                 : /*===============================*/
    3087                 :                                         /* out: local segment number within
    3088                 :                                         the aio array (always 0 for the ibuf and log arrays) */
    3089                 :         os_aio_array_t** array,         /* out: aio wait array: ibuf for segment 0, log for 1, then read, then write */
    3090                 :         ulint            global_segment)/* in: global segment number; must be < os_aio_n_segments (asserted) */
    3091            7004 : {
    3092                 :         ulint   segment;
    3093                 : 
    3094            7004 :         ut_a(global_segment < os_aio_n_segments);
    3095                 : 
    3096            7004 :         if (global_segment == 0) {
    3097             221 :                 *array = os_aio_ibuf_array;
    3098             221 :                 segment = 0;
    3099                 : 
    3100            6783 :         } else if (global_segment == 1) {
    3101             589 :                 *array = os_aio_log_array;
    3102             589 :                 segment = 0;
    3103                 : 
    3104            6194 :         } else if (global_segment < os_aio_read_array->n_segments + 2) {
    3105             476 :                 *array = os_aio_read_array;
    3106                 : 
    3107             476 :                 segment = global_segment - 2;
    3108                 :         } else {
    3109            5718 :                 *array = os_aio_write_array;
    3110                 : 
    3111            5718 :                 segment = global_segment - (os_aio_read_array->n_segments + 2);
    3112                 :         }
    3113                 : 
    3114            7004 :         return(segment);
    3115                 : }
    3116                 : 
    3117                 : /***********************************************************************
    3118                 : Reserves the first unreserved slot in the aio array and fills it with the request
    3119                 : parameters. If no slot is available, waits until the not_full event becomes signaled and retries. */
    3120                 : static
    3121                 : os_aio_slot_t*
    3122                 : os_aio_array_reserve_slot(
    3123                 : /*======================*/
    3124                 :                                 /* out: pointer to slot */
    3125                 :         ulint           type,   /* in: OS_FILE_READ or OS_FILE_WRITE */
    3126                 :         os_aio_array_t* array,  /* in: aio array */
    3127                 :         fil_node_t*     message1,/* in: message to be passed along with
    3128                 :                                 the aio operation */
    3129                 :         void*           message2,/* in: message to be passed along with
    3130                 :                                 the aio operation */
    3131                 :         os_file_t       file,   /* in: file handle */
    3132                 :         const char*     name,   /* in: name of the file or path as a
    3133                 :                                 null-terminated string; the pointer is stored in the slot, not copied */
    3134                 :         void*           buf,    /* in: buffer where to read or from which
    3135                 :                                 to write */
    3136                 :         ulint           offset, /* in: least significant 32 bits of file
    3137                 :                                 offset */
    3138                 :         ulint           offset_high, /* in: most significant 32 bits of
    3139                 :                                 offset */
    3140                 :         ulint           len)    /* in: length of the block to read or write */
    3141            5960 : {
    3142                 :         os_aio_slot_t*  slot;
    3143                 : #ifdef WIN_ASYNC_IO
    3144                 :         OVERLAPPED*     control;
    3145                 : #endif
    3146                 :         ulint           i;
    3147            5960 : loop:
    3148            5960 :         os_mutex_enter(array->mutex);
    3149                 : 
    3150            5960 :         if (array->n_reserved == array->n_slots) {
    3151               0 :                 os_mutex_exit(array->mutex);
    3152                 : 
    3153               0 :                 if (!os_aio_use_native_aio) {
    3154                 :                         /* If the handler threads are suspended, wake them
    3155                 :                         so that we get more slots */
    3156                 : 
    3157               0 :                         os_aio_simulated_wake_handler_threads();
    3158                 :                 }
    3159                 : 
    3160               0 :                 os_event_wait(array->not_full);
    3161                 : 
    3162               0 :                 goto loop;
    3163                 :         }
    3164                 : 
    3165          255084 :         for (i = 0;; i++) {
    3166          255084 :                 slot = os_aio_array_get_nth_slot(array, i);
    3167                 : 
    3168          255084 :                 if (slot->reserved == FALSE) {
    3169            5960 :                         break;
    3170                 :                 }
    3171          249124 :         }
    3172                 : 
    3173            5960 :         array->n_reserved++;
    3174                 : 
    3175            5960 :         if (array->n_reserved == 1) {
    3176             370 :                 os_event_reset(array->is_empty);
    3177                 :         }
    3178                 : 
    3179            5960 :         if (array->n_reserved == array->n_slots) {
    3180               0 :                 os_event_reset(array->not_full);
    3181                 :         }
    3182                 : 
    3183            5960 :         slot->reserved = TRUE;
    3184            5960 :         slot->reservation_time = time(NULL);
    3185            5960 :         slot->message1 = message1;
    3186            5960 :         slot->message2 = message2;
    3187            5960 :         slot->file     = file;
    3188            5960 :         slot->name     = name;
    3189            5960 :         slot->len      = len;
    3190            5960 :         slot->type     = type;
    3191            5960 :         slot->buf      = buf;
    3192            5960 :         slot->offset   = offset;
    3193            5960 :         slot->offset_high = offset_high;
    3194            5960 :         slot->io_already_done = FALSE;
    3195                 : 
    3196                 : #ifdef WIN_ASYNC_IO
    3197                 :         control = &(slot->control);
    3198                 :         control->Offset = (DWORD)offset;
    3199                 :         control->OffsetHigh = (DWORD)offset_high;
    3200                 :         os_event_reset(slot->event);
    3201                 : #endif
    3202                 : 
    3203            5960 :         os_mutex_exit(array->mutex);
    3204                 : 
    3205            5960 :         return(slot);
    3206                 : }
    3207                 : 
    3208                 : /***********************************************************************
    3209                 : Frees a slot in the aio array: under array->mutex, marks the slot as not reserved, decrements array->n_reserved, sets array->not_full if the array was full before this call, and sets array->is_empty if no reserved slots remain. */
    3210                 : static
    3211                 : void
    3212                 : os_aio_array_free_slot(
    3213                 : /*===================*/
    3214                 :         os_aio_array_t* array,  /* in: aio array */
    3215                 :         os_aio_slot_t*  slot)   /* in: pointer to slot; must currently be reserved */
    3216            5960 : {
    3217                 :         ut_ad(array);
    3218                 :         ut_ad(slot);
    3219                 : 
    3220            5960 :         os_mutex_enter(array->mutex);
    3221                 : 
    3222                 :         ut_ad(slot->reserved);
    3223                 : 
    3224            5960 :         slot->reserved = FALSE;
    3225                 : 
    3226            5960 :         array->n_reserved--;
    3227                 : 
    3228            5960 :         if (array->n_reserved == array->n_slots - 1) { /* the array was full before this free */
    3229               0 :                 os_event_set(array->not_full);
    3230                 :         }
    3231                 : 
    3232            5960 :         if (array->n_reserved == 0) { /* the last reserved slot was just released */
    3233             370 :                 os_event_set(array->is_empty);
    3234                 :         }
    3235                 : 
    3236                 : #ifdef WIN_ASYNC_IO
    3237                 :         os_event_reset(slot->event);
    3238                 : #endif
    3239            5960 :         os_mutex_exit(array->mutex);
    3240            5960 : }
    3241                 : 
    3242                 : /**************************************************************************
    3243                 : Wakes up a simulated aio i/o-handler thread if it has something to do: scans the slots of the given segment under the array mutex and, if at least one slot is reserved, sets the wait event of that global segment. */
    3244                 : static
    3245                 : void
    3246                 : os_aio_simulated_wake_handler_thread(
    3247                 : /*=================================*/
    3248                 :         ulint   global_segment) /* in: the number of the segment in the aio
    3249                 :                                 arrays */
    3250             920 : {
    3251                 :         os_aio_array_t* array;
    3252                 :         os_aio_slot_t*  slot;
    3253                 :         ulint           segment;
    3254                 :         ulint           n;
    3255                 :         ulint           i;
    3256                 : 
    3257                 :         ut_ad(!os_aio_use_native_aio);
    3258                 : 
    3259             920 :         segment = os_aio_get_array_and_local_segment(&array, global_segment);
    3260                 : 
    3261             920 :         n = array->n_slots / array->n_segments; /* number of slots per segment */
    3262                 : 
    3263                 :         /* Look through n slots after the segment * n'th slot */
    3264                 : 
    3265             920 :         os_mutex_enter(array->mutex);
    3266                 : 
    3267          141464 :         for (i = 0; i < n; i++) {
    3268          140915 :                 slot = os_aio_array_get_nth_slot(array, i + segment * n);
    3269                 : 
    3270          140915 :                 if (slot->reserved) {
    3271                 :                         /* Found an i/o request */
    3272                 : 
    3273             371 :                         break;
    3274                 :                 }
    3275                 :         }
    3276                 : 
    3277             920 :         os_mutex_exit(array->mutex);
    3278                 : 
    3279             920 :         if (i < n) { /* the loop ended early, i.e. a reserved slot was found */
    3280             371 :                 os_event_set(os_aio_segment_wait_events[global_segment]);
    3281                 :         }
    3282             920 : }
    3283                 : 
    3284                 : /**************************************************************************
    3285                 : Wakes up simulated aio i/o-handler threads if they have something to do. Does nothing when native aio is in use; otherwise clears os_aio_recommend_sleep_for_read_threads and calls os_aio_simulated_wake_handler_thread() for every global segment. */
    3286                 : UNIV_INTERN
    3287                 : void
    3288                 : os_aio_simulated_wake_handler_threads(void)
    3289                 : /*=======================================*/
    3290             182 : {
    3291                 :         ulint   i;
    3292                 : 
    3293             182 :         if (os_aio_use_native_aio) {
    3294                 :                 /* We do not use simulated aio: do nothing */
    3295                 : 
    3296               0 :                 return;
    3297                 :         }
    3298                 : 
    3299             182 :         os_aio_recommend_sleep_for_read_threads = FALSE; /* cleared before the wake-up loop below */
    3300                 : 
    3301             910 :         for (i = 0; i < os_aio_n_segments; i++) {
    3302             728 :                 os_aio_simulated_wake_handler_thread(i);
    3303                 :         }
    3304                 : }
    3305                 : 
    3306                 : /**************************************************************************
    3307                 : This function can be called if one wants to post a batch of reads and
    3308                 : prefers an i/o-handler thread to handle them all at once later. You must
    3309                 : call os_aio_simulated_wake_handler_threads later to ensure the threads
    3310                 : are not left sleeping! Sets os_aio_recommend_sleep_for_read_threads and resets the wait event of every global segment that belongs to os_aio_read_array. */
    3311                 : UNIV_INTERN
    3312                 : void
    3313                 : os_aio_simulated_put_read_threads_to_sleep(void)
    3314                 : /*============================================*/
    3315              10 : {
    3316                 :         os_aio_array_t* array;
    3317                 :         ulint           g;
    3318                 : 
    3319              10 :         os_aio_recommend_sleep_for_read_threads = TRUE;
    3320                 : 
    3321              50 :         for (g = 0; g < os_aio_n_segments; g++) {
    3322              40 :                 os_aio_get_array_and_local_segment(&array, g); /* only the array is needed; the returned local segment is ignored */
    3323                 : 
    3324              40 :                 if (array == os_aio_read_array) { /* segments of other arrays are left untouched */
    3325                 : 
    3326              10 :                         os_event_reset(os_aio_segment_wait_events[g]);
    3327                 :                 }
    3328                 :         }
    3329              10 : }
    3330                 : 
    3331                 : /***********************************************************************
    3332                 : Requests an asynchronous i/o operation. If mode is OS_AIO_SYNC (and, when WIN_ASYNC_IO is defined, native aio is not in use), the request is performed directly with os_file_read() or os_file_write(); otherwise a slot is reserved in the aio array selected by mode. */
    3333                 : UNIV_INTERN
    3334                 : ibool
    3335                 : os_aio(
    3336                 : /*===*/
    3337                 :                                 /* out: TRUE if request was queued
    3338                 :                                 successfully, FALSE if fail */
    3339                 :         ulint           type,   /* in: OS_FILE_READ or OS_FILE_WRITE */
    3340                 :         ulint           mode,   /* in: OS_AIO_NORMAL, ..., possibly ORed
    3341                 :                                 with OS_AIO_SIMULATED_WAKE_LATER: the
    3342                 :                                 last flag advises this function not to wake
    3343                 :                                 i/o-handler threads, but the caller will
    3344                 :                                 do the waking explicitly later, in this
    3345                 :                                 way the caller can post several requests in
    3346                 :                                 a batch; NOTE that the batch must not be
    3347                 :                                 so big that it exhausts the slots in aio
    3348                 :                                 arrays! NOTE that a simulated batch
    3349                 :                                 may introduce hidden chances of deadlocks,
    3350                 :                                 because i/os are not actually handled until
    3351                 :                                 all have been posted: use with great
    3352                 :                                 caution! */
    3353                 :         const char*     name,   /* in: name of the file or path as a
    3354                 :                                 null-terminated string */
    3355                 :         os_file_t       file,   /* in: handle to a file */
    3356                 :         void*           buf,    /* in: buffer where to read or from which
    3357                 :                                 to write */
    3358                 :         ulint           offset, /* in: least significant 32 bits of file
    3359                 :                                 offset where to read or write */
    3360                 :         ulint           offset_high, /* in: most significant 32 bits of
    3361                 :                                 offset */
    3362                 :         ulint           n,      /* in: number of bytes to read or write */
    3363                 :         fil_node_t*     message1,/* in: messages for the aio handler (these
    3364                 :                                 can be used to identify a completed aio
    3365                 :                                 operation); if mode is OS_AIO_SYNC, these
    3366                 :                                 are ignored */
    3367                 :         void*           message2) /* in: second message for the aio handler, see message1 */
    3368           64002 : {
    3369                 :         os_aio_array_t* array;
    3370                 :         os_aio_slot_t*  slot;
    3371                 : #ifdef WIN_ASYNC_IO
    3372                 :         ibool           retval;
    3373                 :         BOOL            ret             = TRUE;
    3374                 :         DWORD           len             = (DWORD) n;
    3375                 :         struct fil_node_struct * dummy_mess1;
    3376                 :         void*           dummy_mess2;
    3377                 :         ulint           dummy_type;
    3378                 : #endif
    3379           64002 :         ulint           err             = 0;
    3380                 :         ibool           retry;
    3381                 :         ulint           wake_later;
    3382                 : 
    3383                 :         ut_ad(file);
    3384                 :         ut_ad(buf);
    3385                 :         ut_ad(n > 0);
    3386                 :         ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
    3387                 :         ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
    3388                 :         ut_ad(os_aio_validate());
    3389                 : 
    3390           64002 :         wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* remember whether the caller asked to defer the wake-up */
    3391           64002 :         mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); /* strip the flag so that mode can be compared with the OS_AIO_* values below */
    3392                 : 
    3393           64002 :         if (mode == OS_AIO_SYNC
    3394                 : #ifdef WIN_ASYNC_IO
    3395                 :             && !os_aio_use_native_aio
    3396                 : #endif
    3397                 :             ) {
    3398                 :                 /* This is actually an ordinary synchronous read or write:
    3399                 :                 no need to use an i/o-handler thread. NOTE that if we use
    3400                 :                 Windows async i/o, Windows does not allow us to use
    3401                 :                 ordinary synchronous os_file_read etc. on the same file,
    3402                 :                 therefore we have built a special mechanism for synchronous
    3403                 :                 wait in the Windows case. */
    3404                 : 
    3405           58042 :                 if (type == OS_FILE_READ) {
    3406             610 :                         return(os_file_read(file, buf, offset,
    3407                 :                                             offset_high, n));
    3408                 :                 }
    3409                 : 
    3410           57432 :                 ut_a(type == OS_FILE_WRITE);
    3411                 : 
    3412           57432 :                 return(os_file_write(name, file, buf, offset, offset_high, n));
    3413                 :         }
    3414                 : 
    3415            5960 : try_again:
    3416            5960 :         if (mode == OS_AIO_NORMAL) {
    3417            5768 :                 if (type == OS_FILE_READ) {
    3418             263 :                         array = os_aio_read_array;
    3419                 :                 } else {
    3420            5505 :                         array = os_aio_write_array;
    3421                 :                 }
    3422             192 :         } else if (mode == OS_AIO_IBUF) {
    3423                 :                 ut_ad(type == OS_FILE_READ);
    3424                 :                 /* Reduce probability of deadlock bugs in connection with ibuf:
    3425                 :                 do not let the ibuf i/o handler sleep */
    3426                 : 
    3427               4 :                 wake_later = FALSE;
    3428                 : 
    3429               4 :                 array = os_aio_ibuf_array;
    3430             188 :         } else if (mode == OS_AIO_LOG) {
    3431                 : 
    3432             188 :                 array = os_aio_log_array;
    3433               0 :         } else if (mode == OS_AIO_SYNC) { /* reachable only when WIN_ASYNC_IO is defined and native aio is in use; otherwise OS_AIO_SYNC returned above */
    3434               0 :                 array = os_aio_sync_array;
    3435                 :         } else {
    3436               0 :                 array = NULL; /* Eliminate compiler warning */
    3437               0 :                 ut_error;
    3438                 :         }
    3439                 : 
    3440            5960 :         slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
    3441                 :                                          name, buf, offset, offset_high, n);
    3442            5960 :         if (type == OS_FILE_READ) {
    3443             267 :                 if (os_aio_use_native_aio) {
    3444                 : #ifdef WIN_ASYNC_IO
    3445                 :                         os_n_file_reads++;
    3446                 :                         os_bytes_read_since_printout += len;
    3447                 : 
    3448                 :                         ret = ReadFile(file, buf, (DWORD)n, &len,
    3449                 :                                        &(slot->control));
    3450                 : #endif
    3451                 :                 } else {
    3452             267 :                         if (!wake_later) {
    3453               4 :                                 os_aio_simulated_wake_handler_thread(
    3454                 :                                         os_aio_get_segment_no_from_slot(
    3455                 :                                                 array, slot));
    3456                 :                         }
    3457                 :                 }
    3458            5693 :         } else if (type == OS_FILE_WRITE) {
    3459            5693 :                 if (os_aio_use_native_aio) {
    3460                 : #ifdef WIN_ASYNC_IO
    3461                 :                         os_n_file_writes++;
    3462                 :                         ret = WriteFile(file, buf, (DWORD)n, &len,
    3463                 :                                         &(slot->control));
    3464                 : #endif
    3465                 :                 } else {
    3466            5693 :                         if (!wake_later) {
    3467             188 :                                 os_aio_simulated_wake_handler_thread(
    3468                 :                                         os_aio_get_segment_no_from_slot(
    3469                 :                                                 array, slot));
    3470                 :                         }
    3471                 :                 }
    3472                 :         } else {
    3473               0 :                 ut_error;
    3474                 :         }
    3475                 : 
    3476                 : #ifdef WIN_ASYNC_IO
    3477                 :         if (os_aio_use_native_aio) {
    3478                 :                 if ((ret && len == n)
    3479                 :                     || (!ret && GetLastError() == ERROR_IO_PENDING)) {
    3480                 :                         /* aio was queued successfully! */
    3481                 : 
    3482                 :                         if (mode == OS_AIO_SYNC) {
    3483                 :                                 /* We want a synchronous i/o operation on a
    3484                 :                                 file where we also use async i/o: in Windows
    3485                 :                                 we must use the same wait mechanism as for
    3486                 :                                 async i/o */
    3487                 : 
    3488                 :                                 retval = os_aio_windows_handle(ULINT_UNDEFINED,
    3489                 :                                                                slot->pos,
    3490                 :                                                                &dummy_mess1,
    3491                 :                                                                &dummy_mess2,
    3492                 :                                                                &dummy_type);
    3493                 : 
    3494                 :                                 return(retval);
    3495                 :                         }
    3496                 : 
    3497                 :                         return(TRUE);
    3498                 :                 }
    3499                 : 
    3500                 :                 err = 1; /* Fall through the next if */
    3501                 :         }
    3502                 : #endif
    3503            5960 :         if (err == 0) { /* err is set to nonzero only in the WIN_ASYNC_IO section above */
    3504                 :                 /* aio was queued successfully! */
    3505                 : 
    3506            5960 :                 return(TRUE);
    3507                 :         }
    3508                 : 
    3509               0 :         os_aio_array_free_slot(array, slot); /* queuing failed: release the slot; a retry reserves a new one at try_again */
    3510                 : 
    3511               0 :         retry = os_file_handle_error(name,
    3512                 :                                      type == OS_FILE_READ
    3513                 :                                      ? "aio read" : "aio write");
    3514               0 :         if (retry) {
    3515                 : 
    3516               0 :                 goto try_again;
    3517                 :         }
    3518                 : 
    3519               0 :         return(FALSE);
    3520                 : }
    3521                 : 
    3522                 : #ifdef WIN_ASYNC_IO
    3523                 : /**************************************************************************
    3524                 : This function is only used in Windows asynchronous i/o.
    3525                 : Waits for an aio operation to complete. This function is used to wait
    3526                 : for completed requests. The aio array of pending requests is divided
    3527                 : into segments. The thread specifies which segment or slot it wants to wait
    3528                 : for. NOTE: this function will also take care of freeing the aio slot,
    3529                 : therefore no other thread is allowed to do the freeing! */
    3530                 : UNIV_INTERN
    3531                 : ibool
    3532                 : os_aio_windows_handle(
    3533                 : /*==================*/
    3534                 :                                 /* out: TRUE if the aio operation succeeded */
    3535                 :         ulint   segment,        /* in: the number of the segment in the aio
    3536                 :                                 arrays to wait for; segment 0 is the ibuf
    3537                 :                                 i/o thread, segment 1 the log i/o thread,
    3538                 :                                 then follow the non-ibuf read threads, and as
    3539                 :                                 the last are the non-ibuf write threads; if
    3540                 :                                 this is ULINT_UNDEFINED, then it means that
    3541                 :                                 sync aio is used, and this parameter is
    3542                 :                                 ignored */
    3543                 :         ulint   pos,            /* in: this parameter is used only in sync aio:
    3544                 :                                 wait for the aio slot at this position */
    3545                 :         fil_node_t**message1,   /* out: the messages passed with the aio
    3546                 :                                 request; note that also in the case where
    3547                 :                                 the aio operation failed, these output
    3548                 :                                 parameters are valid and can be used to
    3549                 :                                 restart the operation, for example */
    3550                 :         void**  message2,       /* out: second message passed with the aio request, see message1 */
    3551                 :         ulint*  type)           /* out: OS_FILE_WRITE or ..._READ */
    3552                 : {
    3553                 :         ulint           orig_seg        = segment;
    3554                 :         os_aio_array_t* array;
    3555                 :         os_aio_slot_t*  slot;
    3556                 :         ulint           n;
    3557                 :         ulint           i;
    3558                 :         ibool           ret_val;
    3559                 :         BOOL            ret;
    3560                 :         DWORD           len;
    3561                 : 
    3562                 :         if (segment == ULINT_UNDEFINED) {
    3563                 :                 array = os_aio_sync_array;
    3564                 :                 segment = 0;
    3565                 :         } else {
    3566                 :                 segment = os_aio_get_array_and_local_segment(&array, segment);
    3567                 :         }
    3568                 : 
    3569                 :         /* NOTE! We only access constant fields in os_aio_array. Therefore
    3570                 :         we do not have to acquire the protecting mutex yet */
    3571                 : 
    3572                 :         ut_ad(os_aio_validate());
    3573                 :         ut_ad(segment < array->n_segments);
    3574                 : 
    3575                 :         n = array->n_slots / array->n_segments; /* number of slots per segment */
    3576                 : 
    3577                 :         if (array == os_aio_sync_array) { /* sync aio: wait only for the event of the slot at pos */
    3578                 :                 os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
    3579                 :                 i = pos;
    3580                 :         } else {
    3581                 :                 srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
    3582                 :                 i = os_event_wait_multiple(n,
    3583                 :                                            (array->native_events)
    3584                 :                                            + segment * n);
    3585                 :         }
    3586                 : 
    3587                 :         os_mutex_enter(array->mutex);
    3588                 : 
    3589                 :         slot = os_aio_array_get_nth_slot(array, i + segment * n);
    3590                 : 
    3591                 :         ut_a(slot->reserved);
    3592                 : 
    3593                 :         if (orig_seg != ULINT_UNDEFINED) {
    3594                 :                 srv_set_io_thread_op_info(orig_seg,
    3595                 :                                           "get windows aio return value");
    3596                 :         }
    3597                 : 
    3598                 :         ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); /* last argument TRUE: wait until the operation has completed */
    3599                 : 
    3600                 :         *message1 = slot->message1;
    3601                 :         *message2 = slot->message2;
    3602                 : 
    3603                 :         *type = slot->type;
    3604                 : 
    3605                 :         if (ret && len == slot->len) { /* success only if the whole requested length was transferred */
    3606                 :                 ret_val = TRUE;
    3607                 : 
    3608                 : #ifdef UNIV_DO_FLUSH
    3609                 :                 if (slot->type == OS_FILE_WRITE
    3610                 :                     && !os_do_not_call_flush_at_each_write) {
    3611                 :                         ut_a(TRUE == os_file_flush(slot->file));
    3612                 :                 }
    3613                 : #endif /* UNIV_DO_FLUSH */
    3614                 :         } else {
    3615                 :                 os_file_handle_error(slot->name, "Windows aio"); /* the retry indication returned here is not used */
    3616                 : 
    3617                 :                 ret_val = FALSE;
    3618                 :         }
    3619                 : 
    3620                 :         os_mutex_exit(array->mutex); /* os_aio_array_free_slot() below enters array->mutex itself */
    3621                 : 
    3622                 :         os_aio_array_free_slot(array, slot);
    3623                 : 
    3624                 :         return(ret_val);
    3625                 : }
    3626                 : #endif
    3627                 : 
    3628                 : /**************************************************************************
    3629                 : Does simulated aio. This function should be called by an i/o-handler
    3630                 : thread. */
    3631                 : UNIV_INTERN
    3632                 : ibool
    3633                 : os_aio_simulated_handle(
    3634                 : /*====================*/
    3635                 :                                 /* out: TRUE if the aio operation succeeded */
    3636                 :         ulint   global_segment, /* in: the number of the segment in the aio
    3637                 :                                 arrays to wait for; segment 0 is the ibuf
    3638                 :                                 i/o thread, segment 1 the log i/o thread,
    3639                 :                                 then follow the non-ibuf read threads, and as
    3640                 :                                 the last are the non-ibuf write threads */
    3641                 :         fil_node_t**message1,   /* out: the messages passed with the aio
    3642                 :                                 request; note that also in the case where
    3643                 :                                 the aio operation failed, these output
    3644                 :                                 parameters are valid and can be used to
    3645                 :                                 restart the operation, for example */
    3646                 :         void**  message2,
    3647                 :         ulint*  type)           /* out: OS_FILE_WRITE or ..._READ */
    3648            6044 : {
    3649                 :         os_aio_array_t* array;
    3650                 :         ulint           segment;
    3651                 :         os_aio_slot_t*  slot;
    3652                 :         os_aio_slot_t*  slot2;
    3653                 :         os_aio_slot_t*  consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
    3654                 :         ulint           n_consecutive;
    3655                 :         ulint           total_len;
    3656                 :         ulint           offs;
    3657                 :         ulint           lowest_offset;
    3658                 :         ulint           biggest_age;
    3659                 :         ulint           age;
    3660                 :         byte*           combined_buf;
    3661                 :         byte*           combined_buf2;
    3662                 :         ibool           ret;
    3663                 :         ulint           n;
    3664                 :         ulint           i;
    3665                 : 
    3666            6044 :         segment = os_aio_get_array_and_local_segment(&array, global_segment);
    3667                 : 
    3668            6415 : restart:
    3669                 :         /* NOTE! We only access constant fields in os_aio_array. Therefore
    3670                 :         we do not have to acquire the protecting mutex yet */
    3671                 : 
    3672            6415 :         srv_set_io_thread_op_info(global_segment,
    3673                 :                                   "looking for i/o requests (a)");
    3674                 :         ut_ad(os_aio_validate());
    3675                 :         ut_ad(segment < array->n_segments);
    3676                 : 
    3677            6414 :         n = array->n_slots / array->n_segments;
    3678                 : 
    3679                 :         /* Look through n slots after the segment * n'th slot */
    3680                 : 
    3681            6414 :         if (array == os_aio_read_array
    3682                 :             && os_aio_recommend_sleep_for_read_threads) {
    3683                 : 
    3684                 :                 /* Give other threads chance to add several i/os to the array
    3685                 :                 at once. */
    3686                 : 
    3687               1 :                 goto recommended_sleep;
    3688                 :         }
    3689                 : 
    3690            6413 :         os_mutex_enter(array->mutex);
    3691                 : 
    3692            6414 :         srv_set_io_thread_op_info(global_segment,
    3693                 :                                   "looking for i/o requests (b)");
    3694                 : 
    3695                 :         /* Check if there is a slot for which the i/o has already been
    3696                 :         done */
    3697                 : 
    3698          589277 :         for (i = 0; i < n; i++) {
    3699          587828 :                 slot = os_aio_array_get_nth_slot(array, i + segment * n);
    3700                 : 
    3701          587617 :                 if (slot->reserved && slot->io_already_done) {
    3702                 : 
    3703            5051 :                         if (os_aio_print_debug) {
    3704               0 :                                 fprintf(stderr,
    3705                 :                                         "InnoDB: i/o for slot %lu"
    3706                 :                                         " already done, returning\n",
    3707                 :                                         (ulong) i);
    3708                 :                         }
    3709                 : 
    3710            5051 :                         ret = TRUE;
    3711                 : 
    3712            5051 :                         goto slot_io_done;
    3713                 :                 }
    3714                 :         }
    3715                 : 
    3716            1449 :         n_consecutive = 0;
    3717                 : 
    3718                 :         /* If there are at least 2 seconds old requests, then pick the oldest
    3719                 :         one to prevent starvation. If several requests have the same age,
    3720                 :         then pick the one at the lowest offset. */
    3721                 : 
    3722            1449 :         biggest_age = 0;
    3723            1449 :         lowest_offset = ULINT_MAX;
    3724                 : 
    3725          350050 :         for (i = 0; i < n; i++) {
    3726          348577 :                 slot = os_aio_array_get_nth_slot(array, i + segment * n);
    3727                 : 
    3728          348601 :                 if (slot->reserved) {
    3729           26821 :                         age = (ulint)difftime(time(NULL),
    3730                 :                                               slot->reservation_time);
    3731                 : 
    3732           26821 :                         if ((age >= 2 && age > biggest_age)
    3733                 :                             || (age >= 2 && age == biggest_age
    3734                 :                                 && slot->offset < lowest_offset)) {
    3735                 : 
    3736                 :                                 /* Found an i/o request */
    3737               0 :                                 consecutive_ios[0] = slot;
    3738                 : 
    3739               0 :                                 n_consecutive = 1;
    3740                 : 
    3741               0 :                                 biggest_age = age;
    3742               0 :                                 lowest_offset = slot->offset;
    3743                 :                         }
    3744                 :                 }
    3745                 :         }
    3746                 : 
    3747            1473 :         if (n_consecutive == 0) {
    3748                 :                 /* There were no old requests. Look for an i/o request at the
    3749                 :                 lowest offset in the array (we ignore the high 32 bits of the
    3750                 :                 offset in these heuristics) */
    3751                 : 
    3752            1363 :                 lowest_offset = ULINT_MAX;
    3753                 : 
    3754          349092 :                 for (i = 0; i < n; i++) {
    3755          347839 :                         slot = os_aio_array_get_nth_slot(array,
    3756                 :                                                          i + segment * n);
    3757                 : 
    3758          347729 :                         if (slot->reserved && slot->offset < lowest_offset) {
    3759                 : 
    3760                 :                                 /* Found an i/o request */
    3761            1454 :                                 consecutive_ios[0] = slot;
    3762                 : 
    3763            1454 :                                 n_consecutive = 1;
    3764                 : 
    3765            1454 :                                 lowest_offset = slot->offset;
    3766                 :                         }
    3767                 :                 }
    3768                 :         }
    3769                 : 
    3770            1363 :         if (n_consecutive == 0) {
    3771                 : 
    3772                 :                 /* No i/o requested at the moment */
    3773                 : 
    3774             454 :                 goto wait_for_io;
    3775                 :         }
    3776                 : 
    3777             909 :         slot = consecutive_ios[0];
    3778                 : 
    3779                 :         /* Check if there are several consecutive blocks to read or write */
    3780                 : 
    3781            5940 : consecutive_loop:
    3782          468271 :         for (i = 0; i < n; i++) {
    3783          467382 :                 slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
    3784                 : 
    3785          467382 :                 if (slot2->reserved && slot2 != slot
    3786                 :                     && slot2->offset == slot->offset + slot->len
    3787                 :                     /* check that sum does not wrap over */
    3788                 :                     && slot->offset + slot->len > slot->offset
    3789                 :                     && slot2->offset_high == slot->offset_high
    3790                 :                     && slot2->type == slot->type
    3791                 :                     && slot2->file == slot->file) {
    3792                 : 
    3793                 :                         /* Found a consecutive i/o request */
    3794                 : 
    3795            5051 :                         consecutive_ios[n_consecutive] = slot2;
    3796            5051 :                         n_consecutive++;
    3797                 : 
    3798            5051 :                         slot = slot2;
    3799                 : 
    3800            5051 :                         if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
    3801                 : 
    3802            5031 :                                 goto consecutive_loop;
    3803                 :                         } else {
    3804              20 :                                 break;
    3805                 :                         }
    3806                 :                 }
    3807                 :         }
    3808                 : 
    3809             909 :         srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
    3810                 : 
    3811                 :         /* We have now collected n_consecutive i/o requests in the array;
    3812                 :         allocate a single buffer which can hold all data, and perform the
    3813                 :         i/o */
    3814                 : 
    3815             909 :         total_len = 0;
    3816             909 :         slot = consecutive_ios[0];
    3817                 : 
    3818            6869 :         for (i = 0; i < n_consecutive; i++) {
    3819            5960 :                 total_len += consecutive_ios[i]->len;
    3820                 :         }
    3821                 : 
    3822             909 :         if (n_consecutive == 1) {
    3823                 :                 /* We can use the buffer of the i/o request */
    3824             477 :                 combined_buf = slot->buf;
    3825             477 :                 combined_buf2 = NULL;
    3826                 :         } else {
    3827             432 :                 combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
    3828                 : 
    3829             432 :                 ut_a(combined_buf2);
    3830                 : 
    3831             432 :                 combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE);
    3832                 :         }
    3833                 : 
    3834                 :         /* We release the array mutex for the time of the i/o: NOTE that
    3835                 :         this assumes that there is just one i/o-handler thread serving
    3836                 :         a single segment of slots! */
    3837                 : 
    3838             909 :         os_mutex_exit(array->mutex);
    3839                 : 
    3840             909 :         if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
    3841                 :                 /* Copy the buffers to the combined buffer */
    3842             370 :                 offs = 0;
    3843                 : 
    3844            5621 :                 for (i = 0; i < n_consecutive; i++) {
    3845                 : 
    3846            5251 :                         ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
    3847                 :                                   consecutive_ios[i]->len);
    3848            5251 :                         offs += consecutive_ios[i]->len;
    3849                 :                 }
    3850                 :         }
    3851                 : 
    3852             909 :         srv_set_io_thread_op_info(global_segment, "doing file i/o");
    3853                 : 
    3854             909 :         if (os_aio_print_debug) {
    3855               0 :                 fprintf(stderr,
    3856                 :                         "InnoDB: doing i/o of type %lu at offset %lu %lu,"
    3857                 :                         " length %lu\n",
    3858                 :                         (ulong) slot->type, (ulong) slot->offset_high,
    3859                 :                         (ulong) slot->offset, (ulong) total_len);
    3860                 :         }
    3861                 : 
    3862                 :         /* Do the i/o with ordinary, synchronous i/o functions: */
    3863             909 :         if (slot->type == OS_FILE_WRITE) {
    3864             812 :                 ret = os_file_write(slot->name, slot->file, combined_buf,
    3865                 :                                     slot->offset, slot->offset_high,
    3866                 :                                     total_len);
    3867                 :         } else {
    3868              97 :                 ret = os_file_read(slot->file, combined_buf,
    3869                 :                                    slot->offset, slot->offset_high, total_len);
    3870                 :         }
    3871                 : 
    3872             909 :         ut_a(ret);
    3873             909 :         srv_set_io_thread_op_info(global_segment, "file i/o done");
    3874                 : 
    3875                 : #if 0
    3876                 :         fprintf(stderr,
    3877                 :                 "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
    3878                 :                 n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
    3879                 : #endif
    3880                 : 
    3881             909 :         if (slot->type == OS_FILE_READ && n_consecutive > 1) {
    3882                 :                 /* Copy the combined buffer to individual buffers */
    3883              62 :                 offs = 0;
    3884                 : 
    3885             294 :                 for (i = 0; i < n_consecutive; i++) {
    3886                 : 
    3887             232 :                         ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
    3888                 :                                   consecutive_ios[i]->len);
    3889             232 :                         offs += consecutive_ios[i]->len;
    3890                 :                 }
    3891                 :         }
    3892                 : 
    3893             909 :         if (combined_buf2) {
    3894             432 :                 ut_free(combined_buf2);
    3895                 :         }
    3896                 : 
    3897             909 :         os_mutex_enter(array->mutex);
    3898                 : 
    3899                 :         /* Mark the i/os done in slots */
    3900                 : 
    3901            6869 :         for (i = 0; i < n_consecutive; i++) {
    3902            5960 :                 consecutive_ios[i]->io_already_done = TRUE;
    3903                 :         }
    3904                 : 
    3905                 :         /* We return the messages for the first slot now, and if there were
    3906                 :         several slots, the messages will be returned with subsequent calls
    3907                 :         of this function */
    3908                 : 
    3909            5960 : slot_io_done:
    3910                 : 
    3911            5960 :         ut_a(slot->reserved);
    3912                 : 
    3913            5960 :         *message1 = slot->message1;
    3914            5960 :         *message2 = slot->message2;
    3915                 : 
    3916            5960 :         *type = slot->type;
    3917                 : 
    3918            5960 :         os_mutex_exit(array->mutex);
    3919                 : 
    3920            5960 :         os_aio_array_free_slot(array, slot);
    3921                 : 
    3922            5960 :         return(ret);
    3923                 : 
    3924             454 : wait_for_io:
    3925             454 :         srv_set_io_thread_op_info(global_segment, "resetting wait event");
    3926                 : 
    3927                 :         /* We wait here until there again can be i/os in the segment
    3928                 :         of this thread */
    3929                 : 
    3930             454 :         os_event_reset(os_aio_segment_wait_events[global_segment]);
    3931                 : 
    3932             454 :         os_mutex_exit(array->mutex);
    3933                 : 
    3934             455 : recommended_sleep:
    3935             455 :         srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
    3936                 : 
    3937             455 :         os_event_wait(os_aio_segment_wait_events[global_segment]);
    3938                 : 
    3939             371 :         if (os_aio_print_debug) {
    3940               0 :                 fprintf(stderr,
    3941                 :                         "InnoDB: i/o handler thread for i/o"
    3942                 :                         " segment %lu wakes up\n",
    3943                 :                         (ulong) global_segment);
    3944                 :         }
    3945                 : 
    3946             371 :         goto restart;
    3947                 : }
    3948                 : 
    3949                 : /**************************************************************************
    3950                 : Validates the consistency of an aio array. */
    3951                 : static
    3952                 : ibool
    3953                 : os_aio_array_validate(
    3954                 : /*==================*/
    3955                 :                                 /* out: TRUE if ok */
    3956                 :         os_aio_array_t* array)  /* in: aio wait array */
    3957             105 : {
    3958                 :         os_aio_slot_t*  slot;
    3959             105 :         ulint           n_reserved      = 0;
    3960                 :         ulint           i;
    3961                 : 
    3962             105 :         ut_a(array);
    3963                 : 
    3964             105 :         os_mutex_enter(array->mutex);
    3965                 : 
    3966             105 :         ut_a(array->n_slots > 0);
    3967             105 :         ut_a(array->n_segments > 0);
    3968                 : 
    3969           23709 :         for (i = 0; i < array->n_slots; i++) {
    3970           23604 :                 slot = os_aio_array_get_nth_slot(array, i);
    3971                 : 
    3972           23604 :                 if (slot->reserved) {
    3973               0 :                         n_reserved++;
    3974               0 :                         ut_a(slot->len > 0);
    3975                 :                 }
    3976                 :         }
    3977                 : 
    3978             105 :         ut_a(array->n_reserved == n_reserved);
    3979                 : 
    3980             105 :         os_mutex_exit(array->mutex);
    3981                 : 
    3982             105 :         return(TRUE);
    3983                 : }
    3984                 : 
    3985                 : /**************************************************************************
    3986                 : Validates the consistency the aio system. */
    3987                 : UNIV_INTERN
    3988                 : ibool
    3989                 : os_aio_validate(void)
    3990                 : /*=================*/
    3991                 :                                 /* out: TRUE if ok */
    3992              21 : {
    3993              21 :         os_aio_array_validate(os_aio_read_array);
    3994              21 :         os_aio_array_validate(os_aio_write_array);
    3995              21 :         os_aio_array_validate(os_aio_ibuf_array);
    3996              21 :         os_aio_array_validate(os_aio_log_array);
    3997              21 :         os_aio_array_validate(os_aio_sync_array);
    3998                 : 
    3999              21 :         return(TRUE);
    4000                 : }
    4001                 : 
    4002                 : /**************************************************************************
    4003                 : Prints info of the aio arrays. */
    4004                 : UNIV_INTERN
    4005                 : void
    4006                 : os_aio_print(
    4007                 : /*=========*/
    4008                 :         FILE*   file)   /* in: file where to print */
    4009               0 : {
    4010                 :         os_aio_array_t* array;
    4011                 :         os_aio_slot_t*  slot;
    4012                 :         ulint           n_reserved;
    4013                 :         time_t          current_time;
    4014                 :         double          time_elapsed;
    4015                 :         double          avg_bytes_read;
    4016                 :         ulint           i;
    4017                 : 
    4018               0 :         for (i = 0; i < srv_n_file_io_threads; i++) {
    4019               0 :                 fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
    4020                 :                         srv_io_thread_op_info[i],
    4021                 :                         srv_io_thread_function[i]);
    4022                 : 
    4023                 : #ifndef __WIN__
    4024               0 :                 if (os_aio_segment_wait_events[i]->is_set) {
    4025               0 :                         fprintf(file, " ev set");
    4026                 :                 }
    4027                 : #endif
    4028                 : 
    4029               0 :                 fprintf(file, "\n");
    4030                 :         }
    4031                 : 
    4032               0 :         fputs("Pending normal aio reads:", file);
    4033                 : 
    4034               0 :         array = os_aio_read_array;
    4035               0 : loop:
    4036               0 :         ut_a(array);
    4037                 : 
    4038               0 :         os_mutex_enter(array->mutex);
    4039                 : 
    4040               0 :         ut_a(array->n_slots > 0);
    4041               0 :         ut_a(array->n_segments > 0);
    4042                 : 
    4043               0 :         n_reserved = 0;
    4044                 : 
    4045               0 :         for (i = 0; i < array->n_slots; i++) {
    4046               0 :                 slot = os_aio_array_get_nth_slot(array, i);
    4047                 : 
    4048               0 :                 if (slot->reserved) {
    4049               0 :                         n_reserved++;
    4050                 : #if 0
    4051                 :                         fprintf(stderr, "Reserved slot, messages %p %p\n",
    4052                 :                                 (void*) slot->message1,
    4053                 :                                 (void*) slot->message2);
    4054                 : #endif
    4055               0 :                         ut_a(slot->len > 0);
    4056                 :                 }
    4057                 :         }
    4058                 : 
    4059               0 :         ut_a(array->n_reserved == n_reserved);
    4060                 : 
    4061               0 :         fprintf(file, " %lu", (ulong) n_reserved);
    4062                 : 
    4063               0 :         os_mutex_exit(array->mutex);
    4064                 : 
    4065               0 :         if (array == os_aio_read_array) {
    4066               0 :                 fputs(", aio writes:", file);
    4067                 : 
    4068               0 :                 array = os_aio_write_array;
    4069                 : 
    4070               0 :                 goto loop;
    4071                 :         }
    4072                 : 
    4073               0 :         if (array == os_aio_write_array) {
    4074               0 :                 fputs(",\n ibuf aio reads:", file);
    4075               0 :                 array = os_aio_ibuf_array;
    4076                 : 
    4077               0 :                 goto loop;
    4078                 :         }
    4079                 : 
    4080               0 :         if (array == os_aio_ibuf_array) {
    4081               0 :                 fputs(", log i/o's:", file);
    4082               0 :                 array = os_aio_log_array;
    4083                 : 
    4084               0 :                 goto loop;
    4085                 :         }
    4086                 : 
    4087               0 :         if (array == os_aio_log_array) {
    4088               0 :                 fputs(", sync i/o's:", file);
    4089               0 :                 array = os_aio_sync_array;
    4090                 : 
    4091               0 :                 goto loop;
    4092                 :         }
    4093                 : 
    4094               0 :         putc('\n', file);
    4095               0 :         current_time = time(NULL);
    4096               0 :         time_elapsed = 0.001 + difftime(current_time, os_last_printout);
    4097                 : 
    4098               0 :         fprintf(file,
    4099                 :                 "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
    4100                 :                 "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
    4101                 :                 (ulong) fil_n_pending_log_flushes,
    4102                 :                 (ulong) fil_n_pending_tablespace_flushes,
    4103                 :                 (ulong) os_n_file_reads, (ulong) os_n_file_writes,
    4104                 :                 (ulong) os_n_fsyncs);
    4105                 : 
    4106               0 :         if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
    4107               0 :                 fprintf(file,
    4108                 :                         "%lu pending preads, %lu pending pwrites\n",
    4109                 :                         (ulong) os_file_n_pending_preads,
    4110                 :                         (ulong) os_file_n_pending_pwrites);
    4111                 :         }
    4112                 : 
    4113               0 :         if (os_n_file_reads == os_n_file_reads_old) {
    4114               0 :                 avg_bytes_read = 0.0;
    4115                 :         } else {
    4116               0 :                 avg_bytes_read = (double) os_bytes_read_since_printout
    4117                 :                         / (os_n_file_reads - os_n_file_reads_old);
    4118                 :         }
    4119                 : 
    4120               0 :         fprintf(file,
    4121                 :                 "%.2f reads/s, %lu avg bytes/read,"
    4122                 :                 " %.2f writes/s, %.2f fsyncs/s\n",
    4123                 :                 (os_n_file_reads - os_n_file_reads_old)
    4124                 :                 / time_elapsed,
    4125                 :                 (ulong)avg_bytes_read,
    4126                 :                 (os_n_file_writes - os_n_file_writes_old)
    4127                 :                 / time_elapsed,
    4128                 :                 (os_n_fsyncs - os_n_fsyncs_old)
    4129                 :                 / time_elapsed);
    4130                 : 
    4131               0 :         os_n_file_reads_old = os_n_file_reads;
    4132               0 :         os_n_file_writes_old = os_n_file_writes;
    4133               0 :         os_n_fsyncs_old = os_n_fsyncs;
    4134               0 :         os_bytes_read_since_printout = 0;
    4135                 : 
    4136               0 :         os_last_printout = current_time;
    4137               0 : }
    4138                 : 
    4139                 : /**************************************************************************
    4140                 : Refreshes the statistics used to print per-second averages. */
    4141                 : UNIV_INTERN
    4142                 : void
    4143                 : os_aio_refresh_stats(void)
    4144                 : /*======================*/
    4145               8 : {
    4146               8 :         os_n_file_reads_old = os_n_file_reads;
    4147               8 :         os_n_file_writes_old = os_n_file_writes;
    4148               8 :         os_n_fsyncs_old = os_n_fsyncs;
    4149               8 :         os_bytes_read_since_printout = 0;
    4150                 : 
    4151               8 :         os_last_printout = time(NULL);
    4152               8 : }
    4153                 : 
    4154                 : #ifdef UNIV_DEBUG
    4155                 : /**************************************************************************
    4156                 : Checks that all slots in the system have been freed, that is, there are
    4157                 : no pending io operations. */
    4158                 : UNIV_INTERN
    4159                 : ibool
    4160                 : os_aio_all_slots_free(void)
    4161                 : /*=======================*/
    4162                 :                                 /* out: TRUE if all free */
    4163                 : {
    4164                 :         os_aio_array_t* array;
    4165                 :         ulint           n_res   = 0;
    4166                 : 
    4167                 :         array = os_aio_read_array;
    4168                 : 
    4169                 :         os_mutex_enter(array->mutex);
    4170                 : 
    4171                 :         n_res += array->n_reserved;
    4172                 : 
    4173                 :         os_mutex_exit(array->mutex);
    4174                 : 
    4175                 :         array = os_aio_write_array;
    4176                 : 
    4177                 :         os_mutex_enter(array->mutex);
    4178                 : 
    4179                 :         n_res += array->n_reserved;
    4180                 : 
    4181                 :         os_mutex_exit(array->mutex);
    4182                 : 
    4183                 :         array = os_aio_ibuf_array;
    4184                 : 
    4185                 :         os_mutex_enter(array->mutex);
    4186                 : 
    4187                 :         n_res += array->n_reserved;
    4188                 : 
    4189                 :         os_mutex_exit(array->mutex);
    4190                 : 
    4191                 :         array = os_aio_log_array;
    4192                 : 
    4193                 :         os_mutex_enter(array->mutex);
    4194                 : 
    4195                 :         n_res += array->n_reserved;
    4196                 : 
    4197                 :         os_mutex_exit(array->mutex);
    4198                 : 
    4199                 :         array = os_aio_sync_array;
    4200                 : 
    4201                 :         os_mutex_enter(array->mutex);
    4202                 : 
    4203                 :         n_res += array->n_reserved;
    4204                 : 
    4205                 :         os_mutex_exit(array->mutex);
    4206                 : 
    4207                 :         if (n_res == 0) {
    4208                 : 
    4209                 :                 return(TRUE);
    4210                 :         }
    4211                 : 
    4212                 :         return(FALSE);
    4213                 : }
    4214                 : #endif /* UNIV_DEBUG */

Generated by: LTP GCOV extension version 1.6