linux读写二进制大文件_linux 文件读写_sunmenggmail的博客

相关文章推荐
慷慨的铁板烧 · sparksql的udf序列函数 ...· 3 月前 ·
文质彬彬的红金鱼 · （六：4）ElasticSearch的查询权 ...· 3 月前 ·
欢快的韭菜 · 连接到平面文件数据源（SQL Server ...· 1 年前 ·
冷冷的青蛙 · 如何使用Python从JSON数组中删除元素 ...· 1 年前 ·
千年单身的马铃薯 · 使用多層感知器類神經網路之蒜頭選別機__臺灣 ...· 1 年前 ·
https://raw.githubusercontent.com/ceph/ceph/master/src/common/safe_io.c
一般而言，用 pwrite 和 pread 读写磁盘文件时，一次调用通常就能完成，不需要循环；但在读写超大文件时，单次调用可能只传输了部分数据（返回值小于请求的长度），因此必须循环调用，直到全部数据处理完毕，才能保证读写正确。
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
#define _XOPEN_SOURCE 500
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include "common/safe_io.h"
#include "include/compat.h"
/*
 * Read up to count bytes from fd into buf, looping over short reads and
 * retrying when read() is interrupted (EINTR).
 *
 * Returns the number of bytes read, which is less than count only when
 * EOF was reached first, or -errno if read() fails for any other reason.
 */
ssize_t safe_read(int fd, void *buf, size_t count)
{
	size_t cnt = 0;

	while (cnt < count) {
		ssize_t r = read(fd, buf, count - cnt);
		if (r <= 0) {
			if (r == 0) {
				// EOF
				return cnt;
			}
			if (errno == EINTR)
				continue;
			return -errno;
		}
		cnt += r;
		// advance past the bytes already stored
		buf = (char *)buf + r;
	}
	return cnt;
}
/*
 * Read exactly count bytes from fd into buf.
 *
 * Returns 0 on success, the negative value reported by safe_read() on
 * failure, or -EDOM when fewer than count bytes were available.
 */
ssize_t safe_read_exact(int fd, void *buf, size_t count)
{
	ssize_t ret = safe_read(fd, buf, count);
	if (ret < 0)
		return ret;
	if ((size_t)ret != count)
		return -EDOM;
	return 0;
}
/*
 * Write all count bytes from buf to fd, looping over short writes and
 * retrying when write() is interrupted (EINTR).
 *
 * Returns 0 once everything has been written, or -errno if write() fails
 * for any other reason.
 */
ssize_t safe_write(int fd, const void *buf, size_t count)
{
	while (count > 0) {
		ssize_t r = write(fd, buf, count);
		if (r < 0) {
			if (errno == EINTR)
				continue;
			return -errno;
		}
		count -= r;
		// keep the const qualifier while stepping past the written bytes
		buf = (const char *)buf + r;
	}
	return 0;
}
/*
 * Read up to count bytes from fd at the given file offset into buf,
 * looping over short reads and retrying when pread() is interrupted
 * (EINTR).
 *
 * Returns the number of bytes read, which is less than count only when
 * EOF was reached first, or -errno if pread() fails for any other reason.
 */
ssize_t safe_pread(int fd, void *buf, size_t count, off_t offset)
{
	size_t cnt = 0;
	char *b = (char*)buf;

	while (cnt < count) {
		// buffer position and file position advance together by cnt
		ssize_t r = pread(fd, b + cnt, count - cnt, offset + cnt);
		if (r <= 0) {
			if (r == 0) {
				// EOF
				return cnt;
			}
			if (errno == EINTR)
				continue;
			return -errno;
		}
		cnt += r;
	}
	return cnt;
}
/*
 * Read exactly count bytes from fd at the given file offset into buf.
 *
 * Returns 0 on success, the negative value reported by safe_pread() on
 * failure, or -EDOM when fewer than count bytes were available.
 */
ssize_t safe_pread_exact(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t ret = safe_pread(fd, buf, count, offset);
	if (ret < 0)
		return ret;
	if ((size_t)ret != count)
		return -EDOM;
	return 0;
}
/*
 * Write all count bytes from buf to fd starting at the given file offset,
 * looping over short writes and retrying when pwrite() is interrupted
 * (EINTR).
 *
 * Returns 0 once everything has been written, or -errno if pwrite() fails
 * for any other reason.
 */
ssize_t safe_pwrite(int fd, const void *buf, size_t count, off_t offset)
{
	while (count > 0) {
		ssize_t r = pwrite(fd, buf, count, offset);
		if (r < 0) {
			if (errno == EINTR)
				continue;
			return -errno;
		}
		count -= r;
		// keep the const qualifier while stepping past the written bytes
		buf = (const char *)buf + r;
		offset += r;
	}
	return 0;
}
#ifdef CEPH_HAVE_SPLICE
/*
 * Call splice() repeatedly until len bytes have been moved from fd_in to
 * fd_out, retrying when the call is interrupted (EINTR).
 *
 * Returns the number of bytes moved, which is less than len only when
 * splice() reported 0 (treated as EOF), or -errno on any other failure.
 */
ssize_t safe_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out,
		    size_t len, unsigned int flags)
{
  size_t cnt = 0;

  while (cnt < len) {
    ssize_t r = splice(fd_in, off_in, fd_out, off_out, len - cnt, flags);
    if (r <= 0) {
      if (r == 0) {
	// EOF
	return cnt;
      }
      if (errno == EINTR)
	continue;
      return -errno;
    }
    cnt += r;
  }
  return cnt;
}
/*
 * Move exactly len bytes from fd_in to fd_out via safe_splice().
 *
 * Returns 0 on success, the negative value reported by safe_splice() on
 * failure, or -EDOM when fewer than len bytes were moved.
 */
ssize_t safe_splice_exact(int fd_in, loff_t *off_in, int fd_out,
			  loff_t *off_out, size_t len, unsigned int flags)
{
  ssize_t ret = safe_splice(fd_in, off_in, fd_out, off_out, len, flags);
  if (ret < 0)
    return ret;
  if ((size_t)ret != len)
    return -EDOM;
  return 0;
}
#endif
/*
 * Replace the contents of <base>/<file> with the vallen bytes at val.
 *
 * The data is written to <base>/<file>.tmp, fsync'ed, renamed over the
 * final name, and then the directory `base` is fsync'ed.  If the file
 * already holds exactly this content nothing is written.
 *
 * Returns 0 on success or a negative errno value on failure
 * (-ENAMETOOLONG when a path does not fit in PATH_MAX).
 */
int safe_write_file(const char *base, const char *file,
		    const char *val, size_t vallen)
{
  int ret;
  char fn[PATH_MAX];
  char tmp[PATH_MAX];
  int fd;

  // does the file already have correct content?
  // Only trust the comparison when val is shorter than the probe buffer:
  // with vallen == sizeof(oldval) a longer file sharing the same prefix
  // would be indistinguishable from an exact match.
  char oldval[80];
  ret = safe_read_file(base, file, oldval, sizeof(oldval));
  if (vallen < sizeof(oldval) &&
      ret == (int)vallen && memcmp(oldval, val, vallen) == 0)
    return 0;  // yes.

  // refuse to operate on silently truncated paths
  ret = snprintf(fn, sizeof(fn), "%s/%s", base, file);
  if (ret < 0 || (size_t)ret >= sizeof(fn))
    return -ENAMETOOLONG;
  ret = snprintf(tmp, sizeof(tmp), "%s/%s.tmp", base, file);
  if (ret < 0 || (size_t)ret >= sizeof(tmp))
    return -ENAMETOOLONG;

  fd = open(tmp, O_WRONLY|O_CREAT|O_TRUNC, 0644);
  if (fd < 0) {
    ret = errno;
    return -ret;
  }
  ret = safe_write(fd, val, vallen);
  if (ret) {
    VOID_TEMP_FAILURE_RETRY(close(fd));
    // do not leave a partially written temp file behind
    unlink(tmp);
    return ret;
  }

  ret = fsync(fd);
  if (ret < 0) ret = -errno;
  VOID_TEMP_FAILURE_RETRY(close(fd));
  if (ret < 0) {
    unlink(tmp);
    return ret;
  }
  ret = rename(tmp, fn);
  if (ret < 0) {
    ret = -errno;
    unlink(tmp);
    return ret;
  }

  // fsync the containing directory after the rename
  fd = open(base, O_RDONLY);
  if (fd < 0) {
    ret = -errno;
    return ret;
  }
  ret = fsync(fd);
  if (ret < 0) ret = -errno;
  VOID_TEMP_FAILURE_RETRY(close(fd));

  return ret;
}
/*
 * Read up to vallen bytes from <base>/<file> into val.
 *
 * Returns the number of bytes read (no terminator is appended) or a
 * negative errno value on failure (-ENAMETOOLONG when the path does not
 * fit in PATH_MAX).
 */
int safe_read_file(const char *base, const char *file,
		   char *val, size_t vallen)
{
  char fn[PATH_MAX];
  int fd, len;

  // refuse to open a silently truncated path
  len = snprintf(fn, sizeof(fn), "%s/%s", base, file);
  if (len < 0 || (size_t)len >= sizeof(fn))
    return -ENAMETOOLONG;

  fd = open(fn, O_RDONLY);
  if (fd < 0) {
    return -errno;
  }
  len = safe_read(fd, val, vallen);
  // close sometimes returns errors, but only after write(); the result of
  // the read (byte count or negative error) is returned either way
  VOID_TEMP_FAILURE_RETRY(close(fd));

  return len;
}
下面是我自己写的代码。为了验证 readBlock/writeBlock 的正确性，这里分两批写入文件，再分两批读回并与原数据比较；实际工程中只需调用一次读/写即可。
#include <unistd.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <string>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <vector>
#include <sstream>
#include <errno.h>
using namespace std;
// Write exactly len bytes from arr to fd, starting at file offset fdOft.
// Short writes are continued and an interrupted pwrite() (EINTR) is retried.
// On success fdOft is advanced by len and true is returned; on any other
// pwrite() error false is returned and fdOft is left unchanged.
bool writeBlock(int fd, int64_t &fdOft, int8_t *arr, int64_t len) {
    int64_t oft = 0;  // bytes written so far

    while (oft < len) {
        int64_t num = pwrite(fd, arr + oft, len - oft, fdOft + oft);
        if (num == -1) {
            if (errno == EINTR) {
                continue;  // interrupted by a signal before any data was written
            }
            return false;
        }
        oft += num;
    }
    fdOft += oft;
    return true;
}
// Read exactly len bytes from fd into arr, starting at file offset fdOft.
// Short reads are continued and an interrupted pread() (EINTR) is retried.
// On success fdOft is advanced by len and true is returned. If EOF is hit
// before len bytes were read, or pread() fails for another reason, false is
// returned and fdOft is left unchanged.
bool readBlock(int fd, int64_t &fdOft, void *arr, int64_t len) {
    int64_t oft = 0;  // bytes read so far
    int8_t *b = (int8_t*)arr;

    while (oft < len) {
        int64_t num = pread(fd, b + oft, len - oft, fdOft + oft);
        if (num <= 0) {
            if (num == 0) {
                // EOF while bytes are still outstanding (oft < len holds here)
                return false;
            }
            if (errno == EINTR) {
                continue;  // the call was interrupted by a signal before any data was read
            }
            return false;
        }
        oft += num;
    }
    fdOft += oft;
    return true;
}
// Round-trip check for writeBlock/readBlock.
//
// Usage: <prog> <count> <file>
// Fills an array of <count> doubles, writes it to <file> in two halves,
// reads it back in two halves and compares the result element by element.
// Returns 0 when the data read back equals the data written, -1 on a
// usage/setup error and 1 when an I/O step fails or the data differs.
int main(int argc, char *argv[]) {
    if (argc < 3) {
        cout << "usage: " << (argc > 0 ? argv[0] : "prog") << " <count> <file>\n";
        return -1;
    }

    // Parse the element count strictly: it must be a positive integer whose
    // byte size still fits in int64_t.
    errno = 0;
    char *end = NULL;
    long long parsed = strtoll(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' || parsed <= 0 ||
        parsed > (long long)(INT64_MAX / (int64_t)sizeof(double))) {
        cout << "bad len\n";
        return -1;
    }
    int64_t len = parsed;
    cout << "len = "<<len <<endl;

    int fd = open(argv[2], O_RDWR|O_CREAT|O_TRUNC, S_IRWXU);
    if (fd == -1) {
        cout << "not open\n";
        return -1;
    }
    int64_t total = sizeof(double)*len;
    cout << "total = " << total <<endl;
    double *arr = (double*)malloc(total);
    if (arr == NULL) {
        cout << "malloc err\n";
        close(fd);
        return -1;
    }
    arr[0] = 0.001;
    for (int64_t i = 1; i < len; ++i) {
        arr[i] = arr[i-1] + 0.001;
    }

    // Write the buffer in two batches to exercise the offset bookkeeping.
    int64_t fdOft = 0;
    int64_t first = total/2;
    int64_t second = total - first;
    bool ioOk = true;
    bool flag = writeBlock(fd, fdOft,(int8_t*)arr, first);
    if (!flag) {
        cout << "first no\n";
        ioOk = false;
    }
    else {
        cout << "first yes\n";
    }
    flag = writeBlock(fd, fdOft, (int8_t*)arr + first, second);
    if (!flag) {
        cout << "second no\n";
        ioOk = false;
    }
    else {
        cout << "second yes\n";
    }
    close(fd);

    int fdr = open(argv[2], O_RDONLY);
    if (fdr == -1) {
        cout << "not open\n";
        free(arr);
        return -1;
    }
    double *arr1 = (double*)malloc(total);
    if (arr1 == NULL) {
        cout << "malloc error\n";
        close(fdr);
        free(arr);
        return -1;
    }

    // Read the file back in the same two batches.
    int64_t readOft = 0;
    if (readBlock(fdr, readOft, arr1, first)) {
        cout << "first read yes\n";
    }
    else {
        cout << "first read no\n";
        ioOk = false;
    }
    if (readBlock(fdr, readOft, (int8_t*)arr1 + first, second)) {
        cout << "second read yes\n";
    }
    else {
        cout << "second read no\n";
        ioOk = false;
    }

    // Only compare when every step succeeded; otherwise arr1 may hold
    // uninitialized bytes.
    flag = ioOk;
    if (ioOk) {
        for (int64_t i = 0; i < len; ++i) {
            if (arr1[i] != arr[i]) {
                flag = false;
                cout << i << " no equal\n";
            }
        }
    }
    if (flag) cout << "equal\n";

    close(fdr);
    free(arr1);
    free(arr);
    return flag ? 0 : 1;
}
                    https://raw.githubusercontent.com/ceph/ceph/master/src/common/safe_io.c一般而言，用pwrite和pread读写磁盘文件不需要用循环，但是在读写超大的文件时就一定需要循环，保证正确读写// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:
				在Linux当中，查看二进制文件内容可以用hexdump工具，语法如下： 
　　hexdump [选项] [文件]...
    -n length 只格式化输入文件的前length个字节。
    -C 输出规范的十六进制和ASCII码。 
    -c 单字节字符显示。 
    -d 双字节十进制显示。
    -o 双字节八进制显示。 
    -x 双字节十六进制显示。
    -s 从偏
od - dump files in octal and other formats
xxd - make a hexdump or do the reverse.
hexdump, hd — ASCII, decimal, hexadecimal, octal dump
这三种方法都是用来以指定格式查看文件，没有本质区别。
其中只有 od 是 POSIX 标准，原因参考 Single UNIX rationale。
以上面三种方式分别实现 hex + ASCII 显示：
xxd // xxd 命令用于用二进制或十六进制显示文件的内容
-r // 把xxd的十六进制输出内容转换回原文件的二进制内容
-ps // 以 postscript的连续十六进制转储输出，这也叫做纯十六进制转储
2.朴素用法
echo -e -n "\x11\x22" > test       // 2 个字节
-e // 表...
				Linux下对文件操作有几大接口，先简单的介绍一下open,read,write
先用 open 接口打开文件，flags 参数用来指定文件的打开方式（只读、只写、读写等）。
int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);
常用的 flags 有 O_RDWR、O_RDONLY 等，其中 O_RDONLY 类似于 fopen 的 "r" 模式。三个参数的版本用于 flags 中包含 O_CREAT（可与 O_EXCL 配合使用）的情况，第三个参数 mode 是新建文件的八进制权限，如 0777。
int ma
				在Linux编程需要读写文件时，有两种方式：
（1）ANSIC： 使用stdio.h里的函数。fopen, fclose, fwrite, fread
（2）Linux API：Linux提供了另外一套API用于操作文件。open, close,  write,  read
ANSI C优点：被各平台都支持，因此一份代码可以适用多种平台。
ANSIC函数:
(1)文件路径: 使用/
(2...
				Python读写二进制文件可以使用内置的open函数，并指定相应的mode参数。在mode参数中，使用“rb”表示以二进制模式打开文件进行读取操作，使用“wb”表示以二进制模式打开文件进行写入操作。
读取二进制文件的示例代码如下：
```python
with open('file.bin', 'rb') as f:
    data = f.read()
```
以上代码是以只读二进制模式打开“file.bin”文件，并将文件内容读取到data变量中。
写入二进制文件的示例代码如下：
```python
data = b'\x54\x65\x73\x74\x20\x44\x61\x74\x61'
with open('file.bin', 'wb') as f:
    f.write(data)
```
以上代码是以二进制写入模式打开“file.bin”文件，将二进制数据data写入到文件中。
在进行二进制文件的读写操作时，需要注意以下几点：
1. 以二进制模式打开文件时，文件中的数据不会被自动转换为Unicode编码。
2. 二进制文件中的数据可以是任意二进制数据，包括0x00字节等特殊字符。
3. 在使用read方法读取文件数据时，需要注意文件大小，以免一次读取过多数据导致内存溢出。
4. 在使用write方法写入数据时，需要注意是否要追加数据或覆盖原有数据，以及写入数据的字节数是否正确。