HOG2
DiskBitFile.cpp
Go to the documentation of this file.
1 //
2 // DiskBitFile.cpp
3 // Rubik
4 //
5 // Created by Nathan Sturtevant on 4/5/13.
6 // Copyright (c) 2013 Nathan Sturtevant. All rights reserved.
7 //
8 
9 #include "DiskBitFile.h"
10 
11 DiskBitFile::DiskBitFile(const char *pre)
12 {
13 // subBucketBits = subSize;
14  strncpy(prefix, pre, 62);
15  outputFile = 0;
16  outputBucket = -1;
17  outputSubBucket = -1;
18  cacheOffset = -1; // beginning of cache [in bytes]
19 
20  theCacheSize = 0; // valid bytes in the cache
21  cacheFilePosition = 0; // current offset in file (bytes)
22  cacheChanged = false;
23 
24  bytesRead = 0;
25  bytesWritten = 0;
26 
27 
28  fileOpen = false;
29  chunkFile = 0;
30  fileOffset = 0;
31  currBucket = -1;
32  currSubBucket = -1;
33 }
34 
36 {
37  CloseReadFile();
39 }
40 
42 {
43  if (outputFile != 0)
44  {
45  fclose(outputFile);
46  //printf("Closing file\n");fflush(stdout);
47  outputFile = 0;
48  }
49  outputBucket = -1;
50  outputSubBucket = -1;
51  cacheOffset = -1;
53 }
54 
55 // incoming offset is in entries, not bytes
56 void DiskBitFile::WriteFileDepth(int bucket, int64_t offset, uint8_t value)
57 {
58  int64_t subBucket = (offset*BITS/8)>>subBucketBits;
59  if (bucket == -1)
60  {
61  FlushCache();
63  return;
64  }
65  if ((bucket != outputBucket) || (subBucket != outputSubBucket))
66  {
67  FlushCache();
69  }
70  if (outputFile == 0)
71  {
72  //printf("Opening '%s'\n", getBucketFileName(bucket, subBucket)); fflush(stdout);
73  outputFile = fopen(getBucketFileName(bucket, subBucket), "r+");
74  if (outputFile == 0) { printf("Unable to open '%s'; aborting\n", getBucketFileName(bucket, subBucket)); exit(0); }
75  outputBucket = bucket;
76  outputSubBucket = subBucket;
78  cacheOffset = -1;
79  }
80 
81  offset -= subBucket*(1<<subBucketBits)*8/BITS;
82 #if BITS==8
83  assert(cacheOffset != -1);
84  assert(offset-cacheOffset < theCacheSize);
85  assert(offset-cacheOffset >= 0);
86  cache[offset-cacheOffset] = value;
87  //printf("Writing %d to local cache offset %lld\n", value, offset-cacheOffset);
88  cacheChanged = true;
89 #elif BITS==4
90  assert(cacheOffset != -1);
91  assert((offset>>1)-cacheOffset < theCacheSize);
92  assert((offset>>1)-cacheOffset >= 0);
93  uint8_t curr;
94  curr = cache[(offset>>1)-cacheOffset];
95  curr &= (~(0xF<<(4*(offset%2)))); // wipe out old value
96  curr |= (value<<(4*(offset%2))); // or in new value
97  cache[(offset>>1)-cacheOffset] = curr;
98  cacheChanged = true;
99 #elif BITS==2
100  assert(cacheOffset != -1);
101  assert((offset>>2)-cacheOffset < theCacheSize);
102  assert((offset>>2)-cacheOffset >= 0);
103  uint8_t curr;
104  curr = cache[(offset>>2)-cacheOffset];
105  curr &= (~(0x3<<(2*(offset%4)))); // wipe out old value
106  curr |= (value<<(2*(offset%4))); // or in new value
107  cache[(offset>>2)-cacheOffset] = curr;
108  cacheChanged = true;
109 #else // BITS==1
110  assert(false);
111 #endif
112 
113 }
114 
115 int DiskBitFile::ReadFileDepth(int bucket, int64_t offset)
116 {
117  int64_t subBucket = (offset*BITS/8)>>subBucketBits;
118  if (bucket == -1)
119  {
120  FlushCache();
122  return 0;
123  }
124  if ((bucket != outputBucket) || (subBucket != outputSubBucket))
125  {
126  FlushCache();
128  }
129  if (outputFile == 0)
130  {
131  //printf("Opening '%s'\n", getBucketFileName(bucket, subBucket)); fflush(stdout);
132  outputFile = fopen(getBucketFileName(bucket, subBucket), "r+");
133  if (outputFile == 0) { printf("Unable to open '%s'; aborting\n", getBucketFileName(bucket, subBucket)); exit(0); }
134  outputBucket = bucket;
135  outputSubBucket = subBucket;
136  cacheFilePosition = 0;
137  cacheOffset = -1;
138  }
139 
140  offset -= subBucket*(1<<subBucketBits)*8/BITS;
141 #if BITS==8
142  if ((cacheOffset == -1) || // just read into memory
143  (offset-cacheOffset < 0) || (offset-cacheOffset >= theCacheSize))
144  {
145  FlushCache();
146 
147  cacheOffset = offset&(~(cacheSize-1));
148  fseek(outputFile, cacheOffset-cacheFilePosition, SEEK_CUR);
149  //fseek(outputFile, cacheOffset, SEEK_SET);
151  theCacheSize = fread(cache, sizeof(uint8_t), cacheSize, outputFile);
152  chunksRead++;
154  //printf("cacheOffset: %lld; cacheFilePosition: %lld\n", cacheOffset, cacheFilePosition);
155  }
156  return cache[offset-cacheOffset];
157 
158 #elif BITS==4
159 
160  if ((cacheOffset == -1) || // just read into memory
161  (offset>>1)-cacheOffset < 0 || (offset>>1)-cacheOffset >= theCacheSize)
162  {
163  FlushCache();
164 
165  cacheOffset = (offset>>1)&(~(cacheSize-1));
166  fseek(outputFile, cacheOffset-cacheFilePosition, SEEK_CUR);
167  //fseek(outputFile, cacheOffset, SEEK_SET);
169  theCacheSize = fread(cache, sizeof(uint8_t), cacheSize, outputFile);
170  //chunksRead++;
171  bytesRead += cacheSize;
173  }
174  return (cache[(offset>>1)-cacheOffset]>>(4*(offset%2)))&0xF;
175 
176 #elif BITS==2
177  if ((cacheOffset == -1) || // just read into memory
178  (offset>>2)-cacheOffset < 0 || (offset>>2)-cacheOffset >= theCacheSize)
179  {
180  FlushCache();
181 
182  cacheOffset = (offset>>2)&(~(cacheSize-1));
183  fseek(outputFile, cacheOffset-cacheFilePosition, SEEK_CUR);
184  //fseek(outputFile, cacheOffset, SEEK_SET);
186  theCacheSize = fread(cache, sizeof(uint8_t), cacheSize, outputFile);
187  chunksRead++;
189  }
190  return (cache[(offset>>2)-cacheOffset]>>(2*(offset%4)))&0x3;
191 
192 #else // BITS==1
193  assert(false);
194 #endif
195 }
196 
198 {
199  if (fileOpen)
200  {
201  fclose(chunkFile);
202  chunkFile = 0;
203  //printf("Closing READ file\n");
204  fileOpen = false;
205  }
206 }
207 
208 uint8_t *DiskBitFile::ReadChunk(int bucket, int64_t offset, int numEntries, uint8_t *data)
209 {
210 
211  int64_t subBucket = (offset*BITS/8)>>subBucketBits;
212  offset -= subBucket*(1<<subBucketBits)*8/BITS;
213 
214  if (bucket == -1)
215  {
216  if (fileOpen)
217  {
218  fclose(chunkFile);
219  //printf("Closing READ file\n");
220  fileOpen = false;
221  }
222  return 0;
223  }
224  if (fileOpen && ((bucket != currBucket) || (subBucket != currSubBucket)))
225  {
226  fclose(chunkFile);
227  chunkFile = 0;
228  //printf("Closing READ file\n");
229  fileOpen = false;
230  }
231  if (!fileOpen)
232  {
233  //printf("Opening %s for READ only\n", getBucketFileName(bucket, subBucket));
234  chunkFile = fopen(getBucketFileName(bucket, subBucket), "r");
235  if (chunkFile == 0)
236  {
237  printf("Unable to open file %s\n", getBucketFileName(bucket, subBucket));
238  exit(0);
239  }
240  fileOffset = 0;
241  currBucket = bucket;
242  currSubBucket = subBucket;
243  fileOpen = true;
244  }
245 
246  fseek(chunkFile, offset*BITS/8-fileOffset, SEEK_CUR);
247  fileOffset+=offset*BITS/8-fileOffset;
248  // fseek(f, offset*BITS/8, SEEK_SET);
249 
250  int alignedSize = (numEntries*BITS+7)/8;
251  // uint8_t *data = GetMemoryChunk((openSize*BITS+7)/8); //new uint8_t[alignedSize];
252  assert(0 == offset%2);
253  fread(data, sizeof(uint8_t), alignedSize, chunkFile);
254  fileOffset += alignedSize;
255  bytesRead += alignedSize;
256  //chunksRead++;
257  return data;
258 }
259 
261 {
262  if (cacheChanged)
263  {
264  fseek(outputFile, cacheOffset-cacheFilePosition, SEEK_CUR);
265  //fseek(outputFile, cacheOffset, SEEK_SET);
267  fwrite(cache, sizeof(uint8_t), theCacheSize, outputFile);
269  //chunksWritten++;
271  cacheChanged = false;
272  }
273 }
274 
275 void DiskBitFile::Init(const std::vector<bucketData> &buckets)
276 {
277  int subBucket = 0;
278  for (unsigned int x = 0; x < buckets.size(); x++)
279  {
280  printf("Bucket %d has %" PRId64 " entries\n", x, buckets[x].theSize);
281 
282  FILE *f = fopen(getBucketFileName(x, subBucket), "w");
283  if (f == 0)
284  { printf("Error opening file '%s'\n", getBucketFileName(x, subBucket)); exit(0); }
285 
286  //fseek(f, offset, SEEK_CUR);
287  int64_t totalBytes = (buckets[x].theSize*BITS+7)/8;
288  uint8_t data[2048];
289  for (int y = 0; y < 2048; y++)
290  data[y] = 0xFF;
291  for (int64_t y = 0; y < totalBytes; y+=2048)
292  {
293  int currSubBucket = y>>subBucketBits;
294  if (currSubBucket != subBucket)
295  {
296  fclose(f);
297  subBucket = currSubBucket;
298  FILE *f = fopen(getBucketFileName(x, subBucket), "w");
299  if (f == 0)
300  { printf("Error opening file '%s'\n", getBucketFileName(x, subBucket)); exit(0); }
301  }
302  int64_t amnt = std::min(totalBytes-y, (int64_t)2048ll);
303  fwrite(data, sizeof(uint8_t), amnt, f);
304  }
305  fclose(f);
306  //buckets[x].data.resize(buckets[x].theSize);
307  fflush(stdout);
308  }
309 }
310 
311 const char *DiskBitFile::getBucketFileName(int bucket, int subBucket)
312 {
313  //static char fname[255];
314  assert(bucket >= 0);
315  sprintf(bucketFileName, "%s-%d-b%d.%d", prefix, BITS, bucket, subBucket);
316  return bucketFileName;
317 }
DiskBitFile::getBucketFileName
const char * getBucketFileName(int bucket, int subBucket)
Definition: DiskBitFile.cpp:311
DiskBitFile::cache
uint8_t cache[cacheSize]
Definition: DiskBitFile.h:62
DiskBitFile::currSubBucket
int64_t currSubBucket
Definition: DiskBitFile.h:55
DiskBitFile::outputSubBucket
int outputSubBucket
Definition: DiskBitFile.h:47
min
double min(double a, double b)
Definition: FPUtil.h:35
DiskBitFile::chunkFile
FILE * chunkFile
Definition: DiskBitFile.h:52
DiskBitFile::bytesRead
uint64_t bytesRead
Definition: DiskBitFile.h:64
DiskBitFile::cacheFilePosition
int64_t cacheFilePosition
Definition: DiskBitFile.h:60
DiskBitFile::CloseReadWriteFile
void CloseReadWriteFile()
Definition: DiskBitFile.cpp:41
DiskBitFile::CloseReadFile
void CloseReadFile()
Definition: DiskBitFile.cpp:197
DiskBitFile::WriteFileDepth
void WriteFileDepth(int bucket, int64_t offset, uint8_t value)
Definition: DiskBitFile.cpp:56
DiskBitFile::ReadFileDepth
int ReadFileDepth(int bucket, int64_t offset)
Definition: DiskBitFile.cpp:115
DiskBitFile::~DiskBitFile
~DiskBitFile()
Definition: DiskBitFile.cpp:35
DiskBitFile::fileOffset
int64_t fileOffset
Definition: DiskBitFile.h:53
DiskBitFile::bytesWritten
uint64_t bytesWritten
Definition: DiskBitFile.h:64
DiskBitFile::FlushCache
void FlushCache()
Definition: DiskBitFile.cpp:260
DiskBitFile::cacheChanged
bool cacheChanged
Definition: DiskBitFile.h:61
BITS
#define BITS
Definition: DiskBitFile.h:21
DiskBitFile::DiskBitFile
DiskBitFile(const char *pre)
Definition: DiskBitFile.cpp:11
DiskBitFile::Init
void Init(const std::vector< bucketData > &buckets)
Definition: DiskBitFile.cpp:275
DiskBitFile::fileOpen
bool fileOpen
Definition: DiskBitFile.h:51
DiskBitFile::bucketFileName
char bucketFileName[255]
Definition: DiskBitFile.h:65
DiskBitFile.h
DiskBitFile::outputBucket
int outputBucket
Definition: DiskBitFile.h:46
DiskBitFile::theCacheSize
int64_t theCacheSize
Definition: DiskBitFile.h:59
DiskBitFile::ReadChunk
uint8_t * ReadChunk(int bucket, int64_t offset, int numEntries, uint8_t *data)
Definition: DiskBitFile.cpp:208
DiskBitFile::outputFile
FILE * outputFile
Definition: DiskBitFile.h:45
DiskBitFile::subBucketBits
const static int subBucketBits
Definition: DiskBitFile.h:57
DiskBitFile::cacheSize
const static int64_t cacheSize
Definition: DiskBitFile.h:58
DiskBitFile::prefix
char prefix[64]
Definition: DiskBitFile.h:66
DiskBitFile::cacheOffset
int64_t cacheOffset
Definition: DiskBitFile.h:48
DiskBitFile::currBucket
int64_t currBucket
Definition: DiskBitFile.h:54