Pyrogenesis 13997
archive_builder.cpp
1 /* Copyright (c) 2010 Wildfire Games
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * automatically bundles files into archives in order of access to
25  * optimize I/O.
26  */
27 
28 #include "precompiled.h"
29 //#include "vfs_optimizer.h"
30 
31 #if 0
32 
33 #include <set>
34 #include <map>
35 #include <algorithm>
36 #include <ctime>
37 
38 
39 // enough for 64K unique files - ought to suffice.
40 typedef u16 FileId;
41 static const FileId NULL_ID = 0;
42 static const size_t MAX_IDS = 0x10000 -1; // -1 due to NULL_ID
43 
44 
45 struct FileNode
46 {
47  const char* atom_fn;
48 
49  FileId prev_id;
50  FileId next_id;
51  u32 visited : 1;
52  u32 output : 1;
53 
54  FileNode(const char* atom_fn_)
55  {
56  atom_fn = atom_fn_;
57 
58  prev_id = next_id = NULL_ID;
59  visited = output = 0;
60  }
61 };
62 
63 typedef std::vector<FileNode> FileNodes;
64 
65 //-----------------------------------------------------------------------------
66 
67 // check if the file is supposed to be added to archive.
68 // this avoids adding e.g. screenshots (wasteful because they're never used)
69 // or config (bad because they are written to and that's not supported for
70 // archived files).
71 static bool is_archivable(const void* mount)
72 {
73  return mount_is_archivable((Mount*)mount);
74 }
75 
76 class IdMgr
77 {
78  FileId cur;
79  typedef std::map<const char*, FileId> Map;
80  Map map;
81  FileNodes* nodes;
82 
83  // associate the node's atom_fn with its FileId (invoked from init for each node)
84  void associate_node_with_fn(const FileNode& node)
85  {
86  FileId id = id_from_node(&node);
87  const Map::value_type item = std::make_pair(node.atom_fn, id);
88  std::pair<Map::iterator, bool> ret = map.insert(item);
89  if(!ret.second)
90  debug_warn(L"atom_fn already associated with node");
91  }
92 
93 public:
94  FileId id_from_node(const FileNode* node) const
95  {
96  // +1 to skip NULL_ID value
97  FileId id = node - &((*nodes)[0]) +1;
98  ENSURE(id <= nodes->size());
99  return id;
100  }
101 
102  FileNode* node_from_id(FileId id) const
103  {
104  ENSURE(id != NULL_ID);
105  return &(*nodes)[id-1];
106  }
107 
108  FileId id_from_fn(const char* atom_fn) const
109  {
110  Map::const_iterator cit = map.find(atom_fn);
111  if(cit == map.end())
112  {
113  debug_warn(L"id_from_fn: not found");
114  return NULL_ID;
115  }
116  return cit->second;
117  }
118 
119  void init(FileNodes* nodes_)
120  {
121  cur = NULL_ID+1;
122  map.clear();
123  nodes = nodes_;
124 
125  // can't use for_each (mem_fun requires const function and
126  // non-reference-type argument)
127  for(FileNodes::const_iterator cit = nodes->begin(); cit != nodes->end(); ++cit)
128  {
129  const FileNode& node = *cit;
130  associate_node_with_fn(node);
131  }
132  }
133 };
134 static IdMgr id_mgr;
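// a small self-contained sketch (illustrative only, not from the original
// source) of the id<->index mapping IdMgr implements above: a FileId is just
// a 1-based index into the FileNodes vector, with 0 reserved for NULL_ID.
#include <cassert>
#include <vector>
static void example_id_mapping()
{
	std::vector<int> nodes(3);                         // stand-in for FileNodes
	const int* second = &nodes[1];                     // stand-in for a FileNode*
	const size_t id = size_t(second - &nodes[0]) + 1;  // cf. IdMgr::id_from_node
	assert(id == 2);                                   // 1-based; 0 would be NULL_ID
	assert(&nodes[id - 1] == second);                  // cf. IdMgr::node_from_id
}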
135 
136 //-----------------------------------------------------------------------------
137 
138 // build list of FileNode - exactly one per file in VFS.
139 //
140 // time cost: 13ms for 5500 files; we therefore do not bother with
141 // optimizations like reading from vfs_tree container directly.
142 class FileGatherer
143 {
144  static void EntCb(const char* path, const CFileInfo* ent, uintptr_t cbData)
145  {
146  FileNodes* file_nodes = (FileNodes*)cbData;
147 
148  // we only want files
149  if(ent->IsDirectory)
150  return;
151 
152  if(is_archivable(ent->mount))
153  {
154  const char* atom_fn = path_Pool()->UniqueCopy(path);
155  file_nodes->push_back(FileNode(atom_fn));
156  }
157  }
158 
159 public:
160  FileGatherer(FileNodes& file_nodes)
161  {
162  // jump-start allocation (avoids frequent initial reallocs)
163  file_nodes.reserve(500);
164 
165  // TODO: only add entries from mount points that have
166  // VFS_MOUNT_ARCHIVE flag set (avoids adding screenshots etc.)
167  dir_FilteredForEachEntry("", VFS_DIR_RECURSIVE, 0, EntCb, (uintptr_t)&file_nodes);
168 
169  // MAX_IDS is a rather large limit on number of files, but must not
170  // be exceeded (otherwise FileId overflows).
171  // check for this here and not in EntCb because it's not
172  // expected to happen.
173  if(file_nodes.size() > MAX_IDS)
174  {
175  // note: use this instead of resize because FileNode doesn't have
176  // a default ctor. NB: this is how resize is implemented anyway.
177  file_nodes.erase(file_nodes.begin() + MAX_IDS, file_nodes.end());
178  WARN_IF_ERR(ERR::LIMIT);
179  }
180  }
181 };
182 
183 //-----------------------------------------------------------------------------
184 
185 typedef u32 ConnectionId;
186 cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
187 static ConnectionId cid_make(FileId first, FileId second)
188 {
189  return u32_from_u16(first, second);
190 }
191 static FileId cid_first(ConnectionId id)
192 {
193  return u32_hi(id);
194 }
195 static FileId cid_second(ConnectionId id)
196 {
197  return u32_lo(id);
198 }
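// a minimal self-contained sketch (illustrative only, not part of the original
// source) of the ConnectionId packing above: the two 16-bit FileIds are packed
// into one 32-bit value, 'first' in the high half and 'second' in the low half
// (the real code uses u32_from_u16 / u32_hi / u32_lo from lib.cpp).
#include <cassert>
#include <cstdint>
static void example_cid_roundtrip()
{
	const uint16_t first = 7, second = 42;
	const uint32_t cid = (uint32_t(first) << 16) | second;  // cf. cid_make
	assert(uint16_t(cid >> 16) == first);                   // cf. cid_first
	assert(uint16_t(cid & 0xFFFFu) == second);              // cf. cid_second
}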
199 
200 struct Connection
201 {
202  ConnectionId id;
203  // repeated edges ("connections") are reflected in
204  // the 'occurrences' count; we optimize the ordering so that
205  // files with frequent connections are nearby.
206  size_t occurrences;
207 
208  Connection(ConnectionId id_)
209  : id(id_), occurrences(1) {}
210 };
211 
212 typedef std::vector<Connection> Connections;
213 
214 // builds a list of Connection-s (basically edges in the FileNode graph)
215 // defined by the trace.
216 //
217 // time cost: 70ms for 1000 trace entries. this is rather heavy;
218 // the main culprit is simulating file_cache to see if an IO would result.
219 class ConnectionBuilder
220 {
221  // functor: on every call except the first, adds a connection between
222  // the previous file (remembered here) and the current file.
223  // if the connection already exists, its occurrence count is incremented.
224  class ConnectionAdder
225  {
226  // reduces the "already exists" check from O(n*n) to O(n*log n) via a map.
227  typedef std::map<ConnectionId, Connection*> Map;
228  typedef std::pair<ConnectionId, Connection*> MapItem;
229  typedef Map::const_iterator MapCIt;
230  Map map;
231 
232  FileId prev_id;
233 
234  public:
235  ConnectionAdder() : prev_id(NULL_ID) {}
236 
237  void operator()(Connections& connections, const char* new_fn)
238  {
239  const bool was_first_call = (prev_id == NULL_ID);
240  FileId id = id_mgr.id_from_fn(new_fn);
241  const ConnectionId c_id = cid_make(prev_id, id);
242  prev_id = id;
243 
244  if(was_first_call)
245  return; // bail after setting prev_id
246 
247  // note: always insert-ing and checking return value would be
248  // more efficient (saves 1 iteration over map), but would not
249  // be safe: VC8's STL disallows &vector[0] if empty
250  // (even though memory has been reserved).
251  // it doesn't matter much anyway (decently fast and offline task).
252  MapCIt it = map.find(c_id);
253  const bool already_exists = (it != map.end());
254  if(already_exists)
255  {
256  Connection* c = it->second; // Map "payload"
257  c->occurrences++;
258  }
259  // seen this connection for the first time: add to map and list.
260  else
261  {
262  connections.push_back(Connection(c_id));
263  const MapItem item = std::make_pair(c_id, &connections.back());
264  map.insert(item);
265  }
266 
267  stats_ab_connection(already_exists);
268  }
269  };
270 
271  void add_connections_from_runs(const Trace& t, Connections& connections)
272  {
273  // (note: lifetime = entire connection build process; if re-created
274  // in between, entries in Connections will no longer be unique,
275  // which may break TourBuilder)
276  ConnectionAdder add_connection;
277 
278  // extract accesses from each run (most recent run first). this isn't
279  // critical, but may help a bit: files that are equally strongly
280  // 'connected' are ordered by their position in file_nodes, which
281  // means files from more recent traces tend to go first, and that
282  // is good.
283  for(size_t r = 0; r < t.num_runs; r++)
284  {
285  const TraceRun& run = t.runs[r];
286  for(size_t i = 0; i < run.num_ents; i++)
287  {
288  const TraceEntry* te = &run.ents[i];
289  // improvement: postprocess the trace and remove all IOs that would be
290  // satisfied by our cache. often repeated IOs would otherwise potentially
291  // be arranged badly.
292  if(trace_entry_causes_io(te))
293  {
294  // only add connection if this file exists and is in
295  // file_nodes list. otherwise, ConnectionAdder's
296  // id_from_fn call will fail.
297  // note: this happens when trace contains by now
298  // deleted or unarchivable files.
299  TFile* tf;
300  if(tree_lookup(te->atom_fn, &tf) == INFO::OK)
301  if(is_archivable(tf))
302  add_connection(connections, te->atom_fn);
303  }
304  }
305  }
306  }
307 
308 public:
309  Status run(const char* trace_filename, Connections& connections)
310  {
311  Trace t;
312  RETURN_STATUS_IF_ERR(trace_read_from_file(trace_filename, &t));
313 
314  // reserve memory for worst-case amount of connections (happens if
315  // all accesses are unique). this is necessary because we store
316  // pointers to Connection in the map, which would be invalidated if
317  // connections[] ever expands.
318  // may waste up to ~3x the memory (about 1mb) for a short time,
319  // which is ok.
320  connections.reserve(t.total_ents-1);
321 
322  add_connections_from_runs(t, connections);
323 
324  return INFO::OK;
325  }
326 };
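// a simplified, self-contained sketch (illustrative only) of what
// ConnectionBuilder/ConnectionAdder compute above: walk an access sequence and
// count how often each ordered pair of consecutive accesses occurs. plain
// strings stand in for atom_fn here; the real code also skips accesses that
// would be satisfied by the file cache and stores Connection records instead.
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
typedef std::pair<std::string, std::string> ToyEdge;
static std::map<ToyEdge, size_t> example_count_connections(const std::vector<std::string>& accesses)
{
	std::map<ToyEdge, size_t> occurrences;
	for(size_t i = 1; i < accesses.size(); i++)
		occurrences[ToyEdge(accesses[i-1], accesses[i])]++;  // cf. ConnectionAdder::operator()
	return occurrences;
}
// e.g. the sequence {a, b, c, a, b} yields (a,b):2, (b,c):1, (c,a):1; the higher
// count of (a,b) later makes TourBuilder try to place a directly before b.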
327 
328 //-----------------------------------------------------------------------------
329 
330 // given graph and known edges, stitch together FileNodes so that
331 // Hamilton tour (TSP solution) length of the graph is minimized.
332 // heuristic is greedy adding edges sorted by decreasing 'occurrences'.
333 //
334 // time cost: 7ms for 1000 connections; quite fast despite DFS.
335 //
336  // could be improved (if there are lots of files) by storing in each node
337  // a pointer to the end of its chain; adding an edge then introduces a cycle
338  // exactly if the end of the second node's chain is the edge's first node.
339 class TourBuilder
340 {
341  // sort by decreasing occurrence
342  struct Occurrence_greater: public std::binary_function<const Connection&, const Connection&, bool>
343  {
344  bool operator()(const Connection& c1, const Connection& c2) const
345  {
346  return (c1.occurrences > c2.occurrences);
347  }
348  };
349 
350  bool has_cycle;
351  void detect_cycleR(FileId id)
352  {
353  FileNode* pnode = id_mgr.node_from_id(id);
354  pnode->visited = 1;
355  FileId next_id = pnode->next_id;
356  if(next_id != NULL_ID)
357  {
358  FileNode* pnext = id_mgr.node_from_id(next_id);
359  if(pnext->visited)
360  has_cycle = true;
361  else
362  detect_cycleR(next_id);
363  }
364  }
365  bool is_cycle_at(FileNodes& file_nodes, FileId node)
366  {
367  has_cycle = false;
368  for(FileNodes::iterator it = file_nodes.begin(); it != file_nodes.end(); ++it)
369  it->visited = 0;
370  detect_cycleR(node);
371  return has_cycle;
372  }
373 
374  void try_add_edge(FileNodes& file_nodes, const Connection& c)
375  {
376  FileId first_id = cid_first(c.id);
377  FileId second_id = cid_second(c.id);
378 
379  FileNode* first = id_mgr.node_from_id(first_id);
380  FileNode* second = id_mgr.node_from_id(second_id);
381  // one of them has already been hooked up - bail
382  if(first->next_id != NULL_ID || second->prev_id != NULL_ID)
383  return;
384 
385  first->next_id = second_id;
386  second->prev_id = first_id;
387 
388  const bool introduced_cycle = is_cycle_at(file_nodes, second_id);
389 #ifndef NDEBUG
390  ENSURE(introduced_cycle == is_cycle_at(file_nodes, first_id));
391 #endif
392  if(introduced_cycle)
393  {
394  // undo
395  first->next_id = second->prev_id = NULL_ID;
396  return;
397  }
398  }
399 
400 
401  void output_chain(FileNode& node, std::vector<const char*>& fn_vector)
402  {
403  // early out: if this node has already been output, so has the entire
404  // chain of which it is a part; bail to save lots of time.
405  if(node.output)
406  return;
407 
408  // follow prev links starting with <node> until no more are left;
409  // start ends up at the beginning of the chain that includes <node>.
410  FileNode* start = &node;
411  while(start->prev_id != NULL_ID)
412  start = id_mgr.node_from_id(start->prev_id);
413 
414  // iterate over the chain - add each file to the filename list and mark it as output
415  FileNode* cur = start;
416  for(;;)
417  {
418  if(!cur->output)
419  {
420  fn_vector.push_back(cur->atom_fn);
421  cur->output = 1;
422  }
423  if(cur->next_id == NULL_ID)
424  break;
425  cur = id_mgr.node_from_id(cur->next_id);
426  }
427  }
428 
429 public:
430  TourBuilder(FileNodes& file_nodes, Connections& connections, std::vector<const char*>& fn_vector)
431  {
432  std::stable_sort(connections.begin(), connections.end(), Occurrence_greater());
433 
434  for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
435  try_add_edge(file_nodes, *it);
436 
437  for(FileNodes::iterator it = file_nodes.begin(); it != file_nodes.end(); ++it)
438  output_chain(*it, fn_vector);
439  }
440 };
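// a simplified, self-contained sketch (illustrative only) of the greedy
// heuristic TourBuilder uses above: edges are visited in decreasing occurrence
// order and accepted only if neither endpoint is already linked on that side
// and no cycle would result. plain int ids stand in for FileIds; the real code
// stores prev_id/next_id inside FileNode and detects cycles via detect_cycleR.
#include <algorithm>
#include <cstddef>
#include <map>
#include <vector>
struct ToyConnection { int first, second; size_t occurrences; };
struct ToyOccurrenceGreater
{
	bool operator()(const ToyConnection& a, const ToyConnection& b) const
	{
		return a.occurrences > b.occurrences;
	}
};
// adding the edge (from -> to) would close a cycle exactly if following
// next-links from <to> eventually reaches <from>.
static bool toy_creates_cycle(const std::map<int,int>& next, int from, int to)
{
	for(int cur = to;;)
	{
		if(cur == from)
			return true;
		const std::map<int,int>::const_iterator it = next.find(cur);
		if(it == next.end())
			return false;
		cur = it->second;
	}
}
static void toy_build_chains(std::vector<ToyConnection> connections, std::map<int,int>& next, std::map<int,int>& prev)
{
	std::stable_sort(connections.begin(), connections.end(), ToyOccurrenceGreater());
	for(size_t i = 0; i < connections.size(); i++)
	{
		const ToyConnection& c = connections[i];
		if(next.count(c.first) || prev.count(c.second))
			continue;  // one endpoint already hooked up (cf. try_add_edge)
		if(toy_creates_cycle(next, c.first, c.second))
			continue;  // would close a cycle - reject this edge
		next[c.first] = c.second;
		prev[c.second] = c.first;
	}
	// the resulting chains can then be emitted by walking prev-links back to
	// each chain head and following next-links from there (cf. output_chain).
}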
441 
442 
443 //-----------------------------------------------------------------------------
444 // autobuild logic: decides when to (re)build an archive.
445 //-----------------------------------------------------------------------------
446 
447 // for each loose or archived file encountered during mounting: add to a
448 // std::set; if there are more than *_THRESHOLD non-archived files, rebuild.
449 // this ends up costing 50ms for 5000 files, so disable it in final release.
450 #if CONFIG_FINAL
451 # define AB_COUNT_LOOSE_FILES 0
452 #else
453 # define AB_COUNT_LOOSE_FILES 1
454 #endif
455 // rebuild if the archive is much older than most recent VFS timestamp.
456 // this makes sense during development: the archive will periodically be
457 // rebuilt with the newest trace. however, it would be annoying in the
458 // final release, where users will frequently mod things, which should not
459 // end up rebuilding the main archive.
460 #if CONFIG_FINAL
461 # define AB_COMPARE_MTIME 0
462 #else
463 # define AB_COMPARE_MTIME 1
464 #endif
465 
466 #if AB_COUNT_LOOSE_FILES
467 static const ssize_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 50;
468 static const ssize_t BUILD_MINI_ARCHIVE_THRESHOLD = 20;
469 
470 typedef std::set<const char*> FnSet;
471 static FnSet loose_files;
472 static FnSet archived_files;
473 #endif
474 
475 void vfs_opt_notify_loose_file(const char* atom_fn)
476 {
477 #if AB_COUNT_LOOSE_FILES
478  // note: files are added before archives, so we can't stop adding to the
479  // set after one of the above thresholds is reached.
480  loose_files.insert(atom_fn);
481 #endif
482 }
483 
484 void vfs_opt_notify_archived_file(const char* atom_fn)
485 {
486 #if AB_COUNT_LOOSE_FILES
487  archived_files.insert(atom_fn);
488 #endif
489 }
490 
491 
492 static bool should_rebuild_main_archive(const char* trace_filename,
493  FilesystemEntries& existing_archives)
494 {
495  // if there's no trace file, no point in building a main archive.
496  // (we wouldn't know how to order the files)
497  if(!file_exists(trace_filename))
498  return false;
499 
500 #if AB_COUNT_LOOSE_FILES
501  // too many (eligible for archiving!) loose files not in archive: rebuild.
502  const ssize_t loose_files_only = (ssize_t)loose_files.size() - (ssize_t)archived_files.size();
503  if(loose_files_only >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
504  return true;
505 #endif
506 
507  // scan dir and see what archives are already present..
508  {
509  time_t most_recent_archive_mtime = 0;
510  // note: a loop is more convenient than std::for_each, which would
511  // require referencing the returned functor (since param is a copy).
512  for(FilesystemEntries::const_iterator it = existing_archives.begin(); it != existing_archives.end(); ++it)
513  most_recent_archive_mtime = std::max(it->mtime, most_recent_archive_mtime);
514  // .. no archive yet OR 'lots' of them: rebuild so that they'll be
515  // merged into one archive and the rest deleted.
516  if(existing_archives.empty() || existing_archives.size() >= 4)
517  return true;
518 #if AB_COMPARE_MTIME
519  // .. archive is much older than most recent data: rebuild.
520  const double max_diff = 14*86400; // 14 days
521  if(difftime(tree_most_recent_mtime(), most_recent_archive_mtime) > max_diff)
522  return true;
523 #endif
524  }
525 
526  return false;
527 }
528 
529 //-----------------------------------------------------------------------------
530 
531 
532 static char archive_fn[PATH_MAX];
533 static ArchiveBuildState ab;
534 static std::vector<const char*> fn_vector;
535 static FilesystemEntries existing_archives; // and possibly other entries
536 
537 class IsArchive
538 {
539  const char* archive_ext;
540 
541 public:
542  IsArchive(const char* archive_fn)
543  {
544  archive_ext = path_extension(archive_fn);
545  }
546 
547  bool operator()(CFileInfo& ent) const
548  {
549  // remove if not file
550  if(ent.IsDirectory)
551  return true;
552 
553  // remove if not same extension
554  const char* ext = path_extension(ent.name);
555  if(strcasecmp(archive_ext, ext) != 0)
556  return true;
557 
558  // keep
559  return false;
560  }
561 };
562 
563 static Status vfs_opt_init(const char* trace_filename, const char* archive_fn_fmt, bool force_build)
564 {
565  Filesystem_Posix fsPosix;
566 
567  // get next not-yet-existing archive filename.
568  static NextNumberedFilenameState archive_nfi;
569  dir_NextNumberedFilename(&fsPosix, archive_fn_fmt, &archive_nfi, archive_fn);
570 
571  // get list of existing archives in root dir.
572  // note: this is needed by should_rebuild_main_archive and later in
573  // vfs_opt_continue; must be done here instead of inside the former
574  // because that is not called when force_build == true.
575  {
576  char dir[PATH_MAX];
577  path_dir_only(archive_fn_fmt, dir);
578  RETURN_STATUS_IF_ERR(dir_GatherSortedEntries(&fsPosix, dir, existing_archives));
579  DirEntIt new_end = std::remove_if(existing_archives.begin(), existing_archives.end(), IsArchive(archive_fn));
580  existing_archives.erase(new_end, existing_archives.end());
581  }
582 
583  // bail if we shouldn't rebuild the archive.
584  if(!force_build && !should_rebuild_main_archive(trace_filename, existing_archives))
585  return INFO::SKIPPED;
586 
587  // build 'graph' (nodes only) of all files that must be added.
588  FileNodes file_nodes;
589  FileGatherer gatherer(file_nodes);
590  if(file_nodes.empty())
591  WARN_RETURN(ERR::DIR_END);
592 
593  // scan nodes and add them to filename->FileId mapping.
594  id_mgr.init(&file_nodes);
595 
596  // build list of edges between FileNodes (referenced via FileId) that
597  // are defined by trace entries.
598  Connections connections;
599  ConnectionBuilder cbuilder;
600  RETURN_STATUS_IF_ERR(cbuilder.run(trace_filename, connections));
601 
602  // create output filename list by first adding the above edges (most
603  // frequent first) and then adding the rest sequentially.
604  TourBuilder builder(file_nodes, connections, fn_vector);
605  fn_vector.push_back(0); // 0-terminate for use as Filenames
606  Filenames V_fns = &fn_vector[0];
607 
608  RETURN_STATUS_IF_ERR(archive_build_init(archive_fn, V_fns, &ab));
609  return INFO::OK;
610 }
611 
612 
613 static int vfs_opt_continue()
614 {
615  int ret = archive_build_continue(&ab);
616  if(ret == INFO::OK)
617  {
618  // do NOT delete source files! some apps might want to
619  // keep them (e.g. for source control), or name them differently.
620 
621  mount_release_all_archives();
622 
623  // delete old archives
624  PathPackage pp; // need path to each existing_archive, not only name
625  {
626  char archive_dir[PATH_MAX];
627  path_dir_only(archive_fn, archive_dir);
628  (void)path_package_set_dir(&pp, archive_dir);
629  }
630  for(DirEntCIt it = existing_archives.begin(); it != existing_archives.end(); ++it)
631  {
632  (void)path_package_append_file(&pp, it->name);
633  (void)file_delete(pp.path);
634  }
635 
636  // rebuild is required due to mount_release_all_archives.
637  // the dir watcher may already have rebuilt the VFS once,
638  // which is a waste of time here.
639  (void)mount_rebuild();
640 
641  // it is believed that wiping out the file cache is not necessary.
642  // building the archive doesn't change the game data files, and any
643  // cached contents of the previous archives are irrelevant.
644  }
645  return ret;
646 }
647 
648 
649 
650 
651 
652 static bool should_build_mini_archive(const char* UNUSED(mini_archive_fn_fmt))
653 {
654 #if AB_COUNT_LOOSE_FILES
655  // too many (eligible for archiving!) loose files not in archive
656  const ssize_t loose_files_only = (ssize_t)loose_files.size() - (ssize_t)archived_files.size();
657  if(loose_files_only >= BUILD_MINI_ARCHIVE_THRESHOLD)
658  return true;
659 #endif
660  return false;
661 }
662 
663 static Status build_mini_archive(const char* mini_archive_fn_fmt)
664 {
665  if(!should_build_mini_archive(mini_archive_fn_fmt))
666  return INFO::SKIPPED;
667 
668 #if AB_COUNT_LOOSE_FILES
669  Filenames V_fns = new const char*[loose_files.size()+1];
670  std::copy(loose_files.begin(), loose_files.end(), &V_fns[0]);
671  V_fns[loose_files.size()] = 0; // terminator
672 
673  // get a new, unused mini-archive name (written to mini_archive_fn)
674  char mini_archive_fn[PATH_MAX];
675  static NextNumberedFilenameState nfi;
676  Filesystem_Posix fsPosix;
677  dir_NextNumberedFilename(&fsPosix, mini_archive_fn_fmt, &nfi, mini_archive_fn);
678 
679  RETURN_STATUS_IF_ERR(archive_build(mini_archive_fn, V_fns));
680  delete[] V_fns;
681  return INFO::OK;
682 #else
683  return ERR::NOT_SUPPORTED;
684 #endif
685 }
686 
687 
688 
689 
690 
691 
692 //-----------------------------------------------------------------------------
693 
694 // array of pointers to VFS filenames (including path), terminated by a
695 // NULL entry.
696 typedef const char** Filenames;
697 
698 struct IArchiveWriter;
699 struct ICodec;
700 
701 // rationale: this is fairly lightweight and simple, so we don't bother
702 // making it opaque.
703 struct ArchiveBuildState
704 {
705  IArchiveWriter* archiveBuilder;
706  ICodec* codec;
707  Filenames V_fns;
708  size_t num_files; // number of filenames in V_fns (excluding final 0)
709  size_t i;
710 };
711 
712 
713 // create an archive (overwriting previous file) and fill it with the given
714 // files. compression method is chosen based on extension.
715 Status archive_build_init(const char* P_archive_filename, Filenames V_fns, ArchiveBuildState* ab)
716 {
717  ab->archiveBuilder = CreateArchiveBuilder_Zip(P_archive_filename);
718 
719  ab->codec = ab->archiveBuilder->CreateCompressor();
720 
721  ab->V_fns = V_fns;
722 
723  // count number of files (needed to estimate progress)
724  for(ab->num_files = 0; ab->V_fns[ab->num_files]; ab->num_files++) {}
725 
726  ab->i = 0;
727  return INFO::OK;
728 }
729 
730 
731 int archive_build_continue(ArchiveBuildState* ab)
732 {
733  const double end_time = timer_Time() + 200e-3;
734 
735  for(;;)
736  {
737  const char* V_fn = ab->V_fns[ab->i];
738  if(!V_fn)
739  break;
740 
741  IArchiveFile ent; const u8* file_contents; IoBuf buf;
742  if(read_and_compress_file(V_fn, ab->codec, ent, file_contents, buf) == INFO::OK)
743  {
744  (void)ab->archiveBuilder->AddFile(&ent, file_contents);
745  (void)file_cache_free(buf);
746  }
747 
748  ab->i++;
749 
750  if(timer_Time() > end_time)
751  {
752  int progress_percent = (ab->i*100 / ab->num_files);
753  // 0 means "finished", so don't return that!
754  if(progress_percent == 0)
755  progress_percent = 1;
756  ENSURE(0 < progress_percent && progress_percent <= 100);
757  return progress_percent;
758  }
759  }
760 
761  // note: this is currently known to fail if there are no files in the list
762  // - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
763  // safe to ignore.
764  SAFE_DELETE(ab->codec);
765  SAFE_DELETE(ab->archiveBuilder);
766 
767  return INFO::OK;
768 }
769 
770 
771 void archive_build_cancel(ArchiveBuildState* ab)
772 {
773  // note: the GUI may call us even though no build was ever in progress.
774  // be sure to make all steps no-op if <ab> is zeroed (initial state) or
775  // no build is in progress.
776 
777  SAFE_DELETE(ab->codec);
778  SAFE_DELETE(ab->archiveBuilder);
779 }
780 
781 
782 Status archive_build(const char* P_archive_filename, Filenames V_fns)
783 {
784  ArchiveBuildState ab;
785  RETURN_STATUS_IF_ERR(archive_build_init(P_archive_filename, V_fns, &ab));
786  for(;;)
787  {
788  int ret = archive_build_continue(&ab);
789  RETURN_STATUS_IF_ERR(ret);
790  if(ret == INFO::OK)
791  return INFO::OK;
792  }
793 }
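// a hypothetical usage sketch (not part of the original source) of the
// incremental interface above, e.g. driven from a GUI main loop:
// archive_build_continue returns a progress percentage (1..100) while work
// remains and INFO::OK once the archive is complete, so it can be called once
// per frame. 'DisplayProgress' is a made-up placeholder for whatever the
// caller does with that percentage.
static void example_incremental_build(const char* P_archive_filename, Filenames V_fns)
{
	ArchiveBuildState ab;
	if(archive_build_init(P_archive_filename, V_fns, &ab) != INFO::OK)
		return;
	for(;;)  // in a real GUI, this would be one call per frame instead
	{
		const int ret = archive_build_continue(&ab);
		if(ret == INFO::OK)
			break;  // finished
		if(ret < 0)  // defensive: treat negative Status values as errors
		{
			archive_build_cancel(&ab);
			break;
		}
		//DisplayProgress(ret);  // hypothetical; ret is 1..100
	}
}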
794 
795 
796 
797 
798 
799 
800 
801 
802 
803 
804 
805 
806 
807 
808 
809 
810 
811 static enum
812 {
813  DECIDE_IF_BUILD,
814  IN_PROGRESS,
815  NOP
816 }
817 state = DECIDE_IF_BUILD;
818 
819 void vfs_opt_auto_build_cancel()
820 {
821  archive_build_cancel(&ab);
822  state = NOP;
823 }
824 
825 int vfs_opt_auto_build(const char* trace_filename,
826  const char* archive_fn_fmt, const char* mini_archive_fn_fmt, bool force_build)
827 {
828  if(state == NOP)
829  return INFO::ALL_COMPLETE;
830 
831  if(state == DECIDE_IF_BUILD)
832  {
833  if(vfs_opt_init(trace_filename, archive_fn_fmt, force_build) != INFO::SKIPPED)
834  state = IN_PROGRESS;
835  else
836  {
837  // create mini-archive (if needed)
838  RETURN_STATUS_IF_ERR(build_mini_archive(mini_archive_fn_fmt));
839 
840  state = NOP;
841  return INFO::OK; // "finished"
842  }
843  }
844 
845  if(state == IN_PROGRESS)
846  {
847  int ret = vfs_opt_continue();
848  // just finished
849  if(ret == INFO::OK)
850  state = NOP;
851  return ret;
852  }
853 
854  UNREACHABLE;
855 }
856 
857 Status vfs_opt_rebuild_main_archive(const char* trace_filename, const char* archive_fn_fmt)
858 {
859  for(;;)
860  {
861  int ret = vfs_opt_auto_build(trace_filename, archive_fn_fmt, 0, true);
862  RETURN_STATUS_IF_ERR(ret);
863  if(ret == INFO::OK)
864  return INFO::OK;
865  }
866 }
867 
868 
869 
870 
871 
872 
873 
874 
875 
876 
877 
878 
879 
880 
881 
882 class TraceRun
883 {
884 public:
885  TraceRun(const TraceEntry* entries, size_t numEntries)
886  : m_entries(entries), m_numEntries(numEntries)
887  {
888  }
889 
890  const TraceEntry* Entries() const
891  {
892  return m_entries;
893  }
894 
895  size_t NumEntries() const
896  {
897  return m_numEntries;
898  }
899 
900 private:
901  const TraceEntry* m_entries;
902  size_t m_numEntries;
903 };
904 
905 
906 struct Trace
907 {
908  // most recent first! (see rationale in source)
909  std::vector<TraceRun> runs;
910 
911  size_t total_ents;
912 };
913 
914 extern void trace_get(Trace* t);
915 extern Status trace_write_to_file(const char* trace_filename);
916 extern Status trace_read_from_file(const char* trace_filename, Trace* t);
917 
918 
919 // simulate carrying out the entry's TraceOp to determine
920 // whether this IO would be satisfied by the file_buf cache.
921 extern bool trace_entry_causes_io(const TraceEntry* ent);
922 
923 
924 // carry out all operations specified in the trace.
925 extern Status trace_run(const char* trace_filename);
926 
927 
928 
929 //-----------------------------------------------------------------------------
930 
931 // put all entries in one trace file: easier to handle; obviates FS enum code
932 // rationale: don't go through trace in order; instead, process most recent
933 // run first, to give more weight to it (TSP code should go with first entry
934 // when #occurrences are equal)
935 
936 #error "reimplement me"
937 // update: file enumeration really isn't hard, and splitting separate
938 // traces into files would greatly simplify deleting old traces after
939 // a while and avoid indexing problems due to counting delimiter lines
940 // (#167). this section should be rewritten.
941 
942 
943 static const TraceEntry delimiter_entry =
944 {
945  0.0f, // timestamp
946  "------------------------------------------------------------",
947  0, // size
948  TO_FREE,// TraceOp (never seen by user; value doesn't matter)
949 };
950 
951 // storage for Trace.runs.
952 static std::vector<TraceRun> runs;
953 
954 // note: the last entry may be one past the number of actual entries.
955 // WARNING: due to a misfeature in DelimiterAdder, indices are added twice.
956 // this is fixed in trace_get; just don't rely on run_start_indices.size()!
957 static std::vector<size_t> run_start_indices;
958 
959 class DelimiterAdder
960 {
961 public:
962  enum Consequence
963  {
964  SKIP_ADD,
965  CONTINUE
966  };
967  Consequence operator()(size_t i, double timestamp, const char* P_path)
968  {
969  // this entry is a delimiter
970  if(!strcmp(P_path, delimiter_entry.vfsPathname))
971  {
972  run_start_indices.push_back(i+1); // skip this entry
973  // note: its timestamp is invalid, so don't set cur_timestamp!
974  return SKIP_ADD;
975  }
976 
977  const double last_timestamp = cur_timestamp;
978  cur_timestamp = timestamp;
979 
980  // first item is always start of a run
981  if((i == 0) ||
982  // timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
983  (timestamp < last_timestamp))
984  run_start_indices.push_back(i);
985 
986  return CONTINUE;
987  }
988 private:
989  double cur_timestamp;
990 };
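// a simplified, self-contained sketch (illustrative only) of the run-splitting
// rule DelimiterAdder implements above: a new run starts at index 0 and
// whenever the timestamp decreases (i.e. timing started over after a restart).
// delimiter entries are left out here for brevity; the real code also starts a
// new run after each delimiter line and skips that entry itself.
#include <cstddef>
#include <vector>
static std::vector<size_t> example_run_starts(const std::vector<double>& timestamps)
{
	std::vector<size_t> starts;
	for(size_t i = 0; i < timestamps.size(); i++)
		if(i == 0 || timestamps[i] < timestamps[i-1])
			starts.push_back(i);
	return starts;
}
// e.g. the timestamps {1, 2, 29, 30, 1, 5} yield run start indices {0, 4}.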
991 
992 
993 
994 //-----------------------------------------------------------------------------
995 
996 
997 void trace_get(Trace* t)
998 {
999  const TraceEntry* ents; size_t num_ents;
1000  trace_get_raw_ents(ents, num_ents);
1001 
1002  // nobody had split ents up into runs; just create one big 'run'.
1003  if(run_start_indices.empty())
1004  run_start_indices.push_back(0);
1005 
1006  t->runs = runs;
1007  t->num_runs = 0; // counted up
1008  t->total_ents = num_ents;
1009 
1010  size_t last_start_idx = num_ents;
1011 
1012  std::vector<size_t>::reverse_iterator it;
1013  for(it = run_start_indices.rbegin(); it != run_start_indices.rend(); ++it)
1014  {
1015  const size_t start_idx = *it;
1016  // run_start_indices.back() may be = num_ents (could happen if
1017  // a zero-length run gets written out); skip that to avoid
1018  // zero-length run here.
1019  // also fixes DelimiterAdder misbehavior of adding 2 indices per run.
1020  if(last_start_idx == start_idx)
1021  continue;
1022 
1023  ENSURE(start_idx < t->total_ents);
1024 
1025  TraceRun& run = runs[t->num_runs++];
1026  run.num_ents = last_start_idx - start_idx;
1027  run.ents = &ents[start_idx];
1028  last_start_idx = start_idx;
1029 
1030  if(t->num_runs == MAX_RUNS)
1031  break;
1032  }
1033 
1034  ENSURE(t->num_runs != 0);
1035 }
1036 
1037 
1038 //-----------------------------------------------------------------------------
1039 
1040 // simulate carrying out the entry's TraceOp to determine
1041 // whether this IO would be satisfied by the file cache.
1042 bool trace_entry_causes_io(FileCache& simulatedCache, const TraceEntry* ent)
1043 {
1044  FileCacheData buf;
1045  const size_t size = ent->size;
1046  const char* vfsPathname = ent->vfsPathname;
1047  switch(ent->op)
1048  {
1049  case TO_STORE:
1050  break;
1051 
1052  case TO_LOAD:
1053  // cache miss
1054  if(!simulatedCache.Retrieve(vfsPathname, size))
1055  {
1056  // TODO: simulatedCache never evicts anything..
1057  simulatedCache.Reserve();
1058  simulatedCache.MarkComplete();
1059  return true;
1060  }
1061  break;
1062 
1063  case TO_FREE:
1064  simulatedCache.Release(vfsPathname);
1065  break;
1066 
1067  default:
1068  debug_warn(L"unknown TraceOp");
1069  }
1070 
1071  return false;
1072 }
1073 
1074 // one run per file
1075 
1076 
1077 
1078 // enabled by default. by the time we can decide whether a trace needs to
1079 // be generated (see should_rebuild_main_archive), file accesses will
1080 // already have occurred; hence it defaults to enabled and is disabled later if not needed.
1081 
1082 
1083 
1084 
1085 
1086 {
1087  // carry out this entry's operation
1088  switch(ent->op)
1089  {
1090  // do not 'run' writes - we'd destroy the existing data.
1091  case TO_STORE:
1092  break;
1093  case TO_LOAD:
1094  {
1095  IoBuf buf; size_t size;
1096  (void)vfs_load(ent->vfsPathname, buf, size, ent->flags);
1097  break;
1098  }
1099  case TO_FREE:
1100  fileCache.Release(ent->vfsPathname);
1101  break;
1102  default:
1103  debug_warn(L"unknown TraceOp");
1104  }
1105 }
1106 
1107 
1108 
1109 
1110 
1111 Status trace_run(const char* osPathname)
1112 {
1113  Trace trace;
1114  RETURN_STATUS_IF_ERR(trace.Load(osPathname));
1115  for(size_t i = 0; i < trace.NumEntries(); i++)
1116  trace.Entries()[i]->Run();
1117  return INFO::OK;
1118 }
1119 
1120 #endif