Audacity 3.2.0
ProjectSerializer.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4 Audacity(R) is copyright (c) 1999-2010 Audacity Team.
5 License: GPL v2 or later. See License.txt.
6
7 ProjectSerializer.cpp
8
9*******************************************************************//********************************************************************/
15
16#include "ProjectSerializer.h"
17
18#include <algorithm>
19#include <cstdint>
20#include <mutex>
21#include <wx/ustring.h>
22#include <codecvt>
23#include <locale>
24#include <deque>
25
26#include <wx/log.h>
27
29#include "MemoryX.h"
30
34
35// Simple "binary xml" format used exclusively for project documents.
36//
37// It is not intended that the user view or modify the file.
38//
39// It IS intended that very little work be done during auto save, so numbers
40// and strings are written in their native format. They will be converted
41// during recovery.
42//
// The file has 3 main sections:
//
//    character size  -- 1 (UTF-8), 2 (UTF-16) or 4 (UTF-32)
//    name dictionary -- dictionary of all names used in the document
//    data fields     -- the "encoded" XML document
48//
49// If a subtree is added, it will be preceded with FT_Push to tell the decoder
50// to preserve the active dictionary. The decoder will then restore the
51// dictionary when an FT_Pop is encountered. Nesting is unlimited.
52//
53// To save space, each name (attribute or element) encountered is stored in
54// the name dictionary and replaced with the assigned 2-byte identifier.
55//
56// All strings are in native unicode format, 2-byte or 4-byte.
57//
58// All name "lengths" are 2-byte signed, so are limited to 32767 bytes long.
59// All string/data "lengths" are 4-byte signed.
60
62{
63 FT_CharSize, // type, ID, value
64 FT_StartTag, // type, ID
65 FT_EndTag, // type, ID
66 FT_String, // type, ID, string length, string
67 FT_Int, // type, ID, value
68 FT_Bool, // type, ID, value
69 FT_Long, // type, ID, value
70 FT_LongLong, // type, ID, value
71 FT_SizeT, // type, ID, value
72 FT_Float, // type, ID, value, digits
73 FT_Double, // type, ID, value, digits
74 FT_Data, // type, string length, string
75 FT_Raw, // type, string length, string
76 FT_Push, // type only
77 FT_Pop, // type only
78 FT_Name // type, ID, name length, name
79};
80
81// Static so that the dict can be reused each time.
82//
83// If entries get added later, like when an envelope node (for example)
84// is written and then the envelope is later removed, the dict will still
85// contain the envelope name, but that's not a problem.
86
89
91{
92 return
93XO("This recovery file was saved by Audacity 2.3.0 or before.\n"
94 "You need to run that version of Audacity to recover the project." );
95}
96
97namespace
98{
99// Aliases for the FIXED-WIDTH integer types that are used in the file
100// format.
101
102// Chosen so that among the four build types (32 bit Windows, 64
103// bit Windows, 64 bit Mac clang, Linux g++) presently done (3.0.0
104// development), we use the narrowest width of the type on any of them, so
105// that anything saved on one build will be read back identically on all
106// builds. (Although this means that very large values on some systems might
107// be saved and then read back with loss.)
108
109// In fact the only types for which this matters are long (only 32 bits on
110// 32 and 64 bit Windows) and size_t (only 32 bits on 32 bit Windows).
111
112using UShort = std::uint16_t;
113using Int = std::int32_t;
114
115using Long = std::int32_t; // To save long values
116using ULong = std::uint32_t; // To save size_t values
117
118using LongLong = std::int64_t;
119
120// In C++20 this could be
121// constexpr bool IsLittleEndian = (std::endian::native == std::endian::little);
122// static_assert( IsLittleEndian || (std::endian::native == std::endian::big),
123// "Oh no! I'm mixed-endian!" );
124
125// Functions that can read and write native integer types to a canonicalized
126// little-endian file format. (We don't bother to do the same for floating
127// point numbers.)
128
129// Write native little-endian to little-endian file format
130template <typename Number>
131void WriteLittleEndian(MemoryStream& out, Number value)
132{
133 out.AppendData(&value, sizeof(value));
134}
135
136// Write native big-endian to little-endian file format
137template <typename Number> void WriteBigEndian(MemoryStream& out, Number value)
138{
139 auto begin = static_cast<unsigned char*>(static_cast<void*>(&value));
140 std::reverse(begin, begin + sizeof(value));
141 out.AppendData(&value, sizeof(value));
142}
143
144// Read little-endian file format to native little-endian
145template <typename Number> Number ReadLittleEndian(BufferedStreamReader& in)
146{
147 Number result;
148 in.ReadValue(result);
149 return result;
150}
151
152// Read little-endian file format to native big-endian
153template <typename Number> Number ReadBigEndian(BufferedStreamReader& in)
154{
155 Number result;
156 in.ReadValue(result);
157 auto begin = static_cast<unsigned char*>(static_cast<void*>(&result));
158 std::reverse(begin, begin + sizeof(result));
159 return result;
160}
161
162// Choose between implementations!
163static const auto WriteUShort =
164 IsLittleEndian() ? &WriteLittleEndian<UShort> : &WriteBigEndian<UShort>;
165static const auto WriteInt =
166 IsLittleEndian() ? &WriteLittleEndian<Int> : &WriteBigEndian<Int>;
167static const auto WriteLong =
168 IsLittleEndian() ? &WriteLittleEndian<Long> : &WriteBigEndian<Long>;
169static const auto WriteULong =
170 IsLittleEndian() ? &WriteLittleEndian<ULong> : &WriteBigEndian<ULong>;
171static const auto WriteLongLong =
172 IsLittleEndian() ? &WriteLittleEndian<LongLong> : &WriteBigEndian<LongLong>;
173
174static const auto ReadUShort =
175 IsLittleEndian() ? &ReadLittleEndian<UShort> : &ReadBigEndian<UShort>;
176static const auto ReadInt =
177 IsLittleEndian() ? &ReadLittleEndian<Int> : &ReadBigEndian<Int>;
178static const auto ReadLong =
179 IsLittleEndian() ? &ReadLittleEndian<Long> : &ReadBigEndian<Long>;
180static const auto ReadULong =
181 IsLittleEndian() ? &ReadLittleEndian<ULong> : &ReadBigEndian<ULong>;
182static const auto ReadLongLong =
183 IsLittleEndian() ? &ReadLittleEndian<LongLong> : &ReadBigEndian<LongLong>;
184
185// Functions to read and write certain lengths -- maybe we will change
186// our choices for widths or signedness?
187
188using Length = Int; // Instead, as wide as size_t?
189static const auto WriteLength = WriteInt;
190static const auto ReadLength = ReadInt;
191
192using Digits = Int; // Instead, just an unsigned char?
193static const auto WriteDigits = WriteInt;
194static const auto ReadDigits = ReadInt;
195
197{
198public:
200 : mBaseHandler(handler)
201 {
202 }
203
204 void EmitStartTag(const std::string_view& name)
205 {
206 if (mInTag)
207 EmitStartTag();
208
209 mCurrentTagName = name;
210 mInTag = true;
211 }
212
213 void EndTag(const std::string_view& name)
214 {
215 if (mInTag)
216 EmitStartTag();
217
218 if (XMLTagHandler* const handler = mHandlers.back())
219 handler->HandleXMLEndTag(name);
220
221 mHandlers.pop_back();
222 }
223
224 void WriteAttr(const std::string_view& name, std::string value)
225 {
226 assert(mInTag);
227
228 if (!mInTag)
229 return;
230
231 mAttributes.emplace_back(name, CacheString(std::move(value)));
232 }
233
234 template <typename T> void WriteAttr(const std::string_view& name, T value)
235 {
236 assert(mInTag);
237
238 if (!mInTag)
239 return;
240
241 mAttributes.emplace_back(name, XMLAttributeValueView(value));
242 }
243
244 void WriteData(std::string value)
245 {
246 if (mInTag)
247 EmitStartTag();
248
249 if (XMLTagHandler* const handler = mHandlers.back())
250 handler->HandleXMLContent(CacheString(std::move(value)));
251 }
252
// Deliberately a no-op: the only data serialized as FT_Raw is boilerplate
// such as <?xml > and <!DOCTYPE>, and that is ignored.
void WriteRaw(std::string)
{
}
260
261 bool Finalize()
262 {
263 if (mInTag)
264 {
265 EmitStartTag();
266 EndTag(mCurrentTagName);
267 }
268
269 return mBaseHandler != nullptr;
270 }
271
272private:
274 {
275 if (mHandlers.empty())
276 {
277 mHandlers.push_back(mBaseHandler);
278 }
279 else
280 {
281 if (XMLTagHandler* const handler = mHandlers.back())
282 mHandlers.push_back(handler->HandleXMLChild(mCurrentTagName));
283 else
284 mHandlers.push_back(NULL);
285 }
286
287 if (XMLTagHandler*& handler = mHandlers.back())
288 {
289 if (!handler->HandleXMLTag(mCurrentTagName, mAttributes))
290 {
291 handler = nullptr;
292
293 if (mHandlers.size() == 1)
294 mBaseHandler = nullptr;
295 }
296 }
297
298 mStringsCache.clear();
299 mAttributes.clear();
300 mInTag = false;
301 }
302
303 std::string_view CacheString(std::string string)
304 {
305 mStringsCache.emplace_back(std::move(string));
306 return mStringsCache.back();
307 }
308
310
311 std::vector<XMLTagHandler*> mHandlers;
312
313 std::string_view mCurrentTagName;
314
315 std::deque<std::string> mStringsCache;
317
318 bool mInTag { false };
319};
320
321// template<typename BaseCharType>
322// std::string FastStringConvertFromAscii(const BaseCharType* begin, const BaseCharType* end)
323// {
324//
325// }
326
// Converts a buffer of fixed-width code units into a UTF-8 std::string.
//
// BaseCharType gives the width of one stored code unit:
//  - 2 bytes: the buffer is treated as UTF-16, so surrogate pairs are
//    combined into a single code point before being encoded;
//  - otherwise each unit is treated as one complete code point.
//
// bytes      - start of the buffer; must be suitably aligned for BaseCharType
// bytesCount - size of the buffer in bytes; expected to be a multiple of
//              sizeof(BaseCharType) (asserted). A null buffer, or a count
//              that is negative or smaller than one unit, yields an empty
//              string.
//
// Input that cannot be converted makes std::wstring_convert::to_bytes throw
// std::range_error, which is not caught here.
template<typename BaseCharType>
std::string FastStringConvert(const void* bytes, int bytesCount)
{
   constexpr int charSize = sizeof(BaseCharType);

   assert(bytesCount % charSize == 0);

   // Nothing to convert; also keeps "end" from landing before "begin"
   if (bytes == nullptr || bytesCount < charSize)
      return {};

   const auto begin = static_cast<const BaseCharType*>(bytes);
   const auto end = begin + bytesCount / charSize;

   // Fast path: when every unit is below 0x7f, narrowing each unit to char
   // already gives the UTF-8 result
   const bool isAscii = std::all_of(
      begin, end,
      [](BaseCharType c)
      { return static_cast<std::make_unsigned_t<BaseCharType>>(c) < 0x7f; });

   if (isAscii)
      return std::string(begin, end);

   if constexpr (charSize == 2)
   {
      // codecvt_utf8 would read 2-byte units as UCS-2 and mishandle
      // surrogate pairs; codecvt_utf8_utf16 decodes them as UTF-16
      return std::wstring_convert<
                std::codecvt_utf8_utf16<BaseCharType>, BaseCharType>()
         .to_bytes(begin, end);
   }
   else
   {
      return std::wstring_convert<
                std::codecvt_utf8<BaseCharType>, BaseCharType>()
         .to_bytes(begin, end);
   }
}
348} // namespace
349
351{
352 static std::once_flag flag;
353 std::call_once(flag, []{
354 // Just once per run, store header information in the unique static
355 // dictionary that will be written into each project that is saved.
356 // Store the size of "wxStringCharType" so we can convert during recovery
357 // in case the file is used on a system with a different character size.
358 char size = sizeof(wxStringCharType);
360 mDict.AppendData(&size, 1);
361 });
362
363 mDictChanged = false;
364}
365
367{
368}
369
370void ProjectSerializer::StartTag(const wxString & name)
371{
374}
375
376void ProjectSerializer::EndTag(const wxString & name)
377{
380}
381
382void ProjectSerializer::WriteAttr(const wxString & name, const wxChar *value)
383{
384 WriteAttr(name, wxString(value));
385}
386
387void ProjectSerializer::WriteAttr(const wxString & name, const wxString & value)
388{
391
392 const Length len = value.length() * sizeof(wxStringCharType);
393 WriteLength( mBuffer, len );
394 mBuffer.AppendData(value.wx_str(), len);
395}
396
397void ProjectSerializer::WriteAttr(const wxString & name, int value)
398{
401
402 WriteInt( mBuffer, value );
403}
404
405void ProjectSerializer::WriteAttr(const wxString & name, bool value)
406{
409
410 mBuffer.AppendByte(value);
411}
412
413void ProjectSerializer::WriteAttr(const wxString & name, long value)
414{
417
418 WriteLong( mBuffer, value );
419}
420
421void ProjectSerializer::WriteAttr(const wxString & name, long long value)
422{
425
426 WriteLongLong( mBuffer, value );
427}
428
429void ProjectSerializer::WriteAttr(const wxString & name, size_t value)
430{
433
434 WriteULong( mBuffer, value );
435}
436
437void ProjectSerializer::WriteAttr(const wxString & name, float value, int digits)
438{
441
442 mBuffer.AppendData(&value, sizeof(value));
443 WriteDigits( mBuffer, digits );
444}
445
446void ProjectSerializer::WriteAttr(const wxString & name, double value, int digits)
447{
450
451 mBuffer.AppendData(&value, sizeof(value));
452 WriteDigits( mBuffer, digits );
453}
454
455void ProjectSerializer::WriteData(const wxString & value)
456{
458
459 Length len = value.length() * sizeof(wxStringCharType);
460 WriteLength( mBuffer, len );
461 mBuffer.AppendData(value.wx_str(), len);
462}
463
464void ProjectSerializer::Write(const wxString & value)
465{
467 Length len = value.length() * sizeof(wxStringCharType);
468 WriteLength( mBuffer, len );
469 mBuffer.AppendData(value.wx_str(), len);
470}
471
472void ProjectSerializer::WriteName(const wxString & name)
473{
474 wxASSERT(name.length() * sizeof(wxStringCharType) <= SHRT_MAX);
475 UShort id;
476
477 auto nameiter = mNames.find(name);
478 if (nameiter != mNames.end())
479 {
480 id = nameiter->second;
481 }
482 else
483 {
484 // mNames is static. This appends each name to static mDict only once
485 // in each run.
486 UShort len = name.length() * sizeof(wxStringCharType);
487
488 id = mNames.size();
489 mNames[name] = id;
490
492 WriteUShort( mDict, id );
493 WriteUShort( mDict, len );
494 mDict.AppendData(name.wx_str(), len);
495
496 mDictChanged = true;
497 }
498
499 WriteUShort( mBuffer, id );
500}
501
503{
504 return mDict;
505}
506
508{
509 return mBuffer;
510}
511
513{
514 return mBuffer.GetSize() == 0;
515}
516
518{
519 return mDictChanged;
520}
521
522// See ProjectFileIO::LoadProject() for explanation of the blockids arg
524{
525 if (handler == nullptr)
526 return false;
527
528 XMLTagHandlerAdapter adapter(handler);
529
530 std::vector<char> bytes;
531 IdMap mIds;
532 std::vector<IdMap> mIdStack;
533 char mCharSize = 0;
534
535 mIds.clear();
536
537 struct Error{}; // exception type for short-range try/catch
538 auto Lookup = [&mIds]( UShort id ) -> std::string_view
539 {
540 auto iter = mIds.find( id );
541 if (iter == mIds.end())
542 {
543 throw Error{};
544 }
545
546 return iter->second;
547 };
548
549 int64_t stringsCount = 0;
550 int64_t stringsLength = 0;
551
552 auto ReadString = [&mCharSize, &in, &bytes, &stringsCount, &stringsLength](int len) -> std::string
553 {
554 bytes.reserve( len );
555 auto data = bytes.data();
556 in.Read( data, len );
557
558 stringsCount++;
559 stringsLength += len;
560
561 switch (mCharSize)
562 {
563 case 1:
564 return std::string(bytes.data(), len);
565
566 case 2:
567 return FastStringConvert<char16_t>(bytes.data(), len);
568
569 case 4:
570 return FastStringConvert<char32_t>(bytes.data(), len);
571
572 default:
573 wxASSERT_MSG(false, wxT("Characters size not 1, 2, or 4"));
574 break;
575 }
576
577 return {};
578 };
579
580 try
581 {
582 while (!in.Eof())
583 {
584 UShort id;
585
586 switch (in.GetC())
587 {
588 case FT_Push:
589 {
590 mIdStack.push_back(mIds);
591 mIds.clear();
592 }
593 break;
594
595 case FT_Pop:
596 {
597 mIds = mIdStack.back();
598 mIdStack.pop_back();
599 }
600 break;
601
602 case FT_Name:
603 {
604 id = ReadUShort( in );
605 auto len = ReadUShort( in );
606 mIds[id] = ReadString(len);
607 }
608 break;
609
610 case FT_StartTag:
611 {
612 id = ReadUShort( in );
613
614 adapter.EmitStartTag(Lookup(id));
615 }
616 break;
617
618 case FT_EndTag:
619 {
620 id = ReadUShort( in );
621
622 adapter.EndTag(Lookup(id));
623 }
624 break;
625
626 case FT_String:
627 {
628 id = ReadUShort( in );
629 int len = ReadLength( in );
630
631 adapter.WriteAttr(Lookup(id), ReadString(len));
632 }
633 break;
634
635 case FT_Float:
636 {
637 float val;
638
639 id = ReadUShort( in );
640 in.Read(&val, sizeof(val));
641 /* int dig = */ReadDigits(in);
642
643 adapter.WriteAttr(Lookup(id), val);
644 }
645 break;
646
647 case FT_Double:
648 {
649 double val;
650
651 id = ReadUShort( in );
652 in.Read(&val, sizeof(val));
653 /*int dig = */ReadDigits(in);
654
655 adapter.WriteAttr(Lookup(id), val);
656 }
657 break;
658
659 case FT_Int:
660 {
661 id = ReadUShort( in );
662 int val = ReadInt( in );
663
664 adapter.WriteAttr(Lookup(id), val);
665 }
666 break;
667
668 case FT_Bool:
669 {
670 unsigned char val;
671
672 id = ReadUShort( in );
673 in.Read(&val, 1);
674
675 adapter.WriteAttr(Lookup(id), val);
676 }
677 break;
678
679 case FT_Long:
680 {
681 id = ReadUShort( in );
682 long val = ReadLong( in );
683
684 adapter.WriteAttr(Lookup(id), val);
685 }
686 break;
687
688 case FT_LongLong:
689 {
690 id = ReadUShort( in );
691 long long val = ReadLongLong( in );
692 adapter.WriteAttr(Lookup(id), val);
693 }
694 break;
695
696 case FT_SizeT:
697 {
698 id = ReadUShort( in );
699 size_t val = ReadULong( in );
700
701 adapter.WriteAttr(Lookup(id), val);
702 }
703 break;
704
705 case FT_Data:
706 {
707 int len = ReadLength( in );
708 adapter.WriteData(ReadString(len));
709 }
710 break;
711
712 case FT_Raw:
713 {
714 int len = ReadLength( in );
715 adapter.WriteRaw(ReadString(len));
716 }
717 break;
718
719 case FT_CharSize:
720 {
721 in.Read(&mCharSize, 1);
722 }
723 break;
724
725 default:
726 wxASSERT(true);
727 break;
728 }
729 }
730 }
731 catch( const Error& )
732 {
733 // Document was corrupt, or platform differences in size or endianness
734 // were not well canonicalized
735 return false;
736 }
737
738 wxLogInfo(
739 "Loaded %lld string %f Kb in size", stringsCount, stringsLength / 1024.0);
740
741 return adapter.Finalize();
742}
wxT("CloseDown"))
XO("Cut/Copy/Paste")
bool IsLittleEndian() noexcept
Check that machine is little-endian.
Definition: MemoryX.h:368
wxString FilePath
Definition: Project.h:21
@ FT_Int
@ FT_String
@ FT_EndTag
@ FT_Push
@ FT_Raw
@ FT_Float
@ FT_CharSize
@ FT_Bool
@ FT_Name
@ FT_Double
@ FT_Pop
@ FT_SizeT
@ FT_Data
@ FT_LongLong
@ FT_StartTag
@ FT_Long
std::unordered_map< unsigned short, std::string > IdMap
std::unordered_map< wxString, unsigned short > NameMap
wxString name
Definition: TagsEditor.cpp:166
int id
static std::once_flag flag
std::vector< Attribute > AttributesList
Definition: XMLTagHandler.h:40
A facade-like class, that implements buffered reading from the underlying data stream.
size_t Read(void *buffer, size_t maxBytes)
Read up to maxBytes into the buffer. Returns the number of bytes read.
A low overhead memory stream with O(1) append, low heap fragmentation and a linear memory view.
const size_t GetSize() const noexcept
void AppendData(const void *data, const size_t length)
void AppendByte(char data)
void StartTag(const wxString &name) override
void EndTag(const wxString &name) override
static bool Decode(BufferedStreamReader &in, XMLTagHandler *handler)
static MemoryStream mDict
static TranslatableString FailureMessage(const FilePath &filePath)
const MemoryStream & GetData() const
static NameMap mNames
void WriteName(const wxString &name)
void WriteAttr(const wxString &name, const wxString &value) override
const MemoryStream & GetDict() const
void WriteData(const wxString &value) override
void Write(const wxString &data) override
MemoryStream mBuffer
ProjectSerializer(size_t allocSize=1024 *1024)
Holds a msgid for the translation catalog; may also bind format arguments.
A view into an attribute value. The class does not take the ownership of the data.
This class is an interface which should be implemented by classes which wish to be able to load and s...
Definition: XMLTagHandler.h:42
void WriteAttr(const std::string_view &name, std::string value)
NUMERIC_FORMATS_API NumericFormatSymbol Lookup(const FormatterContext &context, const NumericConverterType &type, const NumericFormatID &formatIdentifier)
Looks up the format, returns Default for the type if the format is not registered.
Number ReadLittleEndian(BufferedStreamReader &in)
std::string FastStringConvert(const void *bytes, int bytesCount)
void WriteLittleEndian(MemoryStream &out, Number value)
Number ReadBigEndian(BufferedStreamReader &in)
void WriteBigEndian(MemoryStream &out, Number value)
BuiltinEffectsModule::Registration< Reverse > reverse
const char * end(const char *str) noexcept
Definition: StringUtils.h:106
const char * begin(const char *str) noexcept
Definition: StringUtils.h:101