Audacity 3.2.0
ProjectSerializer.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4 Audacity(R) is copyright (c) 1999-2010 Audacity Team.
5 License: GPL v2 or later. See License.txt.
6
7 ProjectSerializer.cpp
8
9*******************************************************************//********************************************************************/
15
16#include "ProjectSerializer.h"
17
18#include <algorithm>
19#include <cstdint>
20#include <mutex>
21#include <wx/ustring.h>
22#include <codecvt>
23#include <locale>
24#include <deque>
25
26#include <wx/log.h>
27
29
33
34// Simple "binary xml" format used exclusively for project documents.
35//
36// It is not intended that the user view or modify the file.
37//
38// It IS intended that very little work be done during auto save, so numbers
39// and strings are written in their native format. They will be converted
40// during recovery.
41//
42// The file has 3 main sections:
43//
44// character size 1 (UTF-8), 2 (UTF-16) or 4 (UTF-32)
45// name dictionary dictionary of all names used in the document
46// data fields the "encoded" XML document
47//
48// If a subtree is added, it will be preceded with FT_Push to tell the decoder
49// to preserve the active dictionary. The decoder will then restore the
50// dictionary when an FT_Pop is encountered. Nesting is unlimited.
51//
52// To save space, each name (attribute or element) encountered is stored in
53// the name dictionary and replaced with the assigned 2-byte identifier.
54//
55// All strings are in native unicode format, 2-byte or 4-byte.
56//
57// All name "lengths" are 2-byte signed, so are limited to 32767 bytes long.
58// All string/data "lengths" are 4-byte signed.
59
61{
62 FT_CharSize, // type, ID, value
63 FT_StartTag, // type, ID
64 FT_EndTag, // type, ID
65 FT_String, // type, ID, string length, string
66 FT_Int, // type, ID, value
67 FT_Bool, // type, ID, value
68 FT_Long, // type, ID, value
69 FT_LongLong, // type, ID, value
70 FT_SizeT, // type, ID, value
71 FT_Float, // type, ID, value, digits
72 FT_Double, // type, ID, value, digits
73 FT_Data, // type, string length, string
74 FT_Raw, // type, string length, string
75 FT_Push, // type only
76 FT_Pop, // type only
77 FT_Name // type, ID, name length, name
78};
79
80// Static so that the dict can be reused each time.
81//
82// If entries get added later, like when an envelope node (for example)
83// is written and then the envelope is later removed, the dict will still
84// contain the envelope name, but that's not a problem.
85
88
90{
91 return
92XO("This recovery file was saved by Audacity 2.3.0 or before.\n"
93 "You need to run that version of Audacity to recover the project." );
94}
95
96namespace
97{
98// Aliases for the FIXED-WIDTH integer types that are used in the file
99// format.
100
101// Chosen so that among the four build types (32 bit Windows, 64
102// bit Windows, 64 bit Mac clang, Linux g++) presently done (3.0.0
103// development), we use the narrowest width of the type on any of them, so
104// that anything saved on one build will be read back identically on all
105// builds. (Although this means that very large values on some systems might
106// be saved and then read back with loss.)
107
108// In fact the only types for which this matters are long (only 32 bits on
109// 32 and 64 bit Windows) and size_t (only 32 bits on 32 bit Windows).
110
111using UShort = std::uint16_t;
112using Int = std::int32_t;
113
114using Long = std::int32_t; // To save long values
115using ULong = std::uint32_t; // To save size_t values
116
117using LongLong = std::int64_t;
118
119// Detect this computer's endianness
121{
122 const std::uint32_t x = 1u;
123 return static_cast<const unsigned char*>(static_cast<const void*>(&x))[0];
124 // We will assume the same for other widths!
125}
126// In C++20 this could be
127// constexpr bool IsLittleEndian = (std::endian::native == std::endian::little);
128// static_assert( IsLittleEndian || (std::endian::native == std::endian::big),
129// "Oh no! I'm mixed-endian!" );
130
131// Functions that can read and write native integer types to a canonicalized
132// little-endian file format. (We don't bother to do the same for floating
133// point numbers.)
134
135// Write native little-endian to little-endian file format
136template <typename Number>
137void WriteLittleEndian(MemoryStream& out, Number value)
138{
139 out.AppendData(&value, sizeof(value));
140}
141
142// Write native big-endian to little-endian file format
143template <typename Number> void WriteBigEndian(MemoryStream& out, Number value)
144{
145 auto begin = static_cast<unsigned char*>(static_cast<void*>(&value));
146 std::reverse(begin, begin + sizeof(value));
147 out.AppendData(&value, sizeof(value));
148}
149
150// Read little-endian file format to native little-endian
151template <typename Number> Number ReadLittleEndian(BufferedStreamReader& in)
152{
153 Number result;
154 in.ReadValue(result);
155 return result;
156}
157
158// Read little-endian file format to native big-endian
159template <typename Number> Number ReadBigEndian(BufferedStreamReader& in)
160{
161 Number result;
162 in.ReadValue(result);
163 auto begin = static_cast<unsigned char*>(static_cast<void*>(&result));
164 std::reverse(begin, begin + sizeof(result));
165 return result;
166}
167
168// Choose between implementations!
169static const auto WriteUShort =
170 IsLittleEndian() ? &WriteLittleEndian<UShort> : &WriteBigEndian<UShort>;
171static const auto WriteInt =
172 IsLittleEndian() ? &WriteLittleEndian<Int> : &WriteBigEndian<Int>;
173static const auto WriteLong =
174 IsLittleEndian() ? &WriteLittleEndian<Long> : &WriteBigEndian<Long>;
175static const auto WriteULong =
176 IsLittleEndian() ? &WriteLittleEndian<ULong> : &WriteBigEndian<ULong>;
177static const auto WriteLongLong =
178 IsLittleEndian() ? &WriteLittleEndian<LongLong> : &WriteBigEndian<LongLong>;
179
180static const auto ReadUShort =
181 IsLittleEndian() ? &ReadLittleEndian<UShort> : &ReadBigEndian<UShort>;
182static const auto ReadInt =
183 IsLittleEndian() ? &ReadLittleEndian<Int> : &ReadBigEndian<Int>;
184static const auto ReadLong =
185 IsLittleEndian() ? &ReadLittleEndian<Long> : &ReadBigEndian<Long>;
186static const auto ReadULong =
187 IsLittleEndian() ? &ReadLittleEndian<ULong> : &ReadBigEndian<ULong>;
188static const auto ReadLongLong =
189 IsLittleEndian() ? &ReadLittleEndian<LongLong> : &ReadBigEndian<LongLong>;
190
191// Functions to read and write certain lengths -- maybe we will change
192// our choices for widths or signedness?
193
194using Length = Int; // Instead, as wide as size_t?
195static const auto WriteLength = WriteInt;
196static const auto ReadLength = ReadInt;
197
198using Digits = Int; // Instead, just an unsigned char?
199static const auto WriteDigits = WriteInt;
200static const auto ReadDigits = ReadInt;
201
203{
204public:
206 : mBaseHandler(handler)
207 {
208 }
209
210 void EmitStartTag(const std::string_view& name)
211 {
212 if (mInTag)
213 EmitStartTag();
214
215 mCurrentTagName = name;
216 mInTag = true;
217 }
218
219 void EndTag(const std::string_view& name)
220 {
221 if (mInTag)
222 EmitStartTag();
223
224 if (XMLTagHandler* const handler = mHandlers.back())
225 handler->HandleXMLEndTag(name);
226
227 mHandlers.pop_back();
228 }
229
230 void WriteAttr(const std::string_view& name, std::string value)
231 {
232 assert(mInTag);
233
234 if (!mInTag)
235 return;
236
237 mAttributes.emplace_back(name, CacheString(std::move(value)));
238 }
239
240 template <typename T> void WriteAttr(const std::string_view& name, T value)
241 {
242 assert(mInTag);
243
244 if (!mInTag)
245 return;
246
247 mAttributes.emplace_back(name, XMLAttributeValueView(value));
248 }
249
250 void WriteData(std::string value)
251 {
252 if (mInTag)
253 EmitStartTag();
254
255 if (XMLTagHandler* const handler = mHandlers.back())
256 handler->HandleXMLContent(CacheString(std::move(value)));
257 }
258
259 void WriteRaw(std::string)
260 {
261 // This method is intentionally left empty.
262 // The only data that is serialized by FT_Raw
263 // is the boilerplate code like <?xml > and <!DOCTYPE>
264 // which are ignored
265 }
266
267 bool Finalize()
268 {
269 if (mInTag)
270 {
271 EmitStartTag();
272 EndTag(mCurrentTagName);
273 }
274
275 return mBaseHandler != nullptr;
276 }
277
278private:
280 {
281 if (mHandlers.empty())
282 {
283 mHandlers.push_back(mBaseHandler);
284 }
285 else
286 {
287 if (XMLTagHandler* const handler = mHandlers.back())
288 mHandlers.push_back(handler->HandleXMLChild(mCurrentTagName));
289 else
290 mHandlers.push_back(NULL);
291 }
292
293 if (XMLTagHandler*& handler = mHandlers.back())
294 {
295 if (!handler->HandleXMLTag(mCurrentTagName, mAttributes))
296 {
297 handler = nullptr;
298
299 if (mHandlers.size() == 1)
300 mBaseHandler = nullptr;
301 }
302 }
303
304 mStringsCache.clear();
305 mAttributes.clear();
306 mInTag = false;
307 }
308
309 std::string_view CacheString(std::string string)
310 {
311 mStringsCache.emplace_back(std::move(string));
312 return mStringsCache.back();
313 }
314
316
317 std::vector<XMLTagHandler*> mHandlers;
318
319 std::string_view mCurrentTagName;
320
321 std::deque<std::string> mStringsCache;
323
324 bool mInTag { false };
325};
326
327// template<typename BaseCharType>
328// std::string FastStringConvertFromAscii(const BaseCharType* begin, const BaseCharType* end)
329// {
330//
331// }
332
// Convert a buffer of fixed-width code units to a UTF-8 std::string.
//
// BaseCharType  code unit type of the input (char16_t for UTF-16,
//               char32_t for UTF-32)
// bytes         start of the input buffer
// bytesCount    size of the buffer in bytes; must be a multiple of
//               sizeof(BaseCharType)
//
// Pure ASCII input is copied code unit by code unit without going through a
// codecvt facet. Anything else is transcoded by std::wstring_convert, which
// throws std::range_error if the input cannot be converted.
template<typename BaseCharType>
std::string FastStringConvert(const void* bytes, int bytesCount)
{
   constexpr int charSize = sizeof(BaseCharType);

   assert(bytesCount % charSize == 0);

   const auto begin = static_cast<const BaseCharType*>(bytes);
   const auto end = begin + bytesCount / charSize;

   // Every code unit below 0x80 is ASCII and maps to exactly one UTF-8 byte
   const bool isAscii = std::all_of(
      begin, end,
      [](BaseCharType c)
      { return static_cast<std::make_unsigned_t<BaseCharType>>(c) < 0x80; });

   if (isAscii)
      return std::string(begin, end);

   // codecvt_utf8 over a 16-bit element type converts UCS-2 only and cannot
   // handle surrogate pairs; codecvt_utf8_utf16 is the facet that combines
   // UTF-16 surrogate pairs into a single 4-byte UTF-8 sequence.
   using Facet = std::conditional_t<
      charSize == 2,
      std::codecvt_utf8_utf16<BaseCharType>,
      std::codecvt_utf8<BaseCharType>>;

   return std::wstring_convert<Facet, BaseCharType>().to_bytes(begin, end);
}
354} // namespace
355
357{
358 static std::once_flag flag;
359 std::call_once(flag, []{
360 // Just once per run, store header information in the unique static
361 // dictionary that will be written into each project that is saved.
362 // Store the size of "wxStringCharType" so we can convert during recovery
363 // in case the file is used on a system with a different character size.
364 char size = sizeof(wxStringCharType);
366 mDict.AppendData(&size, 1);
367 });
368
369 mDictChanged = false;
370}
371
373{
374}
375
376void ProjectSerializer::StartTag(const wxString & name)
377{
380}
381
382void ProjectSerializer::EndTag(const wxString & name)
383{
386}
387
388void ProjectSerializer::WriteAttr(const wxString & name, const wxChar *value)
389{
390 WriteAttr(name, wxString(value));
391}
392
393void ProjectSerializer::WriteAttr(const wxString & name, const wxString & value)
394{
397
398 const Length len = value.length() * sizeof(wxStringCharType);
399 WriteLength( mBuffer, len );
400 mBuffer.AppendData(value.wx_str(), len);
401}
402
403void ProjectSerializer::WriteAttr(const wxString & name, int value)
404{
407
408 WriteInt( mBuffer, value );
409}
410
411void ProjectSerializer::WriteAttr(const wxString & name, bool value)
412{
415
416 mBuffer.AppendByte(value);
417}
418
419void ProjectSerializer::WriteAttr(const wxString & name, long value)
420{
423
424 WriteLong( mBuffer, value );
425}
426
427void ProjectSerializer::WriteAttr(const wxString & name, long long value)
428{
431
432 WriteLongLong( mBuffer, value );
433}
434
435void ProjectSerializer::WriteAttr(const wxString & name, size_t value)
436{
439
440 WriteULong( mBuffer, value );
441}
442
443void ProjectSerializer::WriteAttr(const wxString & name, float value, int digits)
444{
447
448 mBuffer.AppendData(&value, sizeof(value));
449 WriteDigits( mBuffer, digits );
450}
451
452void ProjectSerializer::WriteAttr(const wxString & name, double value, int digits)
453{
456
457 mBuffer.AppendData(&value, sizeof(value));
458 WriteDigits( mBuffer, digits );
459}
460
461void ProjectSerializer::WriteData(const wxString & value)
462{
464
465 Length len = value.length() * sizeof(wxStringCharType);
466 WriteLength( mBuffer, len );
467 mBuffer.AppendData(value.wx_str(), len);
468}
469
470void ProjectSerializer::Write(const wxString & value)
471{
473 Length len = value.length() * sizeof(wxStringCharType);
474 WriteLength( mBuffer, len );
475 mBuffer.AppendData(value.wx_str(), len);
476}
477
478void ProjectSerializer::WriteName(const wxString & name)
479{
480 wxASSERT(name.length() * sizeof(wxStringCharType) <= SHRT_MAX);
481 UShort id;
482
483 auto nameiter = mNames.find(name);
484 if (nameiter != mNames.end())
485 {
486 id = nameiter->second;
487 }
488 else
489 {
490 // mNames is static. This appends each name to static mDict only once
491 // in each run.
492 UShort len = name.length() * sizeof(wxStringCharType);
493
494 id = mNames.size();
495 mNames[name] = id;
496
498 WriteUShort( mDict, id );
499 WriteUShort( mDict, len );
500 mDict.AppendData(name.wx_str(), len);
501
502 mDictChanged = true;
503 }
504
505 WriteUShort( mBuffer, id );
506}
507
509{
510 return mDict;
511}
512
514{
515 return mBuffer;
516}
517
519{
520 return mBuffer.GetSize() == 0;
521}
522
524{
525 return mDictChanged;
526}
527
528// See ProjectFileIO::LoadProject() for explanation of the blockids arg
530{
531 if (handler == nullptr)
532 return false;
533
534 XMLTagHandlerAdapter adapter(handler);
535
536 std::vector<char> bytes;
537 IdMap mIds;
538 std::vector<IdMap> mIdStack;
539 char mCharSize = 0;
540
541 mIds.clear();
542
543 struct Error{}; // exception type for short-range try/catch
544 auto Lookup = [&mIds]( UShort id ) -> std::string_view
545 {
546 auto iter = mIds.find( id );
547 if (iter == mIds.end())
548 {
549 throw Error{};
550 }
551
552 return iter->second;
553 };
554
555 int64_t stringsCount = 0;
556 int64_t stringsLength = 0;
557
558 auto ReadString = [&mCharSize, &in, &bytes, &stringsCount, &stringsLength](int len) -> std::string
559 {
560 bytes.reserve( len );
561 auto data = bytes.data();
562 in.Read( data, len );
563
564 stringsCount++;
565 stringsLength += len;
566
567 switch (mCharSize)
568 {
569 case 1:
570 return std::string(bytes.data(), len);
571
572 case 2:
573 return FastStringConvert<char16_t>(bytes.data(), len);
574
575 case 4:
576 return FastStringConvert<char32_t>(bytes.data(), len);
577
578 default:
579 wxASSERT_MSG(false, wxT("Characters size not 1, 2, or 4"));
580 break;
581 }
582
583 return {};
584 };
585
586 try
587 {
588 while (!in.Eof())
589 {
590 UShort id;
591
592 switch (in.GetC())
593 {
594 case FT_Push:
595 {
596 mIdStack.push_back(mIds);
597 mIds.clear();
598 }
599 break;
600
601 case FT_Pop:
602 {
603 mIds = mIdStack.back();
604 mIdStack.pop_back();
605 }
606 break;
607
608 case FT_Name:
609 {
610 id = ReadUShort( in );
611 auto len = ReadUShort( in );
612 mIds[id] = ReadString(len);
613 }
614 break;
615
616 case FT_StartTag:
617 {
618 id = ReadUShort( in );
619
620 adapter.EmitStartTag(Lookup(id));
621 }
622 break;
623
624 case FT_EndTag:
625 {
626 id = ReadUShort( in );
627
628 adapter.EndTag(Lookup(id));
629 }
630 break;
631
632 case FT_String:
633 {
634 id = ReadUShort( in );
635 int len = ReadLength( in );
636
637 adapter.WriteAttr(Lookup(id), ReadString(len));
638 }
639 break;
640
641 case FT_Float:
642 {
643 float val;
644
645 id = ReadUShort( in );
646 in.Read(&val, sizeof(val));
647 /* int dig = */ReadDigits(in);
648
649 adapter.WriteAttr(Lookup(id), val);
650 }
651 break;
652
653 case FT_Double:
654 {
655 double val;
656
657 id = ReadUShort( in );
658 in.Read(&val, sizeof(val));
659 /*int dig = */ReadDigits(in);
660
661 adapter.WriteAttr(Lookup(id), val);
662 }
663 break;
664
665 case FT_Int:
666 {
667 id = ReadUShort( in );
668 int val = ReadInt( in );
669
670 adapter.WriteAttr(Lookup(id), val);
671 }
672 break;
673
674 case FT_Bool:
675 {
676 unsigned char val;
677
678 id = ReadUShort( in );
679 in.Read(&val, 1);
680
681 adapter.WriteAttr(Lookup(id), val);
682 }
683 break;
684
685 case FT_Long:
686 {
687 id = ReadUShort( in );
688 long val = ReadLong( in );
689
690 adapter.WriteAttr(Lookup(id), val);
691 }
692 break;
693
694 case FT_LongLong:
695 {
696 id = ReadUShort( in );
697 long long val = ReadLongLong( in );
698 adapter.WriteAttr(Lookup(id), val);
699 }
700 break;
701
702 case FT_SizeT:
703 {
704 id = ReadUShort( in );
705 size_t val = ReadULong( in );
706
707 adapter.WriteAttr(Lookup(id), val);
708 }
709 break;
710
711 case FT_Data:
712 {
713 int len = ReadLength( in );
714 adapter.WriteData(ReadString(len));
715 }
716 break;
717
718 case FT_Raw:
719 {
720 int len = ReadLength( in );
721 adapter.WriteRaw(ReadString(len));
722 }
723 break;
724
725 case FT_CharSize:
726 {
727 in.Read(&mCharSize, 1);
728 }
729 break;
730
731 default:
732 wxASSERT(true);
733 break;
734 }
735 }
736 }
737 catch( const Error& )
738 {
739 // Document was corrupt, or platform differences in size or endianness
740 // were not well canonicalized
741 return false;
742 }
743
744 wxLogInfo(
745 "Loaded %lld string %f Kb in size", stringsCount, stringsLength / 1024.0);
746
747 return adapter.Finalize();
748}
wxT("CloseDown"))
const TranslatableString name
Definition: Distortion.cpp:76
XO("Cut/Copy/Paste")
wxString FilePath
Definition: Project.h:21
@ FT_Int
@ FT_String
@ FT_EndTag
@ FT_Push
@ FT_Raw
@ FT_Float
@ FT_CharSize
@ FT_Bool
@ FT_Name
@ FT_Double
@ FT_Pop
@ FT_SizeT
@ FT_Data
@ FT_LongLong
@ FT_StartTag
@ FT_Long
std::unordered_map< unsigned short, std::string > IdMap
std::unordered_map< wxString, unsigned short > NameMap
int id
static std::once_flag flag
std::vector< Attribute > AttributesList
Definition: XMLTagHandler.h:40
A facade-like class, that implements buffered reading from the underlying data stream.
size_t Read(void *buffer, size_t maxBytes)
Read up to maxBytes into the buffer. Returns the number of bytes read.
A low overhead memory stream with O(1) append, low heap fragmentation and a linear memory view.
const size_t GetSize() const noexcept
void AppendData(const void *data, const size_t length)
void AppendByte(char data)
void StartTag(const wxString &name) override
void EndTag(const wxString &name) override
static bool Decode(BufferedStreamReader &in, XMLTagHandler *handler)
static MemoryStream mDict
static TranslatableString FailureMessage(const FilePath &filePath)
const MemoryStream & GetData() const
static NameMap mNames
void WriteName(const wxString &name)
void WriteAttr(const wxString &name, const wxString &value) override
const MemoryStream & GetDict() const
void WriteData(const wxString &value) override
void Write(const wxString &data) override
MemoryStream mBuffer
ProjectSerializer(size_t allocSize=1024 *1024)
Holds a msgid for the translation catalog; may also bind format arguments.
A view into an attribute value. The class does not take the ownership of the data.
This class is an interface which should be implemented by classes which wish to be able to load and s...
Definition: XMLTagHandler.h:42
void WriteAttr(const std::string_view &name, std::string value)
NUMERIC_FORMATS_API NumericFormatSymbol Lookup(const FormatterContext &context, const NumericConverterType &type, const NumericFormatID &formatIdentifier)
Looks up the format, returns Default for the type if the format is not registered.
auto end(const Ptr< Type, BaseDeleter > &p)
Enables range-for.
Definition: PackedArray.h:159
auto begin(const Ptr< Type, BaseDeleter > &p)
Enables range-for.
Definition: PackedArray.h:150
Number ReadLittleEndian(BufferedStreamReader &in)
std::string FastStringConvert(const void *bytes, int bytesCount)
void WriteLittleEndian(MemoryStream &out, Number value)
Number ReadBigEndian(BufferedStreamReader &in)
void WriteBigEndian(MemoryStream &out, Number value)