Audacity 3.2.0
ProjectSerializer.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4 Audacity(R) is copyright (c) 1999-2010 Audacity Team.
5 License: GPL v2 or later. See License.txt.
6
7 ProjectSerializer.cpp
8
9*******************************************************************//********************************************************************/
15
16
17#include "ProjectSerializer.h"
18
19#include <algorithm>
20#include <cstdint>
21#include <mutex>
22#include <wx/ustring.h>
23#include <codecvt>
24#include <locale>
25#include <deque>
26
27#include <wx/log.h>
28
30
34
35// Simple "binary xml" format used exclusively for project documents.
36//
37// It is not intended that the user view or modify the file.
38//
39// It IS intended that very little work be done during auto save, so numbers
40// and strings are written in their native format. They will be converted
41// during recovery.
42//
43// The file has 3 main sections:
44//
45// character size: 1 (UTF-8), 2 (UTF-16) or 4 (UTF-32)
46// name dictionary: dictionary of all names used in the document
47// data fields: the "encoded" XML document
48//
49// If a subtree is added, it will be preceded with FT_Push to tell the decoder
50// to preserve the active dictionary. The decoder will then restore the
51// dictionary when an FT_Pop is encountered. Nesting is unlimited.
52//
53// To save space, each name (attribute or element) encountered is stored in
54// the name dictionary and replaced with the assigned 2-byte identifier.
55//
56// All strings are in the native wxString character format: 1-byte (UTF-8),
// 2-byte (UTF-16) or 4-byte (UTF-32), as recorded by the character size field.
57//
58// All name "lengths" are 2-byte signed, so are limited to 32767 bytes long.
59// All string/data "lengths" are 4-byte signed.
60
62{
63 FT_CharSize, // type, ID, value
64 FT_StartTag, // type, ID
65 FT_EndTag, // type, ID
66 FT_String, // type, ID, string length, string
67 FT_Int, // type, ID, value
68 FT_Bool, // type, ID, value
69 FT_Long, // type, ID, value
70 FT_LongLong, // type, ID, value
71 FT_SizeT, // type, ID, value
72 FT_Float, // type, ID, value, digits
73 FT_Double, // type, ID, value, digits
74 FT_Data, // type, string length, string
75 FT_Raw, // type, string length, string
76 FT_Push, // type only
77 FT_Pop, // type only
78 FT_Name // type, ID, name length, name
79};
80
81// Static so that the dict can be reused each time.
82//
83// If entries get added later, like when an envelope node (for example)
84// is written and then the envelope is later removed, the dict will still
85// contain the envelope name, but that's not a problem.
86
89
91{
92 return
93XO("This recovery file was saved by Audacity 2.3.0 or before.\n"
94 "You need to run that version of Audacity to recover the project." );
95}
96
97namespace
98{
99// Aliases for the FIXED-WIDTH integer types that are used in the file
100// format.
101
102// Chosen so that among the four build types (32 bit Windows, 64
103// bit Windows, 64 bit Mac clang, Linux g++) presently done (3.0.0
104// development), we use the narrowest width of the type on any of them, so
105// that anything saved on one build will be read back identically on all
106// builds. (Although this means that very large values on some systems might
107// be saved and then read back with loss.)
108
109// In fact the only types for which this matters are long (only 32 bits on
110// 32 and 64 bit Windows) and size_t (only 32 bits on 32 bit Windows).
111
112using UShort = std::uint16_t;
113using Int = std::int32_t;
114
115using Long = std::int32_t; // To save long values
116using ULong = std::uint32_t; // To save size_t values
117
118using LongLong = std::int64_t;
119
120// Detect this computer's endianness
122{
123 const std::uint32_t x = 1u;
124 return static_cast<const unsigned char*>(static_cast<const void*>(&x))[0];
125 // We will assume the same for other widths!
126}
127// In C++20 this could be
128// constexpr bool IsLittleEndian = (std::endian::native == std::endian::little);
129// static_assert( IsLittleEndian || (std::endian::native == std::endian::big),
130// "Oh no! I'm mixed-endian!" );
131
132// Functions that can read and write native integer types to a canonicalized
133// little-endian file format. (We don't bother to do the same for floating
134// point numbers.)
135
136// Write native little-endian to little-endian file format
137template <typename Number>
138void WriteLittleEndian(MemoryStream& out, Number value)
139{
140 out.AppendData(&value, sizeof(value));
141}
142
143// Write native big-endian to little-endian file format
144template <typename Number> void WriteBigEndian(MemoryStream& out, Number value)
145{
146 auto begin = static_cast<unsigned char*>(static_cast<void*>(&value));
147 std::reverse(begin, begin + sizeof(value));
148 out.AppendData(&value, sizeof(value));
149}
150
151// Read little-endian file format to native little-endian
152template <typename Number> Number ReadLittleEndian(BufferedStreamReader& in)
153{
154 Number result;
155 in.ReadValue(result);
156 return result;
157}
158
159// Read little-endian file format to native big-endian
160template <typename Number> Number ReadBigEndian(BufferedStreamReader& in)
161{
162 Number result;
163 in.ReadValue(result);
164 auto begin = static_cast<unsigned char*>(static_cast<void*>(&result));
165 std::reverse(begin, begin + sizeof(result));
166 return result;
167}
168
169// Choose between implementations!
170static const auto WriteUShort =
171 IsLittleEndian() ? &WriteLittleEndian<UShort> : &WriteBigEndian<UShort>;
172static const auto WriteInt =
173 IsLittleEndian() ? &WriteLittleEndian<Int> : &WriteBigEndian<Int>;
174static const auto WriteLong =
175 IsLittleEndian() ? &WriteLittleEndian<Long> : &WriteBigEndian<Long>;
176static const auto WriteULong =
177 IsLittleEndian() ? &WriteLittleEndian<ULong> : &WriteBigEndian<ULong>;
178static const auto WriteLongLong =
179 IsLittleEndian() ? &WriteLittleEndian<LongLong> : &WriteBigEndian<LongLong>;
180
181static const auto ReadUShort =
182 IsLittleEndian() ? &ReadLittleEndian<UShort> : &ReadBigEndian<UShort>;
183static const auto ReadInt =
184 IsLittleEndian() ? &ReadLittleEndian<Int> : &ReadBigEndian<Int>;
185static const auto ReadLong =
186 IsLittleEndian() ? &ReadLittleEndian<Long> : &ReadBigEndian<Long>;
187static const auto ReadULong =
188 IsLittleEndian() ? &ReadLittleEndian<ULong> : &ReadBigEndian<ULong>;
189static const auto ReadLongLong =
190 IsLittleEndian() ? &ReadLittleEndian<LongLong> : &ReadBigEndian<LongLong>;
191
192// Functions to read and write certain lengths -- maybe we will change
193// our choices for widths or signedness?
194
195using Length = Int; // Instead, as wide as size_t?
196static const auto WriteLength = WriteInt;
197static const auto ReadLength = ReadInt;
198
199using Digits = Int; // Instead, just an unsigned char?
200static const auto WriteDigits = WriteInt;
201static const auto ReadDigits = ReadInt;
202
204{
205public:
206 explicit XMLTagHandlerAdapter(XMLTagHandler* handler) noexcept
207 : mBaseHandler(handler)
208 {
209 }
210
211 void EmitStartTag(const std::string_view& name)
212 {
213 if (mInTag)
214 EmitStartTag();
215
216 mCurrentTagName = name;
217 mInTag = true;
218 }
219
220 void EndTag(const std::string_view& name)
221 {
222 if (mInTag)
223 EmitStartTag();
224
225 if (XMLTagHandler* const handler = mHandlers.back())
226 handler->HandleXMLEndTag(name);
227
228 mHandlers.pop_back();
229 }
230
231 void WriteAttr(const std::string_view& name, std::string value)
232 {
233 assert(mInTag);
234
235 if (!mInTag)
236 return;
237
238 mAttributes.emplace_back(name, CacheString(std::move(value)));
239 }
240
241 template <typename T> void WriteAttr(const std::string_view& name, T value)
242 {
243 assert(mInTag);
244
245 if (!mInTag)
246 return;
247
248 mAttributes.emplace_back(name, XMLAttributeValueView(value));
249 }
250
251 void WriteData(std::string value)
252 {
253 if (mInTag)
254 EmitStartTag();
255
256 if (XMLTagHandler* const handler = mHandlers.back())
257 handler->HandleXMLContent(CacheString(std::move(value)));
258 }
259
260 void WriteRaw(std::string)
261 {
262 // This method is intentionally left empty.
263 // The only data that is serialized by FT_Raw
264 // is the boilerplate code like <?xml > and <!DOCTYPE>
265 // which are ignored
266 }
267
268 bool Finalize()
269 {
270 if (mInTag)
271 {
272 EmitStartTag();
273 EndTag(mCurrentTagName);
274 }
275
276 return mBaseHandler != nullptr;
277 }
278
279private:
281 {
282 if (mHandlers.empty())
283 {
284 mHandlers.push_back(mBaseHandler);
285 }
286 else
287 {
288 if (XMLTagHandler* const handler = mHandlers.back())
289 mHandlers.push_back(handler->HandleXMLChild(mCurrentTagName));
290 else
291 mHandlers.push_back(NULL);
292 }
293
294 if (XMLTagHandler*& handler = mHandlers.back())
295 {
296 if (!handler->HandleXMLTag(mCurrentTagName, mAttributes))
297 {
298 handler = nullptr;
299
300 if (mHandlers.size() == 1)
301 mBaseHandler = nullptr;
302 }
303 }
304
305 mStringsCache.clear();
306 mAttributes.clear();
307 mInTag = false;
308 }
309
310 std::string_view CacheString(std::string string)
311 {
312 mStringsCache.emplace_back(std::move(string));
313 return mStringsCache.back();
314 }
315
317
318 std::vector<XMLTagHandler*> mHandlers;
319
320 std::string_view mCurrentTagName;
321
322 std::deque<std::string> mStringsCache;
324
325 bool mInTag { false };
326};
327
328// template<typename BaseCharType>
329// std::string FastStringConvertFromAscii(const BaseCharType* begin, const BaseCharType* end)
330// {
331//
332// }
333
// Convert a buffer of native-endian code units of type BaseCharType
// (char16_t: UTF-16, char32_t: UTF-32) into a UTF-8 encoded std::string.
//
//  bytes       start of the buffer
//  bytesCount  size of the buffer in BYTES; expected to be a multiple of
//              sizeof(BaseCharType). Zero or negative yields an empty string.
//
// Throws std::range_error (raised by std::wstring_convert) when the input is
// not a valid sequence, e.g. an unpaired UTF-16 surrogate.
template<typename BaseCharType>
std::string FastStringConvert(const void* bytes, int bytesCount)
{
   constexpr int charSize = sizeof(BaseCharType);

   // A negative count would make end < begin and the scans below undefined
   if (bytesCount <= 0)
      return {};

   assert(bytesCount % charSize == 0);

   const auto begin = static_cast<const BaseCharType*>(bytes);
   const auto end = begin + bytesCount / charSize;

   // Fast path: code points below 0x80 are encoded by UTF-8 as the very
   // same single byte, so each unit can simply be narrowed.
   const bool isAscii = std::all_of(
      begin, end,
      [](BaseCharType c)
      { return static_cast<std::make_unsigned_t<BaseCharType>>(c) < 0x80; });

   if (isAscii)
      return std::string(begin, end);

   // 2-byte units are UTF-16 and may contain surrogate pairs, which only
   // codecvt_utf8_utf16 combines into one code point; codecvt_utf8 treats
   // 16-bit input as UCS-2 and fails on characters outside the BMP.
   using Facet = std::conditional_t<
      charSize == 2,
      std::codecvt_utf8_utf16<BaseCharType>,
      std::codecvt_utf8<BaseCharType>>;

   return std::wstring_convert<Facet, BaseCharType>().to_bytes(begin, end);
}
355} // namespace
356
358{
359 static std::once_flag flag;
360 std::call_once(flag, []{
361 // Just once per run, store header information in the unique static
362 // dictionary that will be written into each project that is saved.
363 // Store the size of "wxStringCharType" so we can convert during recovery
364 // in case the file is used on a system with a different character size.
365 char size = sizeof(wxStringCharType);
367 mDict.AppendData(&size, 1);
368 });
369
370 mDictChanged = false;
371}
372
374{
375}
376
377void ProjectSerializer::StartTag(const wxString & name)
378{
381}
382
383void ProjectSerializer::EndTag(const wxString & name)
384{
387}
388
389void ProjectSerializer::WriteAttr(const wxString & name, const wxChar *value)
390{
391 WriteAttr(name, wxString(value));
392}
393
394void ProjectSerializer::WriteAttr(const wxString & name, const wxString & value)
395{
398
399 const Length len = value.length() * sizeof(wxStringCharType);
400 WriteLength( mBuffer, len );
401 mBuffer.AppendData(value.wx_str(), len);
402}
403
404void ProjectSerializer::WriteAttr(const wxString & name, int value)
405{
408
409 WriteInt( mBuffer, value );
410}
411
412void ProjectSerializer::WriteAttr(const wxString & name, bool value)
413{
416
417 mBuffer.AppendByte(value);
418}
419
420void ProjectSerializer::WriteAttr(const wxString & name, long value)
421{
424
425 WriteLong( mBuffer, value );
426}
427
428void ProjectSerializer::WriteAttr(const wxString & name, long long value)
429{
432
433 WriteLongLong( mBuffer, value );
434}
435
436void ProjectSerializer::WriteAttr(const wxString & name, size_t value)
437{
440
441 WriteULong( mBuffer, value );
442}
443
444void ProjectSerializer::WriteAttr(const wxString & name, float value, int digits)
445{
448
449 mBuffer.AppendData(&value, sizeof(value));
450 WriteDigits( mBuffer, digits );
451}
452
453void ProjectSerializer::WriteAttr(const wxString & name, double value, int digits)
454{
457
458 mBuffer.AppendData(&value, sizeof(value));
459 WriteDigits( mBuffer, digits );
460}
461
462void ProjectSerializer::WriteData(const wxString & value)
463{
465
466 Length len = value.length() * sizeof(wxStringCharType);
467 WriteLength( mBuffer, len );
468 mBuffer.AppendData(value.wx_str(), len);
469}
470
471void ProjectSerializer::Write(const wxString & value)
472{
474 Length len = value.length() * sizeof(wxStringCharType);
475 WriteLength( mBuffer, len );
476 mBuffer.AppendData(value.wx_str(), len);
477}
478
479void ProjectSerializer::WriteName(const wxString & name)
480{
481 wxASSERT(name.length() * sizeof(wxStringCharType) <= SHRT_MAX);
482 UShort id;
483
484 auto nameiter = mNames.find(name);
485 if (nameiter != mNames.end())
486 {
487 id = nameiter->second;
488 }
489 else
490 {
491 // mNames is static. This appends each name to static mDict only once
492 // in each run.
493 UShort len = name.length() * sizeof(wxStringCharType);
494
495 id = mNames.size();
496 mNames[name] = id;
497
499 WriteUShort( mDict, id );
500 WriteUShort( mDict, len );
501 mDict.AppendData(name.wx_str(), len);
502
503 mDictChanged = true;
504 }
505
506 WriteUShort( mBuffer, id );
507}
508
510{
511 return mDict;
512}
513
515{
516 return mBuffer;
517}
518
520{
521 return mBuffer.GetSize() == 0;
522}
523
525{
526 return mDictChanged;
527}
528
529// See ProjectFileIO::LoadProject() for explanation of the blockids arg
531{
532 if (handler == nullptr)
533 return false;
534
535 XMLTagHandlerAdapter adapter(handler);
536
537 std::vector<char> bytes;
538 IdMap mIds;
539 std::vector<IdMap> mIdStack;
540 char mCharSize = 0;
541
542 mIds.clear();
543
544 struct Error{}; // exception type for short-range try/catch
545 auto Lookup = [&mIds]( UShort id ) -> std::string_view
546 {
547 auto iter = mIds.find( id );
548 if (iter == mIds.end())
549 {
550 throw Error{};
551 }
552
553 return iter->second;
554 };
555
556 int64_t stringsCount = 0;
557 int64_t stringsLength = 0;
558
559 auto ReadString = [&mCharSize, &in, &bytes, &stringsCount, &stringsLength](int len) -> std::string
560 {
561 bytes.reserve( len );
562 auto data = bytes.data();
563 in.Read( data, len );
564
565 stringsCount++;
566 stringsLength += len;
567
568 switch (mCharSize)
569 {
570 case 1:
571 return std::string(bytes.data(), len);
572
573 case 2:
574 return FastStringConvert<char16_t>(bytes.data(), len);
575
576 case 4:
577 return FastStringConvert<char32_t>(bytes.data(), len);
578
579 default:
580 wxASSERT_MSG(false, wxT("Characters size not 1, 2, or 4"));
581 break;
582 }
583
584 return {};
585 };
586
587 try
588 {
589 while (!in.Eof())
590 {
591 UShort id;
592
593 switch (in.GetC())
594 {
595 case FT_Push:
596 {
597 mIdStack.push_back(mIds);
598 mIds.clear();
599 }
600 break;
601
602 case FT_Pop:
603 {
604 mIds = mIdStack.back();
605 mIdStack.pop_back();
606 }
607 break;
608
609 case FT_Name:
610 {
611 id = ReadUShort( in );
612 auto len = ReadUShort( in );
613 mIds[id] = ReadString(len);
614 }
615 break;
616
617 case FT_StartTag:
618 {
619 id = ReadUShort( in );
620
621 adapter.EmitStartTag(Lookup(id));
622 }
623 break;
624
625 case FT_EndTag:
626 {
627 id = ReadUShort( in );
628
629 adapter.EndTag(Lookup(id));
630 }
631 break;
632
633 case FT_String:
634 {
635 id = ReadUShort( in );
636 int len = ReadLength( in );
637
638 adapter.WriteAttr(Lookup(id), ReadString(len));
639 }
640 break;
641
642 case FT_Float:
643 {
644 float val;
645
646 id = ReadUShort( in );
647 in.Read(&val, sizeof(val));
648 /* int dig = */ReadDigits(in);
649
650 adapter.WriteAttr(Lookup(id), val);
651 }
652 break;
653
654 case FT_Double:
655 {
656 double val;
657
658 id = ReadUShort( in );
659 in.Read(&val, sizeof(val));
660 /*int dig = */ReadDigits(in);
661
662 adapter.WriteAttr(Lookup(id), val);
663 }
664 break;
665
666 case FT_Int:
667 {
668 id = ReadUShort( in );
669 int val = ReadInt( in );
670
671 adapter.WriteAttr(Lookup(id), val);
672 }
673 break;
674
675 case FT_Bool:
676 {
677 unsigned char val;
678
679 id = ReadUShort( in );
680 in.Read(&val, 1);
681
682 adapter.WriteAttr(Lookup(id), val);
683 }
684 break;
685
686 case FT_Long:
687 {
688 id = ReadUShort( in );
689 long val = ReadLong( in );
690
691 adapter.WriteAttr(Lookup(id), val);
692 }
693 break;
694
695 case FT_LongLong:
696 {
697 id = ReadUShort( in );
698 long long val = ReadLongLong( in );
699 adapter.WriteAttr(Lookup(id), val);
700 }
701 break;
702
703 case FT_SizeT:
704 {
705 id = ReadUShort( in );
706 size_t val = ReadULong( in );
707
708 adapter.WriteAttr(Lookup(id), val);
709 }
710 break;
711
712 case FT_Data:
713 {
714 int len = ReadLength( in );
715 adapter.WriteData(ReadString(len));
716 }
717 break;
718
719 case FT_Raw:
720 {
721 int len = ReadLength( in );
722 adapter.WriteRaw(ReadString(len));
723 }
724 break;
725
726 case FT_CharSize:
727 {
728 in.Read(&mCharSize, 1);
729 }
730 break;
731
732 default:
733 wxASSERT(true);
734 break;
735 }
736 }
737 }
738 catch( const Error& )
739 {
740 // Document was corrupt, or platform differences in size or endianness
741 // were not well canonicalized
742 return false;
743 }
744
745 wxLogInfo(
746 "Loaded %lld string %f Kb in size", stringsCount, stringsLength / 1024.0);
747
748 return adapter.Finalize();
749}
const TranslatableString name
Definition: Distortion.cpp:82
#define XO(s)
Definition: Internat.h:31
wxString FilePath
Definition: Project.h:20
@ FT_Int
@ FT_String
@ FT_EndTag
@ FT_Push
@ FT_Raw
@ FT_Float
@ FT_CharSize
@ FT_Bool
@ FT_Name
@ FT_Double
@ FT_Pop
@ FT_SizeT
@ FT_Data
@ FT_LongLong
@ FT_StartTag
@ FT_Long
std::unordered_map< unsigned short, std::string > IdMap
std::unordered_map< wxString, unsigned short > NameMap
int id
static std::once_flag flag
std::vector< Attribute > AttributesList
Definition: XMLTagHandler.h:40
A facade-like class, that implements buffered reading from the underlying data stream.
size_t Read(void *buffer, size_t maxBytes)
Read up to maxBytes into the buffer. Returns the number of bytes read.
A low overhead memory stream with O(1) append, low heap fragmentation and a linear memory view.
const size_t GetSize() const noexcept
void AppendData(const void *data, const size_t length)
void StartTag(const wxString &name) override
void EndTag(const wxString &name) override
static bool Decode(BufferedStreamReader &in, XMLTagHandler *handler)
static MemoryStream mDict
static TranslatableString FailureMessage(const FilePath &filePath)
const MemoryStream & GetData() const
static NameMap mNames
void WriteName(const wxString &name)
void WriteAttr(const wxString &name, const wxString &value) override
const MemoryStream & GetDict() const
void WriteData(const wxString &value) override
void Write(const wxString &data) override
MemoryStream mBuffer
ProjectSerializer(size_t allocSize=1024 *1024)
Holds a msgid for the translation catalog; may also bind format arguments.
A view into an attribute value. The class does not take the ownership of the data.
This class is an interface which should be implemented by classes which wish to be able to load and s...
Definition: XMLTagHandler.h:42
void WriteAttr(const std::string_view &name, std::string value)
auto end(const Ptr< Type, BaseDeleter > &p)
Enables range-for.
Definition: PackedArray.h:159
auto begin(const Ptr< Type, BaseDeleter > &p)
Enables range-for.
Definition: PackedArray.h:150
Number ReadLittleEndian(BufferedStreamReader &in)
std::string FastStringConvert(const void *bytes, int bytesCount)
void WriteLittleEndian(MemoryStream &out, Number value)
Number ReadBigEndian(BufferedStreamReader &in)
void WriteBigEndian(MemoryStream &out, Number value)