LineByLineDependencyScanner.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include "LineByLineDependencyScanner.h"
  9. #include "assetprocessor.h"
  10. #include "PotentialDependencies.h"
  11. namespace AssetProcessor
  12. {
  13. class RegexComplexityAssertAbsorber
  14. : public AZ::Debug::TraceMessageBus::Handler
  15. {
  16. public:
  17. RegexComplexityAssertAbsorber()
  18. {
  19. AZ::Debug::TraceMessageBus::Handler::BusConnect();
  20. }
  21. virtual ~RegexComplexityAssertAbsorber()
  22. {
  23. AZ::Debug::TraceMessageBus::Handler::BusDisconnect();
  24. }
  25. bool OnPreAssert(const char* /*fileName*/, int /*line*/, const char* /*func*/, const char* message) override
  26. {
  27. // Ignore the regex complexity assert, there's no reason for the asset processor to crash when running a complex regex.
  28. const char* complexityError = AZStd::Internal::RegexError(AZStd::regex_constants::error_complexity);
  29. if (strncmp(message, complexityError, strlen(complexityError)) == 0)
  30. {
  31. return true;
  32. }
  33. else
  34. {
  35. return false;
  36. }
  37. }
  38. };
  39. LineByLineDependencyScanner::SearchResult GlobalSearch(const AZStd::string& scanString, int maxScanIteration, const AZStd::regex& regex, AZStd::function<void(const AZStd::smatch&)> callback)
  40. {
  41. AZStd::smatch result;
  42. AZStd::string::const_iterator searchStart = scanString.begin();
  43. bool useScanTimeout = maxScanIteration > 0;
  44. // Some binary files can cause the regex system to emit an assert that they are too complex to scan.
  45. // There's no harm in this case failing here, so ignore that assert if it occurs.
  46. RegexComplexityAssertAbsorber assertAbsorber;
  47. while (AZStd::regex_search(searchStart, scanString.end(), result, regex) && (!useScanTimeout || maxScanIteration > 0))
  48. {
  49. callback(result);
  50. searchStart = result[0].second;
  51. --maxScanIteration;
  52. }
  53. if (useScanTimeout && maxScanIteration == 0)
  54. {
  55. return LineByLineDependencyScanner::SearchResult::ScanLimitHit;
  56. }
  57. return LineByLineDependencyScanner::SearchResult::Completed;
  58. }
  59. LineByLineDependencyScanner::SearchResult LineByLineDependencyScanner::ScanStringForMissingDependencies(
  60. const AZStd::string& scanString,
  61. int maxScanIteration,
  62. const AZStd::regex& subIdRegex,
  63. const AZStd::regex& uuidRegex,
  64. const AZStd::regex& pathRegex,
  65. PotentialDependencies& potentialDependencies)
  66. {
  67. SearchResult assetIdSearchResult = GlobalSearch(scanString, maxScanIteration, subIdRegex, [this, &potentialDependencies](const AZStd::smatch& assetIdMatchResult)
  68. {
  69. AZ::Uuid uuid(assetIdMatchResult[1].str().c_str());
  70. AZ::u32 subId = AZStd::stoi(assetIdMatchResult[3].str());
  71. AZ::Data::AssetId assetId(uuid, subId);
  72. AZStd::string assetIdAsInFile(AZStd::string::format("%s%s%s",
  73. assetIdMatchResult[1].str().c_str(),
  74. assetIdMatchResult[2].str().c_str(),
  75. assetIdMatchResult[3].str().c_str()));
  76. // If one asset ID appears multiple times, only report it once to avoid too much repetitive output.
  77. PotentialDependencyMetaData dependencyMetaData(assetIdAsInFile, shared_from_this());
  78. potentialDependencies.m_assetIds[assetId] = dependencyMetaData;
  79. });
  80. SearchResult uuidSearchResult = GlobalSearch(scanString, maxScanIteration, uuidRegex, [this, &potentialDependencies, &subIdRegex](const AZStd::smatch& uuidMatchResult)
  81. {
  82. AZStd::string uuidStr = uuidMatchResult[1].str();
  83. AZ::Uuid uuid(uuidStr.c_str());
  84. // If one UUID appears multiple times, only report it once to avoid too much repetitive output.
  85. AZStd::smatch assetIdResult;
  86. const AZStd::string original{ uuidMatchResult.m_original };
  87. if (AZStd::regex_search(original.begin(), original.end(), assetIdResult, subIdRegex)) // check to see if this UUID is part of it a full asset id (which would be caught above)
  88. {
  89. if (assetIdResult.position(0) > uuidMatchResult.position(0))
  90. {
  91. PotentialDependencyMetaData dependencyMetaData(uuidStr, shared_from_this());
  92. potentialDependencies.m_uuids[uuid] = dependencyMetaData;
  93. }
  94. }
  95. else
  96. {
  97. PotentialDependencyMetaData dependencyMetaData(uuidStr, shared_from_this());
  98. potentialDependencies.m_uuids[uuid] = dependencyMetaData;
  99. }
  100. });
  101. // We'll first break up the input string into blocks that *could* contain a path. This is a faster and simpler regex test
  102. // For each block, we'll do a quick string check to see if it contains a path separator or a file extension (.)
  103. // Only if we find one will we do the more expensive path regex check
  104. SearchResult pathSearchResult = GlobalSearch(scanString, maxScanIteration, AZStd::regex(R"~(([^:*?<>|" ]+))~"), [this, &maxScanIteration, &potentialDependencies, &pathRegex](const AZStd::smatch& matchResult)
  105. {
  106. AZStd::string stringSection = matchResult[1].str();
  107. if(stringSection.find('\\') != AZStd::string::npos || stringSection.find('/') != AZStd::string::npos || stringSection.find('.') != AZStd::string::npos)
  108. {
  109. return GlobalSearch(stringSection, maxScanIteration, pathRegex, [this, &potentialDependencies](const AZStd::smatch& pathMatchResult)
  110. {
  111. AZStd::string potentialPath = pathMatchResult[1].str();
  112. PotentialDependencyMetaData dependencyMetaData(potentialPath, shared_from_this());
  113. potentialDependencies.m_paths.insert(dependencyMetaData);
  114. });
  115. }
  116. return SearchResult::Completed;
  117. });
  118. // If any scan did not complete, return that result.
  119. // There should only be one warning per file.
  120. if (assetIdSearchResult != SearchResult::Completed)
  121. {
  122. return assetIdSearchResult;
  123. }
  124. if (uuidSearchResult != SearchResult::Completed)
  125. {
  126. return uuidSearchResult;
  127. }
  128. if (pathSearchResult != SearchResult::Completed)
  129. {
  130. return pathSearchResult;
  131. }
  132. return SearchResult::Completed;
  133. }
  134. // A UUID is groups of hexadecimal digits, that may or may not be separated every 8, 4, 4, 4, 12 characters by a dash.
  135. AZStd::string GetUUIDRegex()
  136. {
  137. const char validUUIDVals[] = R"([\da-fA-F])";
  138. AZStd::string uuidSearchString = AZStd::string::format("\\b(%s{8}-?%s{4}-?%s{4}-?%s{4}-?%s{12})",
  139. validUUIDVals,
  140. validUUIDVals,
  141. validUUIDVals,
  142. validUUIDVals,
  143. validUUIDVals);
  144. return uuidSearchString;
  145. }
  146. bool LineByLineDependencyScanner::ScanFileForPotentialDependencies(
  147. AZ::IO::GenericStream& fileStream,
  148. PotentialDependencies& potentialDependencies,
  149. int maxScanIteration)
  150. {
  151. // An empty file will have no missing dependencies.
  152. AZ::IO::SizeType length = fileStream.GetLength();
  153. if (length == 0)
  154. {
  155. return true;
  156. }
  157. AZStd::vector<char> charBuffer;
  158. charBuffer.resize_no_construct(length + 1);
  159. fileStream.Read(length, charBuffer.data());
  160. charBuffer.back() = 0;
  161. // Search the file line by line. This won't catch cases where a missing
  162. // dependency uses data from multiple lines, but the regexes in use here also wouldn't catch that.
  163. AZStd::vector<AZStd::string> fileLines;
  164. AzFramework::StringFunc::Tokenize(charBuffer.data(), fileLines, "\r\n");
  165. AZStd::string uuidRegexStr(GetUUIDRegex());
  166. AZStd::regex uuidRegex(AZStd::string::format("%s(\\b)", uuidRegexStr.c_str()));
  167. // The sub ID may be immediately after the UUID, or there may be a character separating, like }.
  168. // There is a colon or dash character that separates the sub ID from the asset ID.
  169. // The sub ID may or may not be wrapped in braces of some kind, like [5] or {4}.
  170. // This will match things like:
  171. // {A4844298-8495-4E2A-B587-C6E8ED9552AB}:5
  172. // aaaaaaaa84954E2AB587C6E8ED9552AB-[5]
  173. AZStd::regex subIdRegex(AZStd::string::format(R"(%s(.?[-:][\{\(\[]?)(\d+))", uuidRegexStr.c_str()));
  174. // Don't use a greedy search, a given line may have multiple start/end quotes, find the smallest
  175. // thing that looks like a path. This search won't find things that look like paths without file extensions.
  176. AZStd::regex pathRegex(R"(([\w\\/-]*?\.[\w\d\.-]*))");
  177. int currentLineIndex = 1; // Most file editing software starts at line 1, not 0.
  178. for (const AZStd::string& line : fileLines)
  179. {
  180. SearchResult searchResult = ScanStringForMissingDependencies(
  181. line,
  182. maxScanIteration,
  183. subIdRegex,
  184. uuidRegex,
  185. pathRegex,
  186. potentialDependencies);
  187. switch (searchResult)
  188. {
  189. case SearchResult::ScanLimitHit:
  190. // This doesn't print the actual line in question out because it's likely a line complex enough to hit this limit isn't going to be print friendly.
  191. AZ_Printf(AssetProcessor::ConsoleChannel,
  192. "\tFile will only be partially scanned, line %d matched more than the scan limit allows. To perform a more complete and lengthy scan, use the '--dependencyScanMaxIteration' setting.\n",
  193. currentLineIndex);
  194. break;
  195. default:
  196. break;
  197. }
  198. ++currentLineIndex;
  199. }
  200. return true;
  201. }
  202. bool LineByLineDependencyScanner::DoesScannerMatchFileData(AZ::IO::GenericStream& /*fileStream*/)
  203. {
  204. // This scanner can handle any file.
  205. return true;
  206. }
  207. bool LineByLineDependencyScanner::DoesScannerMatchFileExtension(const AZStd::string& /*fullPath*/)
  208. {
  209. // This scanner can handle any file.
  210. return true;
  211. }
  212. AZStd::string LineByLineDependencyScanner::GetVersion() const
  213. {
  214. return "1.0.0";
  215. }
  216. AZStd::string LineByLineDependencyScanner::GetName() const
  217. {
  218. return "Line by line scanner";
  219. }
  220. AZ::Crc32 LineByLineDependencyScanner::GetScannerCRC() const
  221. {
  222. return AZ::Crc32(GetName().c_str());
  223. }
  224. }