SpreadsheetReader.php 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. <?php
  /**
   * Main class for spreadsheet reading.
   *
   * Works out which concrete reader (XLSX, XLS, CSV or ODS) matches a file,
   * loads the reader class on demand and forwards the SeekableIterator /
   * Countable calls to it.
   *
   * @version 0.5.10
   * @author Martins Pilsetnieks
   */
  class SpreadsheetReader implements SeekableIterator, Countable
  {
      const TYPE_XLSX = 'XLSX';
      const TYPE_XLS = 'XLS';
      const TYPE_CSV = 'CSV';
      const TYPE_ODS = 'ODS';

      /**
       * @var array Lower-cased MIME type => TYPE_* constant.
       *    'application/xml' is intentionally absent (the Excel 2004 XML format
       *    uses it), so such files are classified by their extension instead.
       */
      private static $MimeTypes = array(
          'text/csv' => self::TYPE_CSV,
          'text/comma-separated-values' => self::TYPE_CSV,
          'text/plain' => self::TYPE_CSV,

          'application/vnd.ms-excel' => self::TYPE_XLS,
          'application/msexcel' => self::TYPE_XLS,
          'application/x-msexcel' => self::TYPE_XLS,
          'application/x-ms-excel' => self::TYPE_XLS,
          'application/x-excel' => self::TYPE_XLS,
          'application/x-dos_ms_excel' => self::TYPE_XLS,
          'application/xls' => self::TYPE_XLS,
          'application/xlt' => self::TYPE_XLS,
          'application/x-xls' => self::TYPE_XLS,

          'application/vnd.oasis.opendocument.spreadsheet' => self::TYPE_ODS,
          'application/vnd.oasis.opendocument.spreadsheet-template' => self::TYPE_ODS,

          'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => self::TYPE_XLSX,
          'application/vnd.openxmlformats-officedocument.spreadsheetml.template' => self::TYPE_XLSX,
          'application/xlsx' => self::TYPE_XLSX,
          'application/xltx' => self::TYPE_XLSX
      );

      /**
       * @var array Options handed to the CSV and ODS reader constructors
       */
      private $Options = array(
          'Delimiter' => '',
          'Enclosure' => '"'
      );

      /**
       * @var int Number of next() calls made through this wrapper since the last rewind
       */
      private $Index = 0;

      /**
       * @var SpreadsheetReader_*|null Handle for the reader object, null until one is created
       */
      private $Handle = null;

      /**
       * @var TYPE_*|bool Type of the contained spreadsheet, false until detected
       */
      private $Type = false;

      /**
       * Detects the spreadsheet type and creates the matching reader.
       *
       * The MIME type is consulted first; when it is missing or unknown the
       * extension of the original filename (or of the path) decides. Files
       * classified as XLS are opened right away and, if the XLS reader reports
       * an error, re-classified as XLSX (when the file is a zip archive) or CSV.
       *
       * @param string Path to file
       * @param string Original filename (in case of an uploaded file), used to determine file type, optional
       * @param string MIME type from an upload, used to determine file type, optional
       *
       * @throws Exception If the file is not readable or a parameter is not scalar
       */
      public function __construct($Filepath, $OriginalFilename = false, $MimeType = false)
      {
          // is_readable() is also true for directories, which no reader can open
          if (!is_readable($Filepath) || is_dir($Filepath))
          {
              throw new Exception('SpreadsheetReader: File ('.$Filepath.') not readable');
          }

          // To avoid timezone warnings and exceptions for formatting dates retrieved from files
          $DefaultTZ = @date_default_timezone_get();
          if ($DefaultTZ)
          {
              date_default_timezone_set($DefaultTZ);
          }

          // Checking the other parameters for correctness
          // This should be a check for string but we're lenient
          if (!empty($OriginalFilename) && !is_scalar($OriginalFilename))
          {
              throw new Exception('SpreadsheetReader: Original file (2nd parameter) path is not a string or a scalar value.');
          }
          if (!empty($MimeType) && !is_scalar($MimeType))
          {
              throw new Exception('SpreadsheetReader: Mime type (3rd parameter) is not a string or a scalar value.');
          }

          // 1. Determine type
          if (!$OriginalFilename)
          {
              $OriginalFilename = $Filepath;
          }
          $Extension = strtolower(pathinfo($OriginalFilename, PATHINFO_EXTENSION));

          $this -> Type = self::TypeFromMime($MimeType, $Extension);
          if (!$this -> Type)
          {
              $this -> Type = self::TypeFromExtension($Extension);
          }

          // Pre-checking XLS files, in case they are renamed CSV or XLSX files
          if ($this -> Type === self::TYPE_XLS)
          {
              self::Load(self::TYPE_XLS);
              $this -> Handle = new SpreadsheetReader_XLS($Filepath);
              if ($this -> Handle -> Error)
              {
                  // Releasing the only reference lets PHP run the reader's
                  // destructor exactly once; calling __destruct() by hand
                  // would run it a second time when the handle is replaced.
                  $this -> Handle = null;
                  $this -> Type = self::IsZipArchive($Filepath) ? self::TYPE_XLSX : self::TYPE_CSV;
              }
          }

          // 2. Create handle
          switch ($this -> Type)
          {
              case self::TYPE_XLSX:
                  self::Load(self::TYPE_XLSX);
                  $this -> Handle = new SpreadsheetReader_XLSX($Filepath);
                  break;
              case self::TYPE_CSV:
                  self::Load(self::TYPE_CSV);
                  $this -> Handle = new SpreadsheetReader_CSV($Filepath, $this -> Options);
                  break;
              case self::TYPE_XLS:
                  // Everything already happens above
                  break;
              case self::TYPE_ODS:
                  self::Load(self::TYPE_ODS);
                  $this -> Handle = new SpreadsheetReader_ODS($Filepath, $this -> Options);
                  break;
          }
      }

      /**
       * Maps a MIME type to a spreadsheet type.
       *
       * The value is lower-cased and any parameters ("; charset=...") are
       * dropped before the lookup. Only strings are considered, so booleans
       * and numbers can no longer match a case by loose comparison.
       *
       * @param mixed MIME type as passed to the constructor
       * @param string Lower-cased file extension
       *
       * @return TYPE_*|bool Detected type, false if the MIME type is missing or unknown
       */
      private static function TypeFromMime($MimeType, $Extension)
      {
          if (!is_string($MimeType) || $MimeType === '')
          {
              return false;
          }

          $Parts = explode(';', $MimeType, 2);
          $Normalized = strtolower(trim($Parts[0]));
          if (!isset(self::$MimeTypes[$Normalized]))
          {
              return false;
          }

          $Type = self::$MimeTypes[$Normalized];
          // Excel does weird stuff: text files may arrive with an Excel MIME type
          if ($Type === self::TYPE_XLS && in_array($Extension, array('csv', 'tsv', 'txt'), true))
          {
              return self::TYPE_CSV;
          }
          return $Type;
      }

      /**
       * Maps a lower-cased file extension to a spreadsheet type.
       *
       * @param string Lower-cased file extension, may be empty
       *
       * @return TYPE_* Detected type; anything unrecognised is treated as CSV
       */
      private static function TypeFromExtension($Extension)
      {
          switch ($Extension)
          {
              case 'xlsx':
              case 'xltx': // XLSX template
              case 'xlsm': // Macro-enabled XLSX
              case 'xltm': // Macro-enabled XLSX template
                  return self::TYPE_XLSX;
              case 'xls':
              case 'xlt':
                  return self::TYPE_XLS;
              case 'ods':
              case 'ots': // ODS template
              case 'odt': // Kept for backward compatibility with earlier versions
                  return self::TYPE_ODS;
              default:
                  return self::TYPE_CSV;
          }
      }

      /**
       * Checks whether the file can be opened as a zip archive.
       *
       * Uses ZipArchive instead of zip_open()/zip_close(), which are
       * deprecated as of PHP 8.0.
       *
       * @param string Path to file
       *
       * @return bool True if the file opened as a zip archive
       */
      private static function IsZipArchive($Filepath)
      {
          $Zip = new ZipArchive;
          // open() returns true on success and an integer error code otherwise
          if ($Zip -> open($Filepath) !== true)
          {
              return false;
          }
          $Zip -> close();
          return true;
      }

      /**
       * Gets information about separate sheets in the given file
       *
       * @return array Associative array where key is sheet index and value is sheet name
       */
      public function Sheets()
      {
          return $this -> Handle -> Sheets();
      }

      /**
       * Changes the current sheet to another from the file.
       * Note that changing the sheet will rewind the file to the beginning, even if
       * the current sheet index is provided.
       *
       * @param int Sheet index
       *
       * @return bool True if sheet could be changed to the specified one,
       *    false if not (for example, if incorrect index was provided).
       */
      public function ChangeSheet($Index)
      {
          $Changed = $this -> Handle -> ChangeSheet($Index);
          if ($Changed)
          {
              // A successful change rewinds the reader, so the row counter restarts too
              $this -> Index = 0;
          }
          return $Changed;
      }

      /**
       * Loads the required class for the particular spreadsheet type
       *
       * @param TYPE_* Spreadsheet type, one of TYPE_* constants of this class
       *
       * @throws Exception If the type is not one of the TYPE_* constants
       */
      private static function Load($Type)
      {
          if (!in_array($Type, array(self::TYPE_XLSX, self::TYPE_XLS, self::TYPE_CSV, self::TYPE_ODS), true))
          {
              throw new Exception('SpreadsheetReader: Invalid type ('.$Type.')');
          }

          // 2nd parameter is to prevent autoloading for the class.
          // If autoload works, the require line is unnecessary, if it doesn't, it ends badly.
          if (!class_exists('SpreadsheetReader_'.$Type, false))
          {
              require(dirname(__FILE__).DIRECTORY_SEPARATOR.'SpreadsheetReader_'.$Type.'.php');
          }
      }

      // !Iterator interface methods
      // The #[\ReturnTypeWillChange] lines silence the PHP 8.1+ deprecation about
      // missing return types; older PHP versions read them as plain comments.

      /**
       * Rewind the Iterator to the first element.
       * Similar to the reset() function for arrays in PHP
       */
      #[\ReturnTypeWillChange]
      public function rewind()
      {
          $this -> Index = 0;
          if ($this -> Handle)
          {
              $this -> Handle -> rewind();
          }
      }

      /**
       * Return the current element.
       * Similar to the current() function for arrays in PHP
       *
       * @return mixed current element from the collection, null if there is no reader
       */
      #[\ReturnTypeWillChange]
      public function current()
      {
          if ($this -> Handle)
          {
              return $this -> Handle -> current();
          }
          return null;
      }

      /**
       * Move forward to next element.
       * Similar to the next() function for arrays in PHP
       *
       * @return mixed Whatever the reader's next() returns, null if there is no reader
       */
      #[\ReturnTypeWillChange]
      public function next()
      {
          if ($this -> Handle)
          {
              $this -> Index++;
              return $this -> Handle -> next();
          }
          return null;
      }

      /**
       * Return the identifying key of the current element.
       * Similar to the key() function for arrays in PHP
       *
       * @return mixed Key reported by the reader, null if there is no reader
       */
      #[\ReturnTypeWillChange]
      public function key()
      {
          if ($this -> Handle)
          {
              return $this -> Handle -> key();
          }
          return null;
      }

      /**
       * Check if there is a current element after calls to rewind() or next().
       * Used to check if we've iterated to the end of the collection
       *
       * @return boolean FALSE if there's nothing more to iterate over
       */
      #[\ReturnTypeWillChange]
      public function valid()
      {
          if ($this -> Handle)
          {
              return $this -> Handle -> valid();
          }
          return false;
      }

      // !Countable interface method

      /**
       * Return the count reported by the reader.
       *
       * @return int Reader's count, 0 if there is no reader
       */
      #[\ReturnTypeWillChange]
      public function count()
      {
          if ($this -> Handle)
          {
              return $this -> Handle -> count();
          }
          return 0;
      }

      /**
       * Method for SeekableIterator interface. Takes a position and traverses the file to that position
       * The value can be retrieved with a `current()` call afterwards.
       *
       * @param int Position in file
       *
       * @throws OutOfBoundsException If there is no reader, the position is
       *    negative or not numeric, or the file ends before the position is reached
       */
      #[\ReturnTypeWillChange]
      public function seek($Position)
      {
          if (!$this -> Handle)
          {
              throw new OutOfBoundsException('SpreadsheetReader: No file opened');
          }

          // Negative or non-numeric positions can never exist; without this check
          // they would rewind the reader and silently end up on the first row.
          if (!is_numeric($Position) || $Position < 0)
          {
              $Shown = is_scalar($Position) ? $Position : gettype($Position);
              throw new OutOfBoundsException('SpreadsheetError: Position '.$Shown.' not found');
          }

          $CurrentIndex = $this -> Handle -> key();

          if ($CurrentIndex != $Position)
          {
              // Readers are only stepped forward, so going back means starting over
              if ($Position < $CurrentIndex || is_null($CurrentIndex) || $Position == 0)
              {
                  $this -> rewind();
              }

              // Advance through the wrapper so that $this -> Index stays in step
              while ($this -> Handle -> valid() && ($Position > $this -> Handle -> key()))
              {
                  $this -> next();
              }

              if (!$this -> Handle -> valid())
              {
                  throw new OutOfBoundsException('SpreadsheetError: Position '.$Position.' not found');
              }
          }

          return null;
      }
  }
  315. ?>