  1. /*
  2. * fs/scfs/mmap.c
  3. *
  4. * Copyright (C) 2014 Samsung Electronics Co., Ltd.
  5. * Authors: Sunghwan Yun <sunghwan.yun@samsung.com>
  6. * Jongmin Kim <jm45.kim@samsung.com>
  7. * Sangwoo Lee <sangwoo2.lee@samsung.com>
  8. * Inbae Lee <inbae.lee@samsung.com>
  9. *
  10. * This program has been developed as a stackable file system based on
  11. * the WrapFS, which was written by:
  12. *
  13. * Copyright (C) 1997-2003 Erez Zadok
  14. * Copyright (C) 2001-2003 Stony Brook University
  15. * Copyright (C) 2004-2006 International Business Machines Corp.
  16. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
  17. * Michael C. Thompson <mcthomps@us.ibm.com>
  18. *
  19. * This program is free software: you can redistribute it and/or modify
  20. * it under the terms of the GNU General Public License as published by
  21. * the Free Software Foundation, either version 2 of the License, or
  22. * (at your option) any later version.
  23. *
  24. * This program is distributed in the hope that it will be useful, but
  25. * WITHOUT ANY WARRANTY; without even the implied warranty of
  26. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  27. * General Public License for more details.
  28. *
  29. * You should have received a copy of the GNU General Public License
  30. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  31. */
  32. #include "scfs.h"
  33. #include <linux/lzo.h>
  34. #ifdef SCFS_ASYNC_READ_PAGES
  35. #include <linux/freezer.h>
  36. #include <linux/kthread.h>
  37. #include <linux/gfp.h>
  38. #include <linux/sched.h>
  39. #endif
  40. #ifdef SCFS_MULTI_THREAD_COMPRESSION
  41. extern struct kmem_cache *scfs_cbm_cache;
  42. #endif
  43. /**
  44. * scfs_readpage
  45. *
  46. * Parameters:
  47. * @file: upper file
  48. * @page: upper page from SCFS inode mapping, data will be copied in here
  49. *
  50. * Return:
  51. * SCFS_SUCCESS on success, an error code otherwise
  52. *
  53. * Description:
  54. * - Read in a page by reading a cluster from the file's lower file.
  55. * (Reading in a whole cluster for a single page read is inevitable, but this
  56. * "amplified read" and its decompression overhead should be amortized when
  57. * other pages in that same cluster are accessed later, since those only incur
  58. * a memcpy from the cached cluster buffer.)
  59. * - Recently accessed clusters ("buffer_cache") are cached for later reads.
  60. */
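/*
 * Illustrative note (not in the original source): with the macros used below,
 * a page maps to its cluster roughly as
 *   PAGE_TO_CLUSTER_INDEX(page, sii) == page->index / (sii->cluster_size / PAGE_SIZE)
 *   PGOFF_IN_CLUSTER(page, sii)      == page->index % (sii->cluster_size / PAGE_SIZE)
 * e.g., assuming a 16KB cluster and 4KB pages, page index 5 lives in cluster 1
 * at in-cluster offset 1, so a single lower cluster read can satisfy pages 4-7.
 */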
  61. static inline int _scfs_readpage(struct file *file, struct page *page, int pref_index)
  62. {
  63. struct scfs_inode_info *sii = SCFS_I(page->mapping->host);
  64. struct scfs_sb_info *sbi = SCFS_S(page->mapping->host->i_sb);
  65. struct scfs_cluster_buffer buffer = {NULL, NULL, NULL, NULL, 0};
  66. int ret = 0, compressed = 0;
  67. int alloc_membuffer = 1;
  68. int allocated_index = -1;
  69. int i;
  70. char *virt;
  71. SCFS_PRINT("f:%s i:%d c:0x%x u:0x%x\n",
  72. file->f_path.dentry->d_name.name,
  73. page->index, buffer.c_buffer, buffer.u_buffer);
  74. ASSERT(sii->cluster_size <= SCFS_CLUSTER_SIZE_MAX);
  75. #ifdef SCFS_ASYNC_READ_PROFILE
  76. sbi->scfs_readpage_total_count++;
  77. #endif
  78. #if MAX_BUFFER_CACHE
  79. /* search buffer_cache first in case the cluster is left cached */
  80. if (pref_index >= 0 &&
  81. sbi->buffer_cache[pref_index].ino == sii->vfs_inode.i_ino &&
  82. sbi->buffer_cache[pref_index].clust_num ==
  83. PAGE_TO_CLUSTER_INDEX(page, sii) &&
  84. atomic_read(&sbi->buffer_cache[pref_index].is_used) != 1) {
  85. spin_lock(&sbi->buffer_cache_lock);
  86. /* this pref_index is used for another page */
  87. if (sbi->buffer_cache[pref_index].ino != sii->vfs_inode.i_ino ||
  88. sbi->buffer_cache[pref_index].clust_num !=
  89. PAGE_TO_CLUSTER_INDEX(page, sii) ||
  90. atomic_read(&sbi->buffer_cache[pref_index].is_used) == 1) {
  91. spin_unlock(&sbi->buffer_cache_lock);
  92. sbi->buffer_cache_reclaimed_before_used_count++;
  93. goto pick_slot;
  94. }
  95. atomic_set(&sbi->buffer_cache[pref_index].is_used, 1);
  96. spin_unlock(&sbi->buffer_cache_lock);
  97. virt = kmap_atomic(page);
  98. if (sbi->buffer_cache[pref_index].is_compressed)
  99. memcpy(virt, page_address(sbi->buffer_cache[pref_index].u_page) +
  100. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  101. else
  102. memcpy(virt, page_address(sbi->buffer_cache[pref_index].c_page) +
  103. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  104. atomic_set(&sbi->buffer_cache[pref_index].is_used, 0);
  105. kunmap_atomic(virt);
  106. SetPageUptodate(page);
  107. unlock_page(page);
  108. SCFS_PRINT("%s<h> %d\n",file->f_path.dentry->d_name.name, page->index);
  109. return pref_index + 1;
  110. } else if (pref_index >= 0) {
  111. sbi->buffer_cache_reclaimed_before_used_count++;
  112. goto pick_slot;
  113. }
  114. /* search buffer_cache first in case the cluster is left cached */
  115. for (i = 0; i < MAX_BUFFER_CACHE; i++) {
  116. if (sbi->buffer_cache[i].ino == sii->vfs_inode.i_ino &&
  117. sbi->buffer_cache[i].clust_num ==
  118. PAGE_TO_CLUSTER_INDEX(page, sii) &&
  119. atomic_read(&sbi->buffer_cache[i].is_used) != 1) {
  120. spin_lock(&sbi->buffer_cache_lock);
  121. if (sbi->buffer_cache[i].ino == sii->vfs_inode.i_ino &&
  122. sbi->buffer_cache[i].clust_num ==
  123. PAGE_TO_CLUSTER_INDEX(page, sii) &&
  124. atomic_read(&sbi->buffer_cache[i].is_used) == 1) {
  125. spin_unlock(&sbi->buffer_cache_lock);
  126. goto pick_slot;
  127. }
  128. atomic_set(&sbi->buffer_cache[i].is_used, 1);
  129. spin_unlock(&sbi->buffer_cache_lock);
  130. virt = kmap_atomic(page);
  131. if (sbi->buffer_cache[i].is_compressed)
  132. memcpy(virt, page_address(sbi->buffer_cache[i].u_page) +
  133. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  134. else
  135. memcpy(virt, page_address(sbi->buffer_cache[i].c_page) +
  136. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  137. atomic_set(&sbi->buffer_cache[i].is_used, 0);
  138. kunmap_atomic(virt);
  139. SetPageUptodate(page);
  140. unlock_page(page);
  141. SCFS_PRINT("%s<h> %d\n",
  142. file->f_path.dentry->d_name.name, page->index);
  143. return i + 1;
  144. }
  145. }
  146. pick_slot:
  147. /* pick a slot in buffer_cache to use */
  148. if (atomic_read(&sbi->buffer_cache[sbi->read_buffer_index].is_used) != 1) {
  149. spin_lock(&sbi->buffer_cache_lock);
  150. /* this index is used for another page */
  151. if (atomic_read(&sbi->buffer_cache[sbi->read_buffer_index].is_used) == 1) {
  152. spin_unlock(&sbi->buffer_cache_lock);
  153. goto pick_slot_full;
  154. }
  155. atomic_set(&sbi->buffer_cache[sbi->read_buffer_index].is_used, 1);
  156. allocated_index = sbi->read_buffer_index++;
  157. if (sbi->read_buffer_index >= MAX_BUFFER_CACHE)
  158. sbi->read_buffer_index = 0;
  159. spin_unlock(&sbi->buffer_cache_lock);
  160. buffer.c_page = sbi->buffer_cache[allocated_index].c_page;
  161. buffer.u_page = sbi->buffer_cache[allocated_index].u_page;
  162. sbi->buffer_cache[allocated_index].ino = sii->vfs_inode.i_ino;
  163. sbi->buffer_cache[allocated_index].clust_num =
  164. PAGE_TO_CLUSTER_INDEX(page, sii);
  165. alloc_membuffer = 0;
  166. goto real_io;
  167. }
  168. pick_slot_full:
  169. for (i = 0; i < MAX_BUFFER_CACHE; i++) {
  170. if (atomic_read(&sbi->buffer_cache[i].is_used) != 1) {
  171. spin_lock(&sbi->buffer_cache_lock);
  172. /* this index is used for another page */
  173. if (atomic_read(&sbi->buffer_cache[i].is_used) == 1) {
  174. spin_unlock(&sbi->buffer_cache_lock);
  175. continue;
  176. }
  177. atomic_set(&sbi->buffer_cache[i].is_used, 1);
  178. sbi->read_buffer_index = i + 1;
  179. if (sbi->read_buffer_index >= MAX_BUFFER_CACHE)
  180. sbi->read_buffer_index = 0;
  181. spin_unlock(&sbi->buffer_cache_lock);
  182. buffer.c_page = sbi->buffer_cache[i].c_page;
  183. buffer.u_page = sbi->buffer_cache[i].u_page;
  184. sbi->buffer_cache[i].ino = sii->vfs_inode.i_ino;
  185. sbi->buffer_cache[i].clust_num =
  186. PAGE_TO_CLUSTER_INDEX(page, sii);
  187. allocated_index = i;
  188. alloc_membuffer = 0;
  189. break;
  190. }
  191. }
  192. #endif
  193. real_io:
  194. #ifdef SCFS_ASYNC_READ_PROFILE
  195. sbi->scfs_readpage_io_count++;
  196. #endif
  197. /* sanity check & prepare buffers for scfs_read_cluster */
  198. if (alloc_membuffer == 1 && (buffer.c_page || buffer.c_buffer))
  199. ASSERT(0);
  200. if (!buffer.c_page)
  201. buffer.c_page = scfs_alloc_mempool_buffer(sbi);
  202. if (!buffer.c_page) {
  203. SCFS_PRINT_ERROR("c_page malloc failed\n");
  204. ret = -ENOMEM;
  205. goto out;
  206. }
  207. if (!buffer.c_buffer)
  208. buffer.c_buffer = page_address(buffer.c_page);
  209. if (!buffer.c_buffer) {
  210. SCFS_PRINT_ERROR("c_buffer malloc failed\n");
  211. ret = -ENOMEM;
  212. goto out;
  213. }
  214. if (!buffer.u_page)
  215. buffer.u_page = scfs_alloc_mempool_buffer(sbi);
  216. if (!buffer.u_page) {
  217. SCFS_PRINT_ERROR("u_page malloc failed\n");
  218. ret = -ENOMEM;
  219. goto out;
  220. }
  221. if (!buffer.u_buffer)
  222. buffer.u_buffer = page_address(buffer.u_page);
  223. if (!buffer.u_buffer) {
  224. SCFS_PRINT_ERROR("u_buffer malloc failed\n");
  225. ret = -ENOMEM;
  226. goto out;
  227. }
  228. /* read cluster from lower */
  229. ret = scfs_read_cluster(file, page, buffer.c_buffer, &buffer.u_buffer, &compressed);
  230. if (ret) {
  231. if (ret == -ERANGE)
  232. SCFS_PRINT_ERROR("file %s error on readpage, OOB. ret %x\n",
  233. file->f_path.dentry->d_name.name, ret);
  234. else
  235. SCFS_PRINT_ERROR("read cluster failed, "
  236. "file %s page->index %u ret %d\n",
  237. file->f_path.dentry->d_name.name, page->index, ret);
  238. goto out;
  239. }
  240. #if MAX_BUFFER_CACHE
  241. /* don't need to spinlock, we have is_used=1 for this buffer */
  242. if (alloc_membuffer != 1)
  243. sbi->buffer_cache[allocated_index].is_compressed = compressed;
  244. #endif
  245. #ifdef SCFS_REMOVE_NO_COMPRESSED_UPPER_MEMCPY
  246. /* fill page cache with the decompressed or original page */
  247. if (compressed) {
  248. virt = kmap_atomic(page);
  249. memcpy(virt, page_address(buffer.u_page) +
  250. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  251. kunmap_atomic(virt);
  252. }
  253. #else
  254. /* fill page cache with the decompressed/original data */
  255. virt = kmap_atomic(page);
  256. if (compressed)
  257. memcpy(virt, page_address(buffer.u_page) +
  258. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  259. else
  260. memcpy(virt, page_address(buffer.c_page) +
  261. PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
  262. kunmap_atomic(virt);
  263. #endif
  264. SetPageUptodate(page);
  265. #if MAX_BUFFER_CACHE
  266. #ifndef SCFS_REMOVE_NO_COMPRESSED_UPPER_MEMCPY
  267. if (alloc_membuffer != 1) {
  268. atomic_set(&sbi->buffer_cache[allocated_index].is_used, 0);
  269. }
  270. #else
  271. if (alloc_membuffer != 1 && compressed) {
  272. atomic_set(&sbi->buffer_cache[allocated_index].is_used, 0);
  273. } else if (alloc_membuffer != 1) {
  274. spin_lock(&sbi->buffer_cache_lock);
  275. sbi->buffer_cache[allocated_index].ino = -1;
  276. sbi->buffer_cache[allocated_index].clust_num = -1;
  277. sbi->buffer_cache[allocated_index].is_compressed = -1;
  278. atomic_set(&sbi->buffer_cache[allocated_index].is_used, -1);
  279. spin_unlock(&sbi->buffer_cache_lock);
  280. }
  281. #endif
  282. #endif
  283. out:
  284. unlock_page(page);
  285. if (alloc_membuffer == 1) {
  286. sbi->buffer_cache_overflow_count_smb++;
  287. scfs_free_mempool_buffer(buffer.c_page, sbi);
  288. scfs_free_mempool_buffer(buffer.u_page, sbi);
  289. }
  290. SCFS_PRINT("-f:%s i:%d c:0x%x u:0x%x\n",
  291. file->f_path.dentry->d_name.name,
  292. page->index, buffer.c_buffer, buffer.u_buffer);
  293. SCFS_PRINT("%s<r> %d\n",file->f_path.dentry->d_name.name, page->index);
  294. if (ret < 0)
  295. return ret;
  296. else if (alloc_membuffer != 1)
  297. return allocated_index + 1;
  298. else
  299. return 0;
  300. }
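/*
 * Descriptive note: scfs_readpage is the synchronous ->readpage entry point;
 * it wraps _scfs_readpage() and maps its positive buffer-slot return values to 0.
 */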
  301. static int scfs_readpage(struct file *file, struct page *page)
  302. {
  303. int ret;
  304. #ifdef SCFS_ASYNC_READ_PROFILE
  305. struct scfs_sb_info *sbi = SCFS_S(file->f_mapping->host->i_sb);
  306. atomic_inc(&sbi->scfs_standby_readpage_count);
  307. #endif
  308. ret = _scfs_readpage(file, page, -1);
  309. #ifdef SCFS_ASYNC_READ_PROFILE
  310. atomic_dec(&sbi->scfs_standby_readpage_count);
  311. #endif
  312. return (ret >= 0 ? 0 : ret);
  313. }
  314. #ifdef SCFS_ASYNC_READ_PAGES
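/**
 * smb_init
 *
 * Descriptive note: spawns one "scfs_mb%d" kthread per CPU running smb_thread
 * (the asynchronous readpage workers) and, when SCFS_SMB_THREAD_CPU_AFFINITY is
 * defined, pins each thread to its CPU. If a kthread cannot be created, the
 * already-created threads are stopped and -ENOMEM is returned.
 */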
  315. int smb_init(struct scfs_sb_info *sbi)
  316. {
  317. int i, j;
  318. for (i = 0; i < NR_CPUS; i++) {
  319. sbi->smb_task[i] = kthread_run(smb_thread, sbi, "scfs_mb%d", i);
  320. if (IS_ERR(sbi->smb_task[i])) {
  321. SCFS_PRINT_ERROR("smb_init: creating kthread failed\n");
  322. for (j = 0; j < i; j++)
  323. kthread_stop(sbi->smb_task[j]);
  324. return -ENOMEM;
  325. }
  326. #ifdef SCFS_SMB_THREAD_CPU_AFFINITY
  327. {
  328. struct cpumask cpus[NR_CPUS];
  329. cpumask_clear(&cpus[i]);
  330. cpumask_set_cpu(i, &cpus[i]);
  331. if (sched_setaffinity(sbi->smb_task[i]->pid, &cpus[i])) {
  332. SCFS_PRINT_ERROR("smb_init: set CPU affinity failed\n");
  333. }
  334. }
  335. #endif
  336. }
  337. return 0;
  338. }
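/* smb_destroy: stop every asynchronous-read kthread created by smb_init. */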
  339. void smb_destroy(struct scfs_sb_info *sbi)
  340. {
  341. int i;
  342. for (i = 0; i < NR_CPUS; i++) {
  343. if (sbi->smb_task[i])
  344. kthread_stop(sbi->smb_task[i]);
  345. sbi->smb_task[i] = NULL;
  346. }
  347. }
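/*
 * Descriptive note: page_buffer_smb/file_buffer_smb form a circular queue of
 * MAX_PAGE_BUFFER_SIZE_SMB pending readahead pages. "length" below is the
 * number of queued entries between the consumer index (next_io_index) and the
 * producer index (next_filling_index); a filling index equal to
 * MAX_PAGE_BUFFER_SIZE_SMB is used as the "queue full" marker.
 */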
  348. /* a scaling number of threads will be woken up, on demand */
  349. void wakeup_smb_thread(struct scfs_sb_info *sbi)
  350. {
  351. u32 length = 0, io_index, filling_index;
  352. spin_lock(&sbi->spinlock_smb);
  353. io_index = sbi->page_buffer_next_io_index_smb;
  354. filling_index = sbi->page_buffer_next_filling_index_smb;
  355. spin_unlock(&sbi->spinlock_smb);
  356. if (filling_index == MAX_PAGE_BUFFER_SIZE_SMB)
  357. length = MAX_PAGE_BUFFER_SIZE_SMB;
  358. else if (filling_index > io_index)
  359. length = filling_index - io_index;
  360. else if (filling_index < io_index)
  361. length = (MAX_PAGE_BUFFER_SIZE_SMB - io_index) + filling_index;
  362. else if (filling_index == io_index)
  363. length = 0;
  364. if (length > 0 && sbi->smb_task[0] && !sbi->smb_task_status[0])
  365. wake_up_process(sbi->smb_task[0]);
  366. #if (NR_CPUS > 1)
  367. if (length >= SMB_THREAD_THRESHOLD_2 && sbi->smb_task[1] && !sbi->smb_task_status[1])
  368. wake_up_process(sbi->smb_task[1]);
  369. #if (NR_CPUS > 2)
  370. if (length >= SMB_THREAD_THRESHOLD_3 && sbi->smb_task[2] && !sbi->smb_task_status[2])
  371. wake_up_process(sbi->smb_task[2]);
  372. #if (NR_CPUS > 3)
  373. if (length >= SMB_THREAD_THRESHOLD_4 && sbi->smb_task[3] && !sbi->smb_task_status[3])
  374. wake_up_process(sbi->smb_task[3]);
  375. #endif
  376. #endif
  377. #endif
  378. }
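/**
 * smb_thread
 *
 * Descriptive note: per-CPU worker for asynchronous readpages. It dequeues the
 * next page (plus any immediately following pages that belong to the same file
 * and cluster), reads the first page with _scfs_readpage(), then passes the
 * returned buffer_cache slot (prev_cbi - 1) when reading the remaining pages so
 * the already-decompressed cluster buffer is reused. The extra f_count
 * references taken in scfs_readpages() are dropped here via fput().
 */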
  379. int smb_thread(void *data)
  380. {
  381. u32 length = 0, io_index, filling_index;
  382. struct scfs_sb_info *sbi = (struct scfs_sb_info *)data;
  383. struct page *page;
  384. struct page *temp_page;
  385. struct page *page_buffer[3] = {NULL, NULL, NULL};
  386. struct file *file;
  387. struct file *temp_file = NULL;
  388. struct scfs_inode_info *sii;
  389. int cluster_number = -1;
  390. int page_buffer_count = 0;
  391. int i;
  392. int prev_cbi = 0;
  393. set_freezable();
  394. /* handle any queued-up read requests, or else go back to sleep */
  395. while (!kthread_should_stop()) {
  396. set_current_state(TASK_INTERRUPTIBLE);
  397. spin_lock(&sbi->spinlock_smb);
  398. /* calculate number of pages of page buffer */
  399. io_index = sbi->page_buffer_next_io_index_smb;
  400. filling_index = sbi->page_buffer_next_filling_index_smb;
  401. if (filling_index == MAX_PAGE_BUFFER_SIZE_SMB) {
  402. length = MAX_PAGE_BUFFER_SIZE_SMB;
  403. sbi->page_buffer_next_filling_index_smb =
  404. sbi->page_buffer_next_io_index_smb;
  405. } else if (filling_index > io_index)
  406. length = filling_index - io_index;
  407. else if (filling_index < io_index)
  408. length = (MAX_PAGE_BUFFER_SIZE_SMB - io_index) + filling_index;
  409. else if (filling_index == io_index)
  410. length = 0;
  411. page_buffer_count = 0;
  412. /* the requested page, as well as subsequent pages in the same cluster,
  413. * will be serviced, in two separate readpage calls
  414. */
  415. if (length > 0) {
  416. __set_current_state(TASK_RUNNING);
  417. page = sbi->page_buffer_smb[sbi->page_buffer_next_io_index_smb];
  418. file = sbi->file_buffer_smb[sbi->page_buffer_next_io_index_smb];
  419. sbi->page_buffer_next_io_index_smb++;
  420. if (sbi->page_buffer_next_io_index_smb >= MAX_PAGE_BUFFER_SIZE_SMB)
  421. sbi->page_buffer_next_io_index_smb = 0;
  422. length--;
  423. sii = SCFS_I(page->mapping->host);
  424. cluster_number = PAGE_TO_CLUSTER_INDEX(page, sii);
  425. while (length-- > 0) {
  426. temp_page = sbi->page_buffer_smb[sbi->page_buffer_next_io_index_smb];
  427. temp_file = sbi->file_buffer_smb[sbi->page_buffer_next_io_index_smb];
  428. if ((temp_file == file) &&
  429. (cluster_number == PAGE_TO_CLUSTER_INDEX(temp_page, sii))) {
  430. page_buffer[page_buffer_count++] = temp_page;
  431. sbi->page_buffer_next_io_index_smb++;
  432. if (sbi->page_buffer_next_io_index_smb >=
  433. MAX_PAGE_BUFFER_SIZE_SMB)
  434. sbi->page_buffer_next_io_index_smb = 0;
  435. } else
  436. break;
  437. }
  438. spin_unlock(&sbi->spinlock_smb);
  439. /* read first page */
  440. prev_cbi = _scfs_readpage(file, page, -1);
  441. fput(SCFS_F(file)->lower_file);
  442. fput(file);
  443. page_cache_release(page);
  444. /* read related pages from the same cluster as the first page */
  445. for (i = 0; i < page_buffer_count; i++) {
  446. prev_cbi = _scfs_readpage(file, page_buffer[i], prev_cbi - 1);
  447. fput(SCFS_F(file)->lower_file);
  448. fput(file);
  449. page_cache_release(page_buffer[i]);
  450. }
  451. } else {
  452. //sbi->smb_task_status[xx] = 0;
  453. spin_unlock(&sbi->spinlock_smb);
  454. schedule();
  455. //sbi->smb_task_status[xx] = 1;
  456. }
  457. }
  458. return 0;
  459. }
  460. /**
  461. * scfs_readpages
  462. *
  463. * Parameters:
  464. * @file: upper file
  465. * @*mapping: address_space struct for the file
  466. * @*pages: list of pages to read in
  467. * @nr_pages: number of pages to read in
  468. *
  469. * Return:
  470. * SCFS_SUCCESS on success, an error code otherwise
  471. *
  472. * Description:
  473. * - Asynchronously read pages for readahead. A scaling number of background threads
  474. * will read & decompress them in a slightly deferred but parallelized manner.
  475. */
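/*
 * Illustrative note (not in the original source): before queueing pages, the
 * requested page range is translated into a lower-file page range so that
 * force_page_cache_readahead() can prefetch the compressed clusters. For a
 * compressed file, the start page is cinfo.offset / PAGE_SIZE of the first
 * cluster and the end page is (cinfo.offset + cinfo.size - 1) / PAGE_SIZE of
 * the last cluster touched by the request.
 */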
  476. static int
  477. scfs_readpages(struct file *file, struct address_space *mapping,
  478. struct list_head *pages, unsigned nr_pages)
  479. {
  480. struct scfs_inode_info *sii = SCFS_I(file->f_mapping->host);
  481. struct scfs_sb_info *sbi = SCFS_S(file->f_mapping->host->i_sb);
  482. struct file *lower_file = NULL;
  483. struct page *page;
  484. struct scfs_cinfo cinfo;
  485. loff_t i_size;
  486. pgoff_t start, end;
  487. int page_idx, page_idx_readahead = 1024, ret = 0;
  488. int readahead_page = 0;
  489. int prev_cbi = 0;
  490. int prev_cluster = -1, cur_cluster = -1;
  491. int cluster_idx = 0;
  492. i_size = i_size_read(&sii->vfs_inode);
  493. if (!i_size) {
  494. SCFS_PRINT("file %s: i_size is zero, "
  495. "flags 0x%x sii->clust_info_size %d\n",
  496. file->f_path.dentry->d_name.name, sii->flags,
  497. sii->cinfo_array_size);
  498. return 0;
  499. }
  500. #ifdef SCFS_ASYNC_READ_PROFILE
  501. atomic_add(nr_pages, &sbi->scfs_standby_readpage_count);
  502. #endif
  503. #ifdef SCFS_NOTIFY_RANDOM_READ
  504. lower_file = scfs_lower_file(file);
  505. if (!lower_file) {
  506. SCFS_PRINT_ERROR("file %s: lower file is null!\n",
  507. file->f_path.dentry->d_name.name);
  508. return -EINVAL;
  509. }
  510. /* if the read request was random (enough), hint it to the lower file.
  511. * scfs_sequential_page_number is the tunable threshold.
  512. * filemap.c will later on refer to this FMODE_RANDOM flag.
  513. */
  514. spin_lock(&lower_file->f_lock);
  515. if (nr_pages > sbi->scfs_sequential_page_number)
  516. lower_file->f_mode &= ~FMODE_RANDOM;
  517. else
  518. lower_file->f_mode |= FMODE_RANDOM;
  519. spin_unlock(&lower_file->f_lock);
  520. #endif
  521. lower_file = scfs_lower_file(file);
  522. page = list_entry(pages->prev, struct page, lru);
  523. cluster_idx = page->index / (sii->cluster_size / PAGE_SIZE);
  524. if (sii->compressed) {
  525. mutex_lock(&sii->cinfo_mutex);
  526. ret = get_cluster_info(file, cluster_idx, &cinfo);
  527. mutex_unlock(&sii->cinfo_mutex);
  528. if (ret) {
  529. SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
  530. "i_size %lld\n", ret, i_size);
  531. return ret;
  532. }
  533. if (!cinfo.size || cinfo.size > sii->cluster_size) {
  534. SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
  535. "clust %u cinfo.size %u\n",
  536. file->f_path.dentry->d_name.name,
  537. cluster_idx, cinfo.size);
  538. return -EINVAL;
  539. }
  540. start = (pgoff_t)(cinfo.offset / PAGE_SIZE);
  541. } else {
  542. start = (pgoff_t)(cluster_idx * sii->cluster_size / PAGE_SIZE);
  543. }
  544. cluster_idx = (page->index + nr_pages - 1) / (sii->cluster_size / PAGE_SIZE);
  545. if (sii->compressed) {
  546. mutex_lock(&sii->cinfo_mutex);
  547. ret = get_cluster_info(file, cluster_idx, &cinfo);
  548. mutex_unlock(&sii->cinfo_mutex);
  549. if (ret) {
  550. SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
  551. "i_size %lld\n", ret, i_size);
  552. return ret;
  553. }
  554. if (!cinfo.size || cinfo.size > sii->cluster_size) {
  555. SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
  556. "clust %u cinfo.size %u\n",
  557. file->f_path.dentry->d_name.name,
  558. cluster_idx, cinfo.size);
  559. return -EINVAL;
  560. }
  561. end = (pgoff_t)((cinfo.offset + cinfo.size -1) / PAGE_SIZE);
  562. } else {
  563. end = (pgoff_t)(((cluster_idx + 1) * sii->cluster_size - 1) / PAGE_SIZE);
  564. /* check upper inode size */
  565. /* out of range? for a compressed file this case is handled by returning an
  566. error; which behavior is right? */
  567. if (end > (i_size / PAGE_SIZE))
  568. end = (i_size / PAGE_SIZE);
  569. }
  570. force_page_cache_readahead(lower_file->f_mapping, lower_file,
  571. start, (unsigned long)(end - start +1));
  572. for (page_idx = 0; page_idx < nr_pages; page_idx++) {
  573. page = list_entry(pages->prev, struct page, lru);
  574. list_del(&page->lru);
  575. if (PageReadahead(page))
  576. page_idx_readahead = page_idx;
  577. ret = add_to_page_cache_lru(page, mapping,
  578. page->index, GFP_KERNEL);
  579. if (ret) {
  580. SCFS_PRINT("adding to page cache failed, "
  581. "page %x page->idx %d ret %d\n",
  582. page, page->index, ret);
  583. page_cache_release(page);
  584. continue;
  585. }
  586. /* memory buffer is full or synchronous read request -
  587. call scfs_readpage to read now */
  588. if (sbi->page_buffer_next_filling_index_smb ==
  589. MAX_PAGE_BUFFER_SIZE_SMB || page_idx < page_idx_readahead) {
  590. cur_cluster = PAGE_TO_CLUSTER_INDEX(page, sii);
  591. if (prev_cluster == cur_cluster && prev_cbi > 0)
  592. prev_cbi = _scfs_readpage(file, page, prev_cbi - 1);
  593. else
  594. prev_cbi = _scfs_readpage(file, page, -1);
  595. prev_cluster = cur_cluster;
  596. page_cache_release(page); /* refer line 701 */
  597. } else {
  598. spin_lock(&sbi->spinlock_smb);
  599. /* Queue is not full so add the page into the queue.
  600. Also, here we increase file->f_count to protect
  601. the file structs from multi-threaded accesses */
  602. atomic_long_inc(&SCFS_F(file)->lower_file->f_count);
  603. atomic_long_inc(&file->f_count);
  604. sbi->page_buffer_smb[sbi->page_buffer_next_filling_index_smb] = page;
  605. sbi->file_buffer_smb[sbi->page_buffer_next_filling_index_smb++] = file;
  606. /* check whether page buffer is full and set page buffer full if needed */
  607. if (((sbi->page_buffer_next_filling_index_smb == MAX_PAGE_BUFFER_SIZE_SMB) &&
  608. sbi->page_buffer_next_io_index_smb == 0) ||
  609. (sbi->page_buffer_next_filling_index_smb ==
  610. sbi->page_buffer_next_io_index_smb))
  611. sbi->page_buffer_next_filling_index_smb = MAX_PAGE_BUFFER_SIZE_SMB;
  612. else if (sbi->page_buffer_next_filling_index_smb == MAX_PAGE_BUFFER_SIZE_SMB)
  613. sbi->page_buffer_next_filling_index_smb = 0;
  614. spin_unlock(&sbi->spinlock_smb);
  615. ++readahead_page;
  616. }
  617. //page_cache_release(page);
  618. }
  619. if (readahead_page > 0)
  620. wakeup_smb_thread(sbi);
  621. SCFS_PRINT("<e>\n");
  622. #ifdef SCFS_ASYNC_READ_PROFILE
  623. atomic_sub(nr_pages, &sbi->scfs_standby_readpage_count);
  624. #endif
  625. return 0;
  626. }
  627. #endif /* SCFS_ASYNC_READ_PAGES */
  628. /**
  629. * scfs_write_begin
  630. * @file: The scfs file
  631. * @mapping: The file's address_space
  632. * @pos: The write starting position, in bytes
  633. * @len: Bytes to write in this page
  634. * @flags: Various flags
  635. * @pagep: Pointer to return the page
  636. * @fsdata: Pointer to return fs-specific data (unused)
  637. *
  638. * Description:
  639. * - Prepare a page write, which may require a cluster read and re-compression
  640. * for partially written clusters at the end of a given file. Cluster info list,
  641. * as well as the cluster buffer for the cluster to be written, shall be prepped
  642. * accordingly.
  643. * - Currently SCFS doesn't support random writes, so this function will return
  644. * -EINVAL whenever pos != i_size (i.e., anything other than an append).
  645. *
  646. */
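/*
 * Illustrative note (not in the original source): when a new cinfo_entry is
 * created here, its lower-file offset is derived from the preceding cluster as
 *   offset = prev.offset + prev.size, rounded up to SCFS_CLUSTER_ALIGN_BYTE
 * so clusters are laid out back to back in the lower file; cluster 0 starts at
 * offset 0.
 */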
  647. static int scfs_write_begin(struct file *file, struct address_space *mapping,
  648. loff_t pos, unsigned len, unsigned flags,
  649. struct page **pagep, void **fsdata)
  650. {
  651. pgoff_t index = pos >> PAGE_CACHE_SHIFT;
  652. struct page *page;
  653. struct file *lower_file = NULL;
  654. struct scfs_inode_info *sii = SCFS_I(mapping->host);
  655. struct scfs_cinfo clust_info;
  656. int ret = 0;
  657. SCFS_PRINT("f:%s pos:%lld, len:%d s:%lld\n",
  658. file->f_path.dentry->d_name.name, pos, len,
  659. i_size_read(&sii->vfs_inode));
  660. page = grab_cache_page_write_begin(mapping, index, flags);
  661. if (!page)
  662. return -ENOMEM;
  663. *pagep = page;
  664. if (pos != i_size_read(&sii->vfs_inode)) {
  665. SCFS_PRINT("File %s RANDOM write access! pos = %lld, i_size = %lld\n",
  666. file->f_path.dentry->d_name.name,pos,i_size_read(&sii->vfs_inode));
  667. ret = -EINVAL;
  668. goto out;
  669. }
  670. lower_file = scfs_lower_file(file);
  671. if (!lower_file) {
  672. SCFS_PRINT_ERROR("lower file is null!\n");
  673. ret = -EIO;
  674. goto out;
  675. }
  676. if (IS_COMPRESSABLE(sii)) {
  677. struct cinfo_entry *info_entry;
  678. ret = scfs_get_comp_buffer(sii);
  679. if (ret)
  680. goto out;
  681. mutex_lock(&sii->cinfo_mutex);
  682. if (list_empty(&sii->cinfo_list)) {
  683. /* probably the first cluster write since create or open */
  684. info_entry = scfs_alloc_cinfo_entry(PAGE_TO_CLUSTER_INDEX(page,sii), sii);
  685. if (!info_entry) {
  686. mutex_unlock(&sii->cinfo_mutex);
  687. SCFS_PRINT_ERROR("Cannot alloc new cluster_info.");
  688. ret = -ENOMEM;
  689. goto out;
  690. }
  691. if (PAGE_TO_CLUSTER_INDEX(page,sii) == 0)
  692. info_entry->cinfo.offset = 0;
  693. /* lower cluster already exists so we must be writing on the last cluster */
  694. if (IS_CLUSTER_EXIST(sii, PAGE_TO_CLUSTER_INDEX(page,sii))) {
  695. ret = get_cluster_info(file, PAGE_TO_CLUSTER_INDEX(page,sii),
  696. &clust_info);
  697. if (ret) {
  698. mutex_unlock(&sii->cinfo_mutex);
  699. SCFS_PRINT_ERROR("page is in file, " \
  700. "but cannot get cluster info.");
  701. goto out;
  702. }
  703. info_entry->cinfo.offset = clust_info.offset;
  704. ret = scfs_get_cluster_from_lower(file, clust_info);
  705. if (ret) {
  706. mutex_unlock(&sii->cinfo_mutex);
  707. SCFS_PRINT_ERROR("Fail to get lower data.");
  708. goto out;
  709. }
  710. if (!PageUptodate(page))
  711. sync_page_from_buffer(page, sii->cluster_buffer.u_buffer);
  712. } else if (PAGE_TO_CLUSTER_INDEX(page,sii) > 0
  713. && IS_CLUSTER_EXIST(sii, PAGE_TO_CLUSTER_INDEX(page,sii) - 1)) {
  714. /* we must be adding a new cluster with this page write */
  715. ret = get_cluster_info(file, PAGE_TO_CLUSTER_INDEX(page,sii) - 1,
  716. &clust_info);
  717. if (ret) {
  718. mutex_unlock(&sii->cinfo_mutex);
  719. SCFS_PRINT_ERROR("page is in file, but cannot get cluster info.");
  720. goto out;
  721. }
  722. info_entry->cinfo.offset = clust_info.offset+clust_info.size;
  723. if (clust_info.size%SCFS_CLUSTER_ALIGN_BYTE) {
  724. info_entry->cinfo.offset += (SCFS_CLUSTER_ALIGN_BYTE -
  725. (clust_info.size%SCFS_CLUSTER_ALIGN_BYTE));
  726. }
  727. }
  728. } else {
  729. /* cinfo list is not empty, cluster writes must have happened */
  730. struct cinfo_entry *new_list = NULL;
  731. if (!PageUptodate(page) && pos & (PAGE_CACHE_SIZE - 1)) {
  732. SCFS_PRINT_ERROR("Current page was reclaimed " \
  733. "before be written to lower\n");
  734. ASSERT(0);
  735. }
  736. info_entry = list_entry(sii->cinfo_list.prev,
  737. struct cinfo_entry, entry);
  738. if (info_entry->current_cluster_idx != PAGE_TO_CLUSTER_INDEX(page,sii)) {
  739. if (info_entry->current_cluster_idx ==
  740. PAGE_TO_CLUSTER_INDEX(page,sii) - 1)
  741. new_list = scfs_alloc_cinfo_entry(PAGE_TO_CLUSTER_INDEX(page,sii),
  742. sii);
  743. else
  744. info_entry = NULL;
  745. }
  746. if (!info_entry) {
  747. mutex_unlock(&sii->cinfo_mutex);
  748. SCFS_PRINT_ERROR("Cannot alloc new cluster_info.");
  749. ret = -ENOMEM;
  750. goto out;
  751. }
  752. #ifndef SCFS_MULTI_THREAD_COMPRESSION
  753. if (new_list) {
  754. new_list->cinfo.offset =
  755. info_entry->cinfo.offset +
  756. info_entry->cinfo.size;
  757. if (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  758. new_list->cinfo.offset +=
  759. (SCFS_CLUSTER_ALIGN_BYTE -
  760. (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE));
  761. }
  762. #endif
  763. }
  764. mutex_unlock(&sii->cinfo_mutex);
  765. } else {
  766. /* for uncompressible files, we need at least one cinfo_entry in the list
  767. because write_meta depends on it when determining whether to
  768. append scfs_meta. */
  769. if (list_empty(&sii->cinfo_list)) {
  770. struct cinfo_entry *info_entry = NULL;
  771. info_entry = scfs_alloc_cinfo_entry(PAGE_TO_CLUSTER_INDEX(page,sii),
  772. sii);
  773. sii->cluster_buffer.original_size = 0;
  774. }
  775. if (!PageUptodate(page)) {
  776. unsigned pos_in_page = pos & (PAGE_CACHE_SIZE - 1);
  777. //TODO: read existing page data from lower if page is not up-to-date
  778. if (pos_in_page) {
  779. char *source_addr;
  780. loff_t lower_pos;
  781. lower_pos = pos - pos_in_page;
  782. source_addr = (char*)kmap(page);
  783. ret = scfs_lower_read(lower_file, source_addr, pos_in_page, &lower_pos);
  784. if (ret < 0) {
  785. SCFS_PRINT_ERROR("read fail. ret = %d, size=%d\n", ret, len);
  786. lower_pos -= ret;
  787. } else
  788. ret = 0;
  789. kunmap(page);
  790. }
  791. }
  792. }
  793. SetPageUptodate(page);
  794. out:
  795. if (unlikely(ret)) {
  796. unlock_page(page);
  797. page_cache_release(page);
  798. *pagep = NULL;
  799. }
  800. return ret;
  801. }
  802. extern struct kmem_cache *scfs_info_entry_list;
  803. /**
  804. * scfs_write_end
  805. * @file: The scfs file
  806. * @mapping: The file's address_space
  807. * @pos: The write starting position, in bytes
  808. * @len: Bytes to write in this page
  809. * @copied: Bytes actually written
  810. * @page: Page to be written
  811. * @fsdata: Fs-specific data (unused)
  812. *
  813. * Description:
  814. * - Finishes page write via scfs_lower_write. If the lower folder/partition
  815. * is actually out of space and thus unable to satisfy the write request,
  816. * the write fails with -ENOSPC.
  817. * - If a cluster is ready for write and compressible, then we compress it
  818. * before writing it.
  819. * - Note that this only finishes writing the data given by the user; the
  820. * SCFS metadata will be kept in-memory until scfs_put_lower_file (via fput())
  821. * writes it down in a deferred manner.
  822. *
  823. */
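/*
 * Illustrative note (not in the original source): the store-compressed decision
 * below keeps the compressed buffer only when it is strictly smaller than
 * original_size * comp_threshold / 100. E.g., assuming comp_threshold is 70 and
 * a 16384-byte cluster compresses to 9000 bytes, 9000 < 11468 so the compressed
 * form is written (padded up to SCFS_CLUSTER_ALIGN_BYTE); had it compressed
 * only to 12000 bytes, the original data would be written instead.
 */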
  824. static int scfs_write_end(struct file *file, struct address_space *mapping,
  825. loff_t pos, unsigned len, unsigned copied,
  826. struct page *page, void *fsdata)
  827. {
  828. unsigned from = pos & (PAGE_CACHE_SIZE - 1);
  829. unsigned to = from + copied;
  830. struct scfs_inode_info *sii = SCFS_I(mapping->host);
  831. struct file *lower_file = NULL;
  832. struct scfs_sb_info *sb_info = SCFS_S(sii->vfs_inode.i_sb);
  833. loff_t lower_pos;
  834. int ret;
  835. struct cinfo_entry *info_entry = NULL;
  836. size_t tmp_len;
  837. SCFS_PRINT("f:%s pos:%lld, len:%d s:%lld i:%d\n",
  838. file->f_path.dentry->d_name.name, pos, copied,
  839. i_size_read(&sii->vfs_inode), page->index);
  840. lower_file = scfs_lower_file(file);
  841. if (!lower_file) {
  842. SCFS_PRINT_ERROR("lower file is null!\n");
  843. ret = -EINVAL;
  844. goto out;
  845. }
  846. if (IS_COMPRESSABLE(sii)) {
  847. ret = scfs_get_comp_buffer(sii);
  848. if (ret)
  849. goto out;
  850. sii->cluster_buffer.original_size += copied;
  851. atomic64_add(copied, &sb_info->current_data_size);
  852. ret = scfs_check_space(sb_info, file->f_dentry);
  853. if(ret < 0) {
  854. SCFS_PRINT_ERROR("No more space in lower-storage\n");
  855. goto out;
  856. }
  857. sync_page_to_buffer(page, sii->cluster_buffer.u_buffer);
  858. if (PGOFF_IN_CLUSTER(page, sii) + 1 == sii->cluster_size / PAGE_CACHE_SIZE &&
  859. to == PAGE_CACHE_SIZE) {
  860. #ifdef SCFS_MULTI_THREAD_COMPRESSION
  861. struct scfs_cluster_buffer_mtc *cbm = kmem_cache_alloc(scfs_cbm_cache, GFP_KERNEL);
  862. if (!cbm) {
  863. SCFS_PRINT_ERROR("scfs_cluster_buffer_mtc alloc failed\n");
  864. ret = -ENOMEM;
  865. goto out;
  866. }
  867. memcpy(&cbm->entry, &sii->cluster_buffer, sizeof(struct scfs_cluster_buffer));
  868. cbm->is_compress_write_done = 0;
  869. while (sii->cbm_list_write_count > SMTC_PENDING_THRESHOLD)
  870. msleep(10);
  871. /* is this lock necessary? */
  872. mutex_lock(&sii->cinfo_mutex);
  873. if (list_empty(&sii->cinfo_list)) {
  874. mutex_unlock(&sii->cinfo_mutex);
  875. SCFS_PRINT_ERROR("cinfo list is empty\n");
  876. ret = -EINVAL;
  877. goto out;
  878. }
  879. info_entry = list_entry(sii->cinfo_list.prev,
  880. struct cinfo_entry, entry);
  881. if (info_entry->current_cluster_idx !=
  882. PAGE_TO_CLUSTER_INDEX(page,sii)) {
  883. SCFS_PRINT_ERROR("Cannot find cluster info entry" \
  884. "for cluster idx %d\n", PAGE_TO_CLUSTER_INDEX(page,sii));
  885. ASSERT(0);
  886. }
  887. mutex_unlock(&sii->cinfo_mutex);
  888. /* add this cluster_buffer to cbm_list.
  889. Later, we should decrease current_data_size */
  890. spin_lock(&sb_info->sii_list_lock);
  891. list_add_tail(&cbm->list, &sii->cbm_list);
  892. SCFS_PRINT("cbm = 0x%08x pos = %d\n" , cbm, pos);
  893. cbm->info_entry = info_entry;
  894. sii->cbm_list_comp_count++;
  895. sb_info->cbm_list_total_count++;
  896. if (!sii->is_inserted_to_sii_list) {
  897. sii->is_inserted_to_sii_list = 1;
  898. list_add_tail(&sii->mtc_list, &sb_info->sii_list);
  899. sii->cbm_list_comp = &cbm->list;
  900. sii->cbm_list_write = &cbm->list;
  901. } else if (sii->cbm_list_comp_count == 1) {
  902. sii->cbm_list_comp = &cbm->list;
  903. }
  904. spin_unlock(&sb_info->sii_list_lock);
  905. wakeup_smtc_thread(sb_info);
  906. /* initialize sii->cluster_buffer */
  907. memset(&sii->cluster_buffer, 0, sizeof(struct scfs_cluster_buffer));
  908. atomic_sub(1, &sb_info->current_file_count);
  909. #else
  910. mutex_lock(&sii->cinfo_mutex);
  911. if (list_empty(&sii->cinfo_list)) {
  912. mutex_unlock(&sii->cinfo_mutex);
  913. SCFS_PRINT_ERROR("cinfo list is empty\n");
  914. ret = -EINVAL;
  915. goto out;
  916. }
  917. info_entry = list_entry(sii->cinfo_list.prev,
  918. struct cinfo_entry, entry);
  919. if (info_entry->current_cluster_idx !=
  920. PAGE_TO_CLUSTER_INDEX(page,sii)) {
  921. SCFS_PRINT_ERROR("Cannot find cluster info entry" \
  922. "for cluster idx %d\n", PAGE_TO_CLUSTER_INDEX(page,sii));
  923. ASSERT(0);
  924. }
  925. /* Set cinfo size to the available buffer size because zlib cares about
  926. * the available output buffer size. */
  927. info_entry->cinfo.size = PAGE_CACHE_SIZE*8;
  928. tmp_len = (size_t)info_entry->cinfo.size;
  929. ret = scfs_compress(sii->comp_type, sii->cluster_buffer.c_buffer,
  930. sii->cluster_buffer.u_buffer,
  931. sii->cluster_buffer.original_size,
  932. &tmp_len,
  933. NULL, sb_info);
  934. info_entry->cinfo.size = (__u32)(tmp_len & 0xffff);
  935. if (ret) {
  936. mutex_unlock(&sii->cinfo_mutex);
  937. ClearPageUptodate(page);
  938. SCFS_PRINT_ERROR("compression failed.\n");
  939. goto out;
  940. }
  941. if (info_entry->cinfo.size >=
  942. sii->cluster_buffer.original_size *
  943. sb_info->options.comp_threshold / 100)
  944. info_entry->cinfo.size = sii->cluster_buffer.original_size;
  945. if (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  946. info_entry->pad = SCFS_CLUSTER_ALIGN_BYTE -
  947. (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE);
  948. else
  949. info_entry->pad = 0;
  950. SCFS_PRINT("cluster original size = %ld, comp size = %d, pad = %d\n"
  951. ,sii->cluster_buffer.original_size
  952. ,info_entry->cinfo.size
  953. ,info_entry->pad);
  954. lower_pos = (loff_t)info_entry->cinfo.offset;
  955. if (info_entry->cinfo.size <
  956. sii->cluster_buffer.original_size *
  957. sb_info->options.comp_threshold / 100) {
  958. size_t write_count;
  959. write_count = (size_t)info_entry->cinfo.size+info_entry->pad;
  960. mutex_unlock(&sii->cinfo_mutex);
  961. ret = scfs_lower_write(lower_file, sii->cluster_buffer.c_buffer,
  962. write_count, &lower_pos);
  963. if (ret < 0) {
  964. SCFS_PRINT_ERROR("write fail. ret = %d, size=%ld\n",
  965. ret, write_count);
  966. goto out;
  967. }
  968. if (!sii->compressed)
  969. sii->compressed = 1;
  970. } else {
  971. info_entry->cinfo.size = sii->cluster_buffer.original_size;
  972. info_entry->pad = 0;
  973. mutex_unlock(&sii->cinfo_mutex);
  974. ret = scfs_lower_write(lower_file, sii->cluster_buffer.u_buffer,
  975. sii->cluster_buffer.original_size, &lower_pos);
  976. if (ret < 0) {
  977. SCFS_PRINT_ERROR("write fail. ret = %d, size=%d\n",
  978. ret, sii->cluster_buffer.original_size);
  979. goto out;
  980. }
  981. }
  982. atomic64_sub(sii->cluster_buffer.original_size,&sb_info->current_data_size);
  983. sii->cluster_buffer.original_size = 0;
  984. #endif
  985. }
  986. ret = copied;
  987. } else {
  988. char *source_addr;
  989. lower_pos = pos;
  990. source_addr = (char*)kmap(page);
  991. atomic64_add(copied, &sb_info->current_data_size);
  992. ret = scfs_check_space(sb_info, file->f_dentry);
  993. if (ret < 0) {
  994. SCFS_PRINT_ERROR("No more space in lower-storage\n");
  995. goto out;
  996. }
  997. atomic64_sub(copied, &sb_info->current_data_size);
  998. ret = scfs_lower_write(lower_file, source_addr+from, copied, &lower_pos);
  999. if (ret < 0) {
  1000. SCFS_PRINT_ERROR("write fail. ret = %d, size=%d\n", ret, copied);
  1001. goto out;
  1002. }
  1003. kunmap(page);
  1004. }
  1005. if (pos + copied > i_size_read(&sii->vfs_inode)) {
  1006. i_size_write(&sii->vfs_inode, pos + copied);
  1007. SCFS_PRINT("Expanded file size to [0x%.16llx]\n",
  1008. (unsigned long long)i_size_read(&sii->vfs_inode));
  1009. }
  1010. SetPageUptodate(page);
  1011. out:
  1012. unlock_page(page);
  1013. page_cache_release(page);
  1014. return ret;
  1015. }
  1016. #ifdef SCFS_MULTI_THREAD_COMPRESSION
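/**
 * smtc_init
 *
 * Descriptive note: spawns one "scfs_mtc%d" compression kthread per CPU running
 * smtc_thread, plus a single "scfs_writer_mtc0" kthread running
 * smtc_writer_thread. Returns -ENOMEM if any kthread cannot be created.
 */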
  1017. int smtc_init(struct scfs_sb_info *sbi)
  1018. {
  1019. int i, j;
  1020. for (i = 0; i < NR_CPUS; i++) {
  1021. sbi->smtc_task[i] = kthread_run(smtc_thread, sbi, "scfs_mtc%d", i);
  1022. if (IS_ERR(sbi->smtc_task[i])) {
  1023. SCFS_PRINT_ERROR("smtc_init: creating kthread failed\n");
  1024. for (j = 0; j < i; j++)
  1025. kthread_stop(sbi->smtc_task[j]);
  1026. return -ENOMEM;
  1027. }
  1028. }
  1029. sbi->smtc_writer_task = kthread_run(smtc_writer_thread, sbi, "scfs_writer_mtc%d", 0);
  1030. if (IS_ERR(sbi->smtc_writer_task)) {
  1031. SCFS_PRINT_ERROR("smtc_init: creating kthread failed\n");
  1032. return -ENOMEM;
  1033. }
  1034. return 0;
  1035. }
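/* smtc_destroy: stop the per-CPU compression kthreads and the writer kthread. */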
  1036. void smtc_destroy(struct scfs_sb_info *sbi)
  1037. {
  1038. int i;
  1039. for (i = 0 ; i < NR_CPUS ; i++) {
  1040. if (sbi->smtc_task[i])
  1041. kthread_stop(sbi->smtc_task[i]);
  1042. sbi->smtc_task[i] = NULL;
  1043. }
  1044. if (sbi->smtc_writer_task)
  1045. kthread_stop(sbi->smtc_writer_task);
  1046. sbi->smtc_writer_task = NULL;
  1047. }
  1048. /* a scaling number of threads will be woken up, on demand */
  1049. void wakeup_smtc_thread(struct scfs_sb_info *sbi)
  1050. {
  1051. u32 length;
  1052. spin_lock(&sbi->sii_list_lock);
  1053. length = sbi->cbm_list_total_count;
  1054. spin_unlock(&sbi->sii_list_lock);
  1055. if (length > 0 && sbi->smtc_task[0])
  1056. wake_up_process(sbi->smtc_task[0]);
  1057. #if (NR_CPUS > 1)
  1058. if (length >= SMTC_THREAD_THRESHOLD_2 && sbi->smtc_task[1])
  1059. wake_up_process(sbi->smtc_task[1]);
  1060. #if (NR_CPUS > 2)
  1061. if (length >= SMTC_THREAD_THRESHOLD_3 && sbi->smtc_task[2])
  1062. wake_up_process(sbi->smtc_task[2]);
  1063. #if (NR_CPUS > 3)
  1064. if (length >= SMTC_THREAD_THRESHOLD_4 && sbi->smtc_task[3])
  1065. wake_up_process(sbi->smtc_task[3]);
  1066. #endif
  1067. #endif
  1068. #endif
  1069. }
  1070. void wakeup_smtc_writer_thread(struct scfs_sb_info *sbi)
  1071. {
  1072. if (sbi->smtc_writer_task_status == 0 && sbi->smtc_writer_task)
  1073. wake_up_process(sbi->smtc_writer_task);
  1074. }
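/**
 * smtc_thread
 *
 * Descriptive note: per-CPU compression worker. When not built with
 * CONFIG_SCFS_USE_CRYPTO it preallocates LZO work memory, then repeatedly takes
 * the next pending cluster buffer (cbm) from an inode's cbm_list_comp cursor,
 * moves it to the write stage, and compresses it via scfs_compress_cluster(),
 * which in turn wakes the writer thread. It sleeps while cbm_list_total_count
 * is zero.
 */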
  1075. int smtc_thread(void *info)
  1076. {
  1077. struct scfs_sb_info *sbi = (struct scfs_sb_info *)info;
  1078. struct scfs_inode_info *sii;
  1079. struct scfs_cluster_buffer_mtc *cbm;
  1080. int ret = 0;
  1081. struct list_head *pos;
  1082. void *workdata = NULL;
  1083. #ifndef CONFIG_SCFS_USE_CRYPTO
  1084. int idx;
  1085. switch (sbi->options.comp_type) {
  1086. case SCFS_COMP_LZO:
  1087. workdata = vmalloc(LZO1X_MEM_COMPRESS);
  1088. idx = atomic_inc_return(&sbi->smtc_idx) - 1;
  1089. sbi->smtc_workdata[idx] = workdata;
  1090. if (!workdata) {
  1091. SCFS_PRINT_ERROR("vmalloc for lzo workmem failed, "
  1092. "len %d\n",
  1093. LZO1X_MEM_COMPRESS);
  1094. return -ENOMEM;
  1095. } else {
  1096. SCFS_PRINT("smtc workmem for lzo address : %p, idx : %d\n", workdata, idx);
  1097. }
  1098. break;
  1099. default:
  1100. break;
  1101. }
  1102. #endif
  1103. set_freezable();
  1104. while (!kthread_should_stop()) {
  1105. set_current_state(TASK_INTERRUPTIBLE);
  1106. spin_lock(&sbi->sii_list_lock);
  1107. if (sbi->cbm_list_total_count > 0) {
  1108. list_for_each(pos, &sbi->sii_list) {
  1109. sii = list_entry(pos, struct scfs_inode_info, mtc_list);
  1110. if (sii->cbm_list_comp_count != 0) break; /* stop at the first inode with pending work */
  1111. }
  1112. cbm = list_entry(sii->cbm_list_comp,
  1113. struct scfs_cluster_buffer_mtc, list);
  1114. sii->cbm_list_comp = sii->cbm_list_comp->next;
  1115. sii->cbm_list_comp_count--;
  1116. sii->cbm_list_write_count++;
  1117. sbi->cbm_list_total_count--;
  1118. SCFS_PRINT("(%d %d)\n", sbi->cbm_list_total_count,
  1119. sii->cbm_list_comp_count);
  1120. spin_unlock(&sbi->sii_list_lock);
  1121. __set_current_state(TASK_RUNNING);
  1122. ret = scfs_compress_cluster(sii, cbm, workdata);
  1123. if (ret)
  1124. SCFS_PRINT_ERROR("compress failed. ret = %d\n", ret);
  1125. } else {
  1126. spin_unlock(&sbi->sii_list_lock);
  1127. schedule();
  1128. }
  1129. }
  1130. #ifndef CONFIG_SCFS_USE_CRYPTO
  1131. if (workdata)
  1132. vfree(workdata);
  1133. #endif
  1134. return 0;
  1135. }
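/**
 * smtc_writer_thread
 *
 * Descriptive note: single writer for multi-threaded compression. It walks
 * sbi->sii_list and, for each inode, writes out cluster buffers whose
 * compression has finished (is_compress_write_done == 1) in list order via
 * scfs_write_one_compress_cluster(), and frees buffers that were already
 * written (== 2). It stops at the first buffer still being compressed (== 0)
 * to preserve the on-disk cluster ordering.
 */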
  1136. int smtc_writer_thread(void *info)
  1137. {
  1138. struct scfs_sb_info *sbi = (struct scfs_sb_info *)info;
  1139. struct scfs_inode_info *sii;
  1140. struct scfs_cluster_buffer_mtc *cbm;
  1141. int ret = 0;
  1142. struct list_head *pos, *pos1;
  1143. set_freezable();
  1144. while (!kthread_should_stop()) {
  1145. __set_current_state(TASK_RUNNING);
  1146. spin_lock(&sbi->sii_list_lock);
  1147. list_for_each(pos, &sbi->sii_list) {
  1148. sii = list_entry(pos, struct scfs_inode_info, mtc_list);
  1149. for (pos1 = sii->cbm_list_write; pos1 != &sii->cbm_list;) {
  1150. cbm = list_entry(pos1, struct scfs_cluster_buffer_mtc, list);
  1151. if (cbm->is_compress_write_done == 0)
  1152. break;
  1153. if (cbm->is_compress_write_done == 2) {
  1154. pos1 = pos1->next;
  1155. list_del(&cbm->list);
  1156. kmem_cache_free(scfs_cbm_cache, cbm);
  1157. continue;
  1158. }
  1159. sii->cbm_list_write_count--;
  1160. spin_unlock(&sbi->sii_list_lock);
  1161. BUG_ON(cbm->is_compress_write_done != 1);
  1162. /* do lower buffered-write for this cbm */
  1163. ret = scfs_write_one_compress_cluster(sii, cbm);
  1164. if (ret < 0) {
  1165. SCFS_PRINT_ERROR("lower buffered-write failed.\n");
  1166. break;
  1167. }
  1168. spin_lock(&sbi->sii_list_lock);
  1169. pos1 = pos1->next;
  1170. }
  1171. if (pos1 == &sii->cbm_list)
  1172. sii->cbm_list_write = pos1->next;
  1173. else
  1174. sii->cbm_list_write = pos1;
  1175. }
  1176. spin_unlock(&sbi->sii_list_lock);
  1177. set_current_state(TASK_INTERRUPTIBLE);
  1178. sbi->smtc_writer_task_status = 0;
  1179. schedule();
  1180. sbi->smtc_writer_task_status = 1;
  1181. }
  1182. return 0;
  1183. }
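/**
 * scfs_compress_cluster
 *
 * Descriptive note: compresses one cluster buffer with scfs_compress(). If the
 * result is not smaller than original_size * comp_threshold / 100, the cluster
 * stays uncompressed and the c_page set is freed; otherwise the u_page set is
 * freed. The pad needed to reach SCFS_CLUSTER_ALIGN_BYTE alignment is recorded,
 * the cbm is marked ready for writing (is_compress_write_done = 1), and the
 * writer thread is woken.
 */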
  1184. int scfs_compress_cluster(struct scfs_inode_info *sii,
  1185. struct scfs_cluster_buffer_mtc *cbm, void *workdata)
  1186. {
  1187. struct scfs_sb_info *sb_info = SCFS_S(sii->vfs_inode.i_sb);
  1188. struct cinfo_entry *info_entry;
  1189. int ret = 0;
  1190. struct scfs_cluster_buffer *cb;
  1191. size_t tmp_len;
  1192. info_entry = cbm->info_entry;
  1193. cb = &cbm->entry;
  1194. tmp_len = (size_t)info_entry->cinfo.size;
  1195. ret = scfs_compress(sii->comp_type, cb->c_buffer, cb->u_buffer,
  1196. cb->original_size, &tmp_len, workdata, sb_info);
  1197. info_entry->cinfo.size = (__u32)(tmp_len & 0xffff);
  1198. if (ret) {
  1199. SCFS_PRINT_ERROR("compression failed.\n");
  1200. goto out;
  1201. }
  1202. if (info_entry->cinfo.size >=
  1203. cb->original_size *
  1204. sb_info->options.comp_threshold / 100) {
  1205. info_entry->cinfo.size = cb->original_size;
  1206. //Free invalid pages
  1207. __free_pages(cb->c_page, SCFS_MEMPOOL_ORDER + 1);
  1208. } else {
  1209. //Free invalid pages
  1210. __free_pages(cb->u_page, SCFS_MEMPOOL_ORDER + 1);
  1211. }
  1212. if (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  1213. info_entry->pad = SCFS_CLUSTER_ALIGN_BYTE -
  1214. (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE);
  1215. else
  1216. info_entry->pad = 0;
  1217. spin_lock(&sb_info->sii_list_lock);
  1218. cbm->is_compress_write_done = 1;
  1219. spin_unlock(&sb_info->sii_list_lock);
  1220. SCFS_PRINT("cbm = 0x%08x comp = %d\n" , cbm, info_entry->cinfo.size);
  1221. wakeup_smtc_writer_thread(sb_info);
  1222. out:
  1223. return ret;
  1224. }
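/**
 * scfs_write_one_compress_cluster
 *
 * Descriptive note: computes the cluster's offset in the lower file from the
 * previous cinfo entry when one exists (previous offset + size, rounded up to
 * SCFS_CLUSTER_ALIGN_BYTE), then writes either the compressed buffer plus pad
 * or the original buffer, frees the corresponding pages, and marks the cbm as
 * fully written (is_compress_write_done = 2).
 */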
  1225. int scfs_write_one_compress_cluster(struct scfs_inode_info *sii,
  1226. struct scfs_cluster_buffer_mtc *cbm)
  1227. {
  1228. struct scfs_sb_info *sbi = SCFS_S(sii->vfs_inode.i_sb);
  1229. struct cinfo_entry *info_entry;
  1230. struct cinfo_entry *prev_info_entry;
  1231. loff_t lower_pos;
  1232. int ret = 0;
  1233. struct file *lower_file = sii->lower_file;
  1234. struct scfs_cluster_buffer *cb;
  1235. info_entry = cbm->info_entry;
  1236. cb = &cbm->entry;
  1237. if (info_entry->cinfo.size >= cb->original_size *
  1238. sbi->options.comp_threshold / 100)
  1239. info_entry->cinfo.size = cb->original_size;
  1240. if (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  1241. info_entry->pad = SCFS_CLUSTER_ALIGN_BYTE -
  1242. (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE);
  1243. else
  1244. info_entry->pad = 0;
  1245. /* update current cluster's offset using previous cluster */
  1246. if (info_entry->entry.prev != &sii->cinfo_list) {
  1247. prev_info_entry = list_entry(info_entry->entry.prev, struct cinfo_entry, entry);
  1248. info_entry->cinfo.offset = prev_info_entry->cinfo.offset +
  1249. prev_info_entry->cinfo.size;
  1250. if (prev_info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  1251. info_entry->cinfo.offset += (SCFS_CLUSTER_ALIGN_BYTE -
  1252. (prev_info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE));
  1253. }
  1254. lower_pos = (loff_t)info_entry->cinfo.offset;
  1255. if (info_entry->cinfo.size < cb->original_size *
  1256. sbi->options.comp_threshold / 100) {
  1257. size_t write_count;
  1258. write_count = (size_t)info_entry->cinfo.size + info_entry->pad;
  1259. ret = scfs_lower_write(lower_file, cb->c_buffer, write_count, &lower_pos);
  1260. if (ret < 0) {
  1261. SCFS_PRINT_ERROR("write fail. ret = %d, size=%ld\n",
  1262. ret, write_count);
  1263. goto out;
  1264. }
  1265. if (!sii->compressed)
  1266. sii->compressed = 1;
  1267. __free_pages(cb->c_page, SCFS_MEMPOOL_ORDER + 1);
  1268. } else {
  1269. info_entry->cinfo.size = cb->original_size;
  1270. info_entry->pad = 0;
  1271. ret = scfs_lower_write(lower_file, cb->u_buffer, info_entry->cinfo.size, &lower_pos);
  1272. if (ret < 0) {
  1273. SCFS_PRINT_ERROR("write fail. ret = %d, size=%d\n",
  1274. ret, cb->original_size);
  1275. goto out;
  1276. }
  1277. __free_pages(cb->u_page, SCFS_MEMPOOL_ORDER + 1);
  1278. }
  1279. atomic64_sub(cb->original_size, &sbi->current_data_size);
  1280. spin_lock(&sbi->sii_list_lock);
  1281. cbm->is_compress_write_done = 2;
  1282. spin_unlock(&sbi->sii_list_lock);
  1283. /* free the pages for this cbm */
  1284. // __free_pages(cb->u_page, SCFS_MEMPOOL_ORDER + 1);
  1285. // __free_pages(cb->c_page, SCFS_MEMPOOL_ORDER + 1);
  1286. // atomic64_sub(PAGE_SIZE*32,&sbi->memory_footprint);
  1287. // kmem_cache_free(sbi->scfs_cbm_cache, cbm);
  1288. out:
  1289. return ret;
  1290. }
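/**
 * scfs_write_compress_all_cluster
 *
 * Descriptive note: drains the inode's multi-threaded compression pipeline. It
 * waits (waking the worker threads as needed) until no cluster is left pending
 * compression or deferred writing, then writes any remaining cluster buffers
 * synchronously and resets the inode's multi-threaded-compression state.
 */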
  1291. int scfs_write_compress_all_cluster(struct scfs_inode_info *sii, struct file *lower_file)
  1292. {
  1293. struct scfs_sb_info *sbi = SCFS_S(sii->vfs_inode.i_sb);
  1294. struct scfs_cluster_buffer_mtc *cbm;
  1295. struct cinfo_entry *info_entry;
  1296. loff_t lower_pos;
  1297. int ret = 0;
  1298. struct list_head *pos;
  1299. struct scfs_cluster_buffer *cb;
  1300. spin_lock(&sbi->sii_list_lock);
  1301. if (!sii->is_inserted_to_sii_list) {
  1302. spin_unlock(&sbi->sii_list_lock);
  1303. goto out;
  1304. }
  1305. while (sii->cbm_list_comp_count) {
  1306. spin_unlock(&sbi->sii_list_lock);
  1307. wakeup_smtc_thread(sbi);
  1308. msleep(100);
  1309. spin_lock(&sbi->sii_list_lock);
  1310. }
  1311. while (sii->cbm_list_write_count) {
  1312. spin_unlock(&sbi->sii_list_lock);
  1313. wakeup_smtc_writer_thread(sbi);
  1314. msleep(20);
  1315. spin_lock(&sbi->sii_list_lock);
  1316. }
  1317. spin_unlock(&sbi->sii_list_lock);
  1318. while (sbi->smtc_writer_task_status == 1)
  1319. msleep(20);
  1320. list_for_each(pos, &sii->cbm_list) {
  1321. BUG_ON(sii->cbm_list_comp_count);
  1322. cbm = list_entry(pos, struct scfs_cluster_buffer_mtc, list);
  1323. spin_lock(&sbi->sii_list_lock);
  1324. if (cbm->is_compress_write_done == 2) {
  1325. spin_unlock(&sbi->sii_list_lock);
  1326. kmem_cache_free(scfs_cbm_cache, cbm);
  1327. continue;
  1328. }
  1329. spin_unlock(&sbi->sii_list_lock);
  1330. SCFS_PRINT_ALWAYS("0x%08x 0x%08x 0x%08x 0x%08x %d 0x%08x 0x%08x %d\n",
  1331. cbm->entry.c_page, cbm->entry.u_page, cbm->entry.c_buffer,
  1332. cbm->entry.u_buffer, cbm->entry.original_size,
  1333. cbm->info_entry, &cbm->list, cbm->is_compress_write_done);
  1334. BUG_ON(cbm->is_compress_write_done != 1);
  1335. info_entry = cbm->info_entry;
  1336. cb = &cbm->entry;
  1337. if (info_entry->cinfo.size >= cb->original_size *
  1338. sbi->options.comp_threshold / 100)
  1339. info_entry->cinfo.size = cb->original_size;
  1340. if (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE)
  1341. info_entry->pad = SCFS_CLUSTER_ALIGN_BYTE -
  1342. (info_entry->cinfo.size % SCFS_CLUSTER_ALIGN_BYTE);
  1343. else
  1344. info_entry->pad = 0;
  1345. SCFS_PRINT("comp = %d\n" , info_entry->cinfo.size);
  1346. lower_pos = (loff_t)info_entry->cinfo.offset;
  1347. if (info_entry->cinfo.size < cb->original_size *
  1348. sbi->options.comp_threshold / 100) {
  1349. size_t write_count;
  1350. write_count = (size_t)info_entry->cinfo.size+info_entry->pad;
  1351. ret = scfs_lower_write(lower_file, cb->c_buffer, write_count, &lower_pos);
  1352. if (ret < 0) {
  1353. SCFS_PRINT_ERROR("write fail. ret = %d, size=%ld\n",
  1354. ret, write_count);
  1355. goto out;
  1356. }
  1357. if (!sii->compressed)
  1358. sii->compressed = 1;
  1359. } else {
  1360. info_entry->cinfo.size = cb->original_size;
  1361. info_entry->pad = 0;
  1362. ret = scfs_lower_write(lower_file, cb->u_buffer,
  1363. cb->original_size, &lower_pos);
  1364. if (ret < 0) {
  1365. SCFS_PRINT_ERROR("write fail. ret = %d, size=%d\n",
  1366. ret, cb->original_size);
  1367. goto out;
  1368. }
  1369. }
  1370. atomic64_sub(cb->original_size, &sbi->current_data_size);
  1371. /* clear this cbm */
  1372. __free_pages(cb->u_page, SCFS_MEMPOOL_ORDER + 1);
  1373. __free_pages(cb->c_page, SCFS_MEMPOOL_ORDER + 1);
  1374. kmem_cache_free(scfs_cbm_cache, cbm);
  1375. }
  1376. if (sii->is_inserted_to_sii_list) {
  1377. list_del(&sii->mtc_list);
  1378. INIT_LIST_HEAD(&sii->cbm_list);
  1379. sii->cbm_list_comp_count = 0;
  1380. sii->cbm_list_write_count = 0;
  1381. //sii->is_inserted_to_sii_list = 0;
  1382. sii->cbm_list_comp = NULL;
  1383. //SCFS_PRINT_ALWAYS("end\n");
  1384. }
  1385. out:
  1386. return ret;
  1387. }
  1388. #endif
  1389. /**************************************/
  1390. /* address_space_operations structure */
  1391. /**************************************/
  1392. const struct address_space_operations scfs_aops = {
  1393. .readpage = scfs_readpage,
  1394. #ifdef SCFS_ASYNC_READ_PAGES
  1395. .readpages = scfs_readpages,
  1396. #endif
  1397. .write_begin = scfs_write_begin,
  1398. .write_end = scfs_write_end,
  1399. };