#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
/*
 * Site-specific conversion tables.  Edit these to describe the SRB
 * installation being converted and the iRODS installation receiving it.
 */

/* SRB zone name and the iRODS zone name that replaces it. */
char cv_srb_zone[] = "galvin";
char cv_irods_zone[] = "tempZone";

/*
 * Resource mapping: cv_srb_resources[i] is rewritten to
 * cv_irods_resources[i].  main() only emits SQL for data-objects whose
 * resource appears in cv_srb_resources.
 */
char *cv_srb_resources[] = {"sdsc-mda18-fs", "silas"};
char *cv_irods_resources[] = {"fs1", "i-silas"};
/* Derived from the table so the count cannot drift out of sync with it. */
int nResc = (int) (sizeof(cv_srb_resources) / sizeof(cv_srb_resources[0]));
/* Compile-time guard: the two resource tables must have the same length. */
typedef char cv_resource_tables_must_match[
    (sizeof(cv_srb_resources) == sizeof(cv_irods_resources)) ? 1 : -1];

/* Catalog database flavour; selects the sequence syntax further down. */
#define CAT_POSTGRES 1

/*
 * User mapping: cv_srb_usernames[i] is rewritten to cv_irods_usernames[i].
 */
char *cv_srb_usernames[] = {"srbAdmin", "vidarch"};

char *cv_irods_usernames[] = {"rods", "nvidar"};

int nUserNames = (int) (sizeof(cv_srb_usernames) / sizeof(cv_srb_usernames[0]));
/* Compile-time guard: the two user tables must have the same length. */
typedef char cv_user_tables_must_match[
    (sizeof(cv_srb_usernames) == sizeof(cv_irods_usernames)) ? 1 : -1];

/* SRB user domains that may follow "user." in collection names. */
char *cv_srb_userdomains[] = {"sils"};
int nUserDomains =
    (int) (sizeof(cv_srb_userdomains) / sizeof(cv_srb_userdomains[0]));

/*
 * SQL fragments that allocate a new object id (nextValStr) and refer back
 * to the id just allocated (currValStr).
 */
#ifdef CAT_POSTGRES
char nextValStr[] = "select nextval('R_ObjectID')";
char currValStr[] = "select currval('R_ObjectID')";
#endif
#ifdef CAT_ORACLE
char nextValStr[] = "R_ObjectID.nextval";
char currValStr[] = "R_ObjectID.currval";
#endif
#ifdef CAT_MYSQL
/*
 * NOTE(review): main() writes these strings out verbatim, so the %s
 * placeholder is not expanded -- confirm before building with CAT_MYSQL.
 */
char nextValStr[] = "%s_nextval()";
char currValStr[] = "%s_currval()";
#endif

/*
 * Current time as an 11-digit string; overwritten by setNowTime().
 * getNowStr() is given a 15-byte write bound, so the buffer is sized to
 * hold at least that much instead of exactly fitting the initializer.
 */
char nowTime[16] = "01273184051";

/*
 * Column positions of the needed fields within a '|'-separated input
 * line; -1 until setGlobalIndexes() has seen the header line.
 */
int g_data_name_ix = -1;
int g_resc_name_ix = -1;
int g_data_type_name_ix = -1;
int g_path_name_ix = -1;
int g_size_ix = -1;
int g_data_owner_ix = -1;
int g_data_owner_domain_ix = -1;
int g_data_create_timestamp_ix = -1;
int g_data_last_access_timestamp_ix = -1;
int g_container_repl_enum_ix = -1;
int g_collection_cont_name_ix = -1;
/* Largest of the column positions above; maintained by findIndex(). */
int g_max_ix = -1;
00101
00102 void
00103 getNowStr(char *timeStr);
00104
00105 void
00106 setNowTime() {
00107 getNowStr(nowTime);
00108 printf("Current time as iRODS integer time: %s\n", nowTime);
00109 }
00110
00111 int
00112 checkDateFormat(char *s);
00113
00114 int
00115 findIndex(char *inLine, char *inItem) {
00116 char *cp1, *cp2;
00117 char matchStr[200];
00118 int ix;
00119 strncpy(matchStr, inItem, sizeof(matchStr));
00120 matchStr[ sizeof( matchStr )-1 ] = '\0';
00121 strncat(matchStr, "|", sizeof(matchStr));
00122 cp1 = strstr(inLine, matchStr);
00123 if (cp1==NULL) {
00124 printf("Needed item %s not found, exiting\n",inItem);
00125 exit(-5);
00126 }
00127 ix=0;
00128 for (cp2=inLine;cp2<cp1;cp2++) {
00129 if (*cp2=='|') ix++;
00130 }
00131 printf("%d %s\n",ix,inItem);
00132 if (ix > g_max_ix) g_max_ix=ix;
00133 return(ix);
00134 }
00135
00136 void
00137 setGlobalIndexes(char *inLine) {
00138 g_max_ix=-1;
00139 g_data_name_ix=findIndex(inLine,"DATA_NAME");
00140 g_resc_name_ix=findIndex(inLine,"RSRC_NAME");
00141 g_path_name_ix=findIndex(inLine,"PATH_NAME");
00142 g_data_type_name_ix=findIndex(inLine,"DATA_TYP_NAME");
00143 g_size_ix=findIndex(inLine,"SIZE");
00144 g_data_owner_ix=findIndex(inLine,"DATA_OWNER");
00145 g_data_owner_domain_ix=findIndex(inLine,"DATA_OWNER_DOMAIN");
00146 g_data_create_timestamp_ix=findIndex(inLine,"DATA_CREATE_TIMESTAMP");
00147 g_data_last_access_timestamp_ix=
00148 findIndex(inLine,"DATA_LAST_ACCESS_TIMESTAMP");
00149 g_container_repl_enum_ix=findIndex(inLine,"CONTAINER_REPL_ENUM");
00150 g_collection_cont_name_ix=findIndex(inLine,"COLLECTION_CONT_NAME");
00151 }
00152
00153
00154 int
00155 checkDoCollection(char *inColl) {
00156 char testColl[2000];
00157 if (strstr(inColl, "/container")==inColl) {
00158 return(0);
00159 }
00160 if (strstr(inColl, "/home/")==inColl) {
00161 return(1);
00162 }
00163
00164 testColl[0]='/';
00165 testColl[1]='\0';
00166 strcat(testColl, cv_srb_zone);
00167 strcat(testColl, "/container");
00168 if (strstr(inColl, testColl)==inColl) {
00169 return(0);
00170 }
00171
00172 testColl[0]='/';
00173 testColl[1]='\0';
00174 strcat(testColl, cv_srb_zone);
00175 strcat(testColl, "/trash");
00176 if (strstr(inColl, testColl)==inColl) {
00177 return(1);
00178 }
00179 testColl[0]='/';
00180 testColl[1]='\0';
00181 strcat(testColl, cv_srb_zone);
00182 strcat(testColl, "/");
00183 if (strstr(inColl, testColl)==inColl) {
00184 char *cp2, *cp3;
00185 cp2 = rindex(inColl, '/');
00186 cp3 = inColl + strlen(testColl);
00187 if (cp2 < cp3) {
00188
00189 return(0);
00190 }
00191 }
00192 testColl[0]='/';
00193 testColl[1]='\0';
00194 strcat(testColl, cv_srb_zone);
00195 strcat(testColl, "/");
00196 if (strstr(inColl, testColl)!=inColl) {
00197
00198 return(0);
00199 }
00200 return(1);
00201 }
00202
00203 char *
00204 convertUser(char *inUser, char *inDomain) {
00205 int i;
00206 static char newUserName[100];
00207 for (i=0;i<nUserNames;i++) {
00208 if (strcmp(cv_srb_usernames[i],inUser)==0) {
00209 return(cv_irods_usernames[i]);
00210 }
00211 }
00212 strcpy(newUserName, inUser);
00213 strcat(newUserName, "#");
00214 strcat(newUserName, inDomain);
00215 return(newUserName);
00216 }
00217
/*
 * Convert an SRB timestamp to the string produced by checkDateFormat().
 *
 * The separators at offsets 10, 13 and 16 are first rewritten
 * ('-' -> '.', '.' -> ':', '.' -> ':') so that the text has the
 * "YYYY-MM-DD.hh:mm:ss" layout checkDateFormat() accepts.
 *
 * The result lives in static storage.  A small ring of buffers is used
 * so that a caller may hold the results of several consecutive calls at
 * once (main() keeps the create time and the access time together); a
 * result stays valid until CONVERT_TIME_SLOTS further calls have been
 * made.
 *
 * If checkDateFormat() rejects the text, a message is printed and the
 * separator-adjusted text is returned as is.
 */
char *
convertTime(char *inTime) {
    enum { CONVERT_TIME_SLOTS = 4, CONVERT_TIME_LEN = 50 };
    static char slots[CONVERT_TIME_SLOTS][CONVERT_TIME_LEN];
    static unsigned int nextSlot = 0;
    char *myTime;
    int status;

    myTime = slots[nextSlot];
    nextSlot = (nextSlot + 1) % CONVERT_TIME_SLOTS;

    /* Clear first so the fixed-offset checks below never see stale bytes. */
    memset(myTime, 0, CONVERT_TIME_LEN);
    snprintf(myTime, CONVERT_TIME_LEN, "%s", inTime);

    if (myTime[10] == '-') {
        myTime[10] = '.';
    }
    if (myTime[13] == '.') {
        myTime[13] = ':';
    }
    if (myTime[16] == '.') {
        myTime[16] = ':';
    }

    status = checkDateFormat(myTime);
    if (status) {
        printf("convertTime checkDateFormat error for '%s'\n", inTime);
    }

    return (myTime);
}
00241
00242 char *
00243 convertCollection(char *inColl) {
00244 static char outColl[2000];
00245 char outColl2[2000];
00246 char testStr[50];
00247 char *cp;
00248 int i,j;
00249 strncpy(outColl, inColl, sizeof(outColl));
00250
00251 if (strstr(inColl, "/home/")==inColl) {
00252 strcpy(outColl, cv_irods_zone);
00253 strcat(outColl, inColl);
00254 }
00255
00256 strcpy(testStr, "/");
00257 strcat(testStr, cv_srb_zone);
00258 strcat(testStr, "/");
00259 cp=strstr(outColl, testStr);
00260 if (cp != NULL) {
00261 int count;
00262 outColl2[0]='\0';
00263 count = cp-(char*)&outColl;
00264 strncpy(outColl2, outColl, count);
00265 outColl2[count]='\0';
00266 strcat(outColl2, "/");
00267 strcat(outColl2, cv_irods_zone);
00268 strcat(outColl2, cp+strlen(cv_srb_zone)+1);
00269 strcpy(outColl, outColl2);
00270 }
00271 for (i=0;i<nUserDomains;i++) {
00272 for (j=0;j<nUserNames;j++) {
00273 strcpy(testStr, "/");
00274 strcat(testStr, cv_srb_usernames[j]);
00275 strcat(testStr, ".");
00276 strcat(testStr, cv_srb_userdomains[i]);
00277 cp=strstr(outColl, testStr);
00278 if (cp != NULL) {
00279 int count;
00280 outColl2[0]='\0';
00281 count = cp-(char*)&outColl;
00282 strncpy(outColl2, outColl, count);
00283 outColl2[count]='\0';
00284 strcat(outColl2, "/");
00285 strcat(outColl2, cv_irods_usernames[j]);
00286 strcat(outColl2, cp+strlen(testStr));
00287 strcpy(outColl, outColl2);
00288 }
00289 }
00290 }
00291
00292 return(outColl);
00293 }
00294
00295 int
00296 main(argc, argv)
00297 int argc;
00298 char **argv;
00299 {
00300 FILE *FI, *FO;
00301 int wval;
00302 char *rchar;
00303 char buf[1024];
00304 int nInLines=0;
00305 int nOutItems=0;
00306 int i;
00307 char *ixFTL[50];
00308 int maxIxFTL=50;
00309 int nIxFTL=0;
00310 int doDataInsert;
00311 char *v_collection;
00312 char *v_create_time;
00313 char *v_access_time;
00314 char *v_owner;
00315 char *v_owner_domain;
00316 char *newResource;
00317
00318 setNowTime();
00319
00320 if (argc < 3) {
00321 printf("a.out file-in file-out\n");
00322 return(-1);
00323 }
00324
00325 FI = fopen(argv[1],"r");
00326 if (FI==0)
00327 {
00328 fprintf(stderr,"can't open input file %s\n",argv[1]);
00329 return(-2);
00330 }
00331
00332 FO = fopen(argv[2],"w");
00333 if (FO==0)
00334 {
00335 fprintf(stderr,"can't open output file %s\n",argv[2]);
00336 fclose( FI );
00337 return(-3);
00338 }
00339
00340 memset(&buf, 0, (size_t)sizeof(buf));
00341 rchar='\0';
00342 do {
00343 rchar = fgets(&buf[0], sizeof(buf), FI);
00344 if (rchar=='\0') {
00345 break;
00346 }
00347 nInLines++;
00348 if (nInLines==1) {
00349 if (strstr(buf, "GET_CHANGED_DATA_CORE_INFO")==0) {
00350 printf("This program only handles Spull.log.data type files.\n");
00351 fclose( FO );
00352 return(-4);
00353 }
00354 }
00355 if (nInLines==2) {
00356 setGlobalIndexes(buf);
00357 }
00358 if (nInLines > 2) {
00359 for (i=0;i<maxIxFTL;i++) {
00360 ixFTL[i]=0;
00361 }
00362 ixFTL[0]=&buf[0];
00363 nIxFTL=1;
00364 for (i=0;i<sizeof(buf);i++) {
00365 if (buf[i]=='\0') break;
00366 if (buf[i]=='\n') {
00367 buf[i]='\0';
00368 break;
00369 }
00370 if (buf[i]=='|') {
00371 buf[i]='\0';
00372 ixFTL[nIxFTL++]=&buf[i+1];
00373 }
00374 }
00375
00376 if (nIxFTL < g_max_ix) {
00377 printf("Missing item(s) from line %d, exiting\n", nInLines);
00378 exit(-6);
00379 }
00380
00381
00382
00383 doDataInsert=0;
00384 for (i=0;i<nResc;i++) {
00385 if (strcmp(cv_srb_resources[i],
00386 ixFTL[g_resc_name_ix])==0) {
00387 newResource=cv_irods_resources[i];
00388 doDataInsert=1;
00389 }
00390 }
00391 if (doDataInsert==0) {
00392 printf("Not inserting data-object on resource %s\n",
00393 ixFTL[g_resc_name_ix]);
00394 }
00395
00396
00397
00398
00399
00400 if (doDataInsert) {
00401 if (checkDoCollection(ixFTL[g_collection_cont_name_ix])==1) {
00402
00403 v_collection = convertCollection(
00404 ixFTL[g_collection_cont_name_ix]);
00405 v_create_time = convertTime(
00406 ixFTL[g_data_create_timestamp_ix]);
00407 v_access_time = convertTime(
00408 ixFTL[g_data_last_access_timestamp_ix]);
00409 v_owner_domain = ixFTL[g_data_owner_domain_ix];
00410 v_owner = convertUser(ixFTL[g_data_owner_ix],
00411 v_owner_domain);
00412
00413 wval = fprintf(FO, "insert into R_DATA_MAIN (data_id, coll_id, data_name, data_repl_num, data_version, data_type_name, data_size, resc_name, data_path, data_owner_name, data_owner_zone, data_is_dirty, create_ts, modify_ts) values ((%s), (select coll_id from R_COLL_MAIN where coll_name ='%s'), '%s', '%s', ' ', '%s', '%s', '%s', '%s', '%s', '%s', '1', '%s', '%s');\n",
00414 nextValStr,
00415 v_collection,
00416 ixFTL[g_data_name_ix],
00417 ixFTL[g_container_repl_enum_ix],
00418 ixFTL[g_data_type_name_ix],
00419 ixFTL[g_size_ix],
00420 newResource,
00421 ixFTL[g_path_name_ix],
00422 v_owner,
00423 cv_irods_zone,
00424 v_create_time,
00425 v_access_time
00426 );
00427
00428 wval = fprintf(FO,
00429 "insert into R_OBJT_ACCESS ( object_id, user_id, access_type_id , create_ts, modify_ts) values ( (%s), (select user_id from R_USER_MAIN where user_name = '%s'), '1200', '%s', '%s');\n",
00430 currValStr,
00431 v_owner,
00432 nowTime,
00433 nowTime);
00434
00435 if (wval < 0) {
00436 perror("fwriting data:");
00437 }
00438 nOutItems++;
00439 }
00440 }
00441 }
00442 } while (rchar!='\0');
00443 if (nOutItems > 0) {
00444 wval = fprintf(FO, "commit;\n");
00445 }
00446
00447 printf("Processed %d input lines\n",nInLines);
00448 printf("Wrote output lines for %d items\n",nOutItems);
00449 fclose(FI);
00450 fclose(FO);
00451 return(0);
00452 }
00453
00454
/* Status returned by checkDateFormat() for text it cannot interpret.
   Parenthesized so the negative value expands safely in any expression. */
#define DATE_FORMAT_ERR (-10)
/* Size of the buffers that hold a time value as a decimal string. */
#define TIME_LEN 32
/* 64-bit signed integer used when formatting time values. */
typedef long long rodsLong_t;
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
/*
 * Write the current time, in seconds since the epoch, into timeStr as a
 * zero-padded 11-digit decimal string.
 *
 * timeStr must have room for 12 bytes (11 digits plus the terminator);
 * the write is bounded to exactly that.  The value is formatted as a
 * long long so that it does not wrap once time_t exceeds 32 bits.
 */
void
getNowStr(char *timeStr)
{
    time_t now = time(NULL);

    snprintf(timeStr, 12, "%011lld", (long long) now);
}
00477
/*
 * Return 1 if every character of inStr is a decimal digit, else 0.
 * The empty string contains no non-digit and therefore yields 1.
 */
int
isInteger (char *inStr)
{
    const char *p;

    for (p = inStr; *p != '\0'; p++) {
        /* isdigit() is only defined for unsigned char values and EOF. */
        if (!isdigit((unsigned char) *p)) {
            return (0);
        }
    }
    return (1);
}
00493
#ifndef TIME_LEN
#define TIME_LEN 32     /* fallback; same value as the file-level definition */
#endif

/*
 * Convert a local calendar time of the form "YYYY-MM-DD.hh:mm:ss" into
 * seconds since the epoch, written to unixTime as a decimal string.
 *
 * localTime - input text; only the digits at the fixed field offsets are
 *             read, the separator characters themselves are not checked.
 * unixTime  - output buffer of at least TIME_LEN bytes.
 *
 * Always returns 0.
 *
 * tm_isdst is set to -1 so that mktime() works out whether daylight
 * saving time applies to the given date, rather than reusing whatever
 * applies at the moment the program runs.  The result is printed as a
 * long long so that values beyond 32 bits are not truncated.
 */
int
localToUnixTime (char *localTime, char *unixTime)
{
    struct tm parts;
    time_t newTime;
    char s[TIME_LEN];

    /* Zero-filled, always terminated copy: short input reads as zeros. */
    memset(s, 0, sizeof(s));
    strncpy(s, localTime, sizeof(s) - 1);

    memset(&parts, 0, sizeof(parts));

    /* Cut the copy from the right so each atoi() sees a single field. */
    s[19] = '\0';
    parts.tm_sec = atoi(&s[17]);
    s[16] = '\0';
    parts.tm_min = atoi(&s[14]);
    s[13] = '\0';
    parts.tm_hour = atoi(&s[11]);
    s[10] = '\0';
    parts.tm_mday = atoi(&s[8]);
    s[7] = '\0';
    parts.tm_mon = atoi(&s[5]) - 1;
    s[4] = '\0';
    parts.tm_year = atoi(&s[0]) - 1900;
    parts.tm_isdst = -1;

    newTime = mktime(&parts);
    snprintf(unixTime, TIME_LEN, "%lld", (long long) newTime);
    return (0);
}
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548 int
00549 checkDateFormat(char *s)
00550 {
00551
00552 int len;
00553 char t[] = "0000-00-00.00:00:00";
00554 char outUnixTime[TIME_LEN];
00555 int status;
00556 int offset = 0;
00557
00558 if (isInteger (s))
00559 return (0);
00560
00561 len = strlen(s);
00562
00563 if (s[len - 1] == 's') {
00564
00565 s[len - 1] = '\0';
00566 offset = atoi (s);
00567 snprintf (s, 19, "%d", offset);
00568 return 0;
00569 } else if (s[len - 1] == 'm') {
00570
00571 s[len - 1] = '\0';
00572 offset = atoi (s) * 60;
00573 snprintf (s, 19, "%d", offset);
00574 return 0;
00575 } else if (s[len - 1] == 'h') {
00576
00577 s[len - 1] = '\0';
00578 offset = atoi (s) * 3600;
00579 snprintf (s, 19, "%d", offset);
00580 return 0;
00581 } else if (s[len - 1] == 'd') {
00582
00583 s[len - 1] = '\0';
00584 offset = atoi (s) * 3600 * 24;
00585 snprintf (s, 19, "%d", offset);
00586 return 0;
00587 } else if (s[len - 1] == 'y') {
00588
00589 s[len - 1] = '\0';
00590 offset = atoi (s) * 3600 * 24 * 365;
00591 snprintf (s, 19, "%d", offset);
00592 return 0;
00593 } else if (len < 19) {
00594
00595 if (isdigit(s[0]) && isdigit(s[1]) && isdigit(s[2]) && isdigit(s[3])) {
00596
00597 strcat(s,(char *)&t[len]);
00598 } else {
00599
00600 int mypos;
00601
00602
00603 mypos = len - 1;
00604 while (mypos >= 0) {
00605 if (isdigit (s[mypos]))
00606 offset += s[mypos] - 48;
00607 else
00608 return (DATE_FORMAT_ERR);
00609
00610 mypos--;
00611 if (mypos >= 0)
00612 if (isdigit (s[mypos]))
00613 offset += 10 * (s[mypos] - 48);
00614 else
00615 return (DATE_FORMAT_ERR);
00616 else
00617 break;
00618
00619 mypos--;
00620 if (mypos >= 0)
00621 if (s[mypos] != ':') return (DATE_FORMAT_ERR);
00622
00623
00624 mypos--;
00625 if (mypos >= 0)
00626 if (isdigit (s[mypos]))
00627 offset += 60 * (s[mypos] - 48);
00628 else
00629 return (DATE_FORMAT_ERR);
00630 else
00631 break;
00632
00633 mypos--;
00634 if (mypos >= 0)
00635 if (isdigit (s[mypos]))
00636 offset += 10 * 60 * (s[mypos] - 48);
00637 else
00638 return (DATE_FORMAT_ERR);
00639 else
00640 break;
00641
00642 mypos--;
00643 if (mypos >= 0)
00644 if (s[mypos] != ':') return (DATE_FORMAT_ERR);
00645
00646
00647 mypos--;
00648 if (mypos >= 0)
00649 if (isdigit (s[mypos]))
00650 offset += 3600 * (s[mypos] - 48);
00651 else
00652 return (DATE_FORMAT_ERR);
00653 else
00654 break;
00655
00656 mypos--;
00657 if (mypos >= 0)
00658 if (isdigit (s[mypos]))
00659 offset += 10 * 3600 * (s[mypos] - 48);
00660 else
00661 return (DATE_FORMAT_ERR);
00662 else
00663 break;
00664
00665 mypos--;
00666 if (mypos >= 0)
00667 if (s[mypos] != '.') return (DATE_FORMAT_ERR);
00668
00669
00670
00671 mypos--;
00672 if (mypos >= 0)
00673 if (isdigit (s[mypos]))
00674 offset += 24 * 3600 * (s[mypos] - 48);
00675 else
00676 return (DATE_FORMAT_ERR);
00677 else
00678 break;
00679
00680 mypos--;
00681 if (mypos >= 0)
00682 if (isdigit (s[mypos]))
00683 offset += 10 * 24 * 3600 * (s[mypos] - 48);
00684 else
00685 return (DATE_FORMAT_ERR);
00686 else
00687 break;
00688 }
00689 snprintf (s, 19, "%d", offset);
00690 return (0);
00691 }
00692 }
00693
00694 if (isdigit(s[0]) && isdigit(s[1]) && isdigit(s[2]) && isdigit(s[3]) &&
00695 isdigit(s[5]) && isdigit(s[6]) && isdigit(s[8]) && isdigit(s[9]) &&
00696 isdigit(s[11]) && isdigit(s[12]) && isdigit(s[14]) && isdigit(s[15]) &&
00697 isdigit(s[17]) && isdigit(s[18]) &&
00698 s[4] == '-' && s[7] == '-' && s[10] == '.' &&
00699 s[13] == ':' && s[16] == ':' ) {
00700 status = localToUnixTime (s, outUnixTime);
00701 if (status >= 0) {
00702 strncpy (s, outUnixTime, TIME_LEN);
00703 }
00704 return(status);
00705 } else {
00706 return(DATE_FORMAT_ERR);
00707 }
00708 }