#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
/*
 * Site-specific conversion settings.  Edit these tables to describe the
 * SRB installation being converted and the iRODS installation receiving
 * the data, then rebuild.
 */

/* Zone name used in SRB collection paths and the iRODS zone replacing it. */
char cv_srb_zone[] = "galvin";
char cv_irods_zone[] = "tempZone";

/*
 * Parallel arrays: a data object stored on cv_srb_resources[i] is written
 * out with resource name cv_irods_resources[i].  main() skips data objects
 * on any resource not listed here.  nResc is the number of entries.
 */
char *cv_srb_resources[] = {"sdsc-mda18-fs", "silas"};
char *cv_irods_resources[] = {"fs1", "i-silas"};
int nResc = 2;

/* Catalog database flavor; selects the sequence syntax defined below. */
#define CAT_POSTGRES 1

/*
 * Parallel arrays: SRB user cv_srb_usernames[i] becomes iRODS user
 * cv_irods_usernames[i] (see convertUser and convertCollection).
 * nUserNames is the number of entries.
 */
char *cv_srb_usernames[] = {"srbAdmin", "vidarch"};

char *cv_irods_usernames[] = {"rods", "nvidar"};

int nUserNames = 2;

/*
 * SRB user domains; convertCollection looks for "/<user>.<domain>" path
 * components built from these and the user table above.
 */
char *cv_srb_userdomains[] = {"sils"};
int nUserDomains = 1;

/*
 * SQL fragments producing the next / current object id.  They are
 * substituted verbatim into the generated insert statements.
 */
#ifdef CAT_POSTGRES
char nextValStr[] = "select nextval('R_ObjectID')";
char currValStr[] = "select currval('R_ObjectID')";
#endif
#ifdef CAT_ORACLE
char nextValStr[] = "R_ObjectID.nextval";
char currValStr[] = "R_ObjectID.currval";
#endif
#ifdef CAT_MYSQL
/*
 * NOTE(review): these contain a %s placeholder, but main() passes the
 * strings as plain %s arguments, so the placeholder would be written to
 * the output literally -- confirm before enabling CAT_MYSQL.
 */
char nextValStr[] = "%s_nextval()";
char currValStr[] = "%s_currval()";
#endif
#if !defined(CAT_POSTGRES) && !defined(CAT_ORACLE) && !defined(CAT_MYSQL)
#error "Define one of CAT_POSTGRES, CAT_ORACLE or CAT_MYSQL"
#endif

/*
 * Timestamp string used for the access rows; overwritten by setNowTime()
 * at startup.  Sized explicitly (rather than by the initializer) so that
 * it is at least as large as the 15-byte limit getNowStr() writes with.
 */
char nowTime[32] = "01273184051";

/*
 * Zero-based column positions of the needed items in the '|'-separated
 * input, filled in from the header line by setGlobalIndexes().
 * -1 means "not located yet".  g_max_ix is the largest position found.
 */
int g_data_name_ix = -1;
int g_resc_name_ix = -1;
int g_data_type_name_ix = -1;
int g_path_name_ix = -1;
int g_size_ix = -1;
int g_data_owner_ix = -1;
int g_data_owner_domain_ix = -1;
int g_data_create_timestamp_ix = -1;
int g_data_last_access_timestamp_ix = -1;
int g_container_repl_enum_ix = -1;
int g_collection_cont_name_ix = -1;
int g_max_ix = -1;
00101
00102 void
00103 getNowStr(char *timeStr);
00104
00105 void
00106 setNowTime() {
00107 getNowStr(nowTime);
00108 printf("Current time as iRODS integer time: %s\n", nowTime);
00109 }
00110
00111 int
00112 checkDateFormat(char *s);
00113
00114 int
00115 findIndex(char *inLine, char *inItem) {
00116 char *cp1, *cp2;
00117 char matchStr[200];
00118 int ix;
00119 strncpy(matchStr, inItem, sizeof(matchStr));
00120 matchStr[ sizeof( matchStr )-1 ] = '\0';
00121 strncat(matchStr, "|", sizeof(matchStr));
00122 cp1 = strstr(inLine, matchStr);
00123 if (cp1==NULL) {
00124 printf("Needed item %s not found, exiting\n",inItem);
00125 exit(-5);
00126 }
00127 ix=0;
00128 for (cp2=inLine;cp2<cp1;cp2++) {
00129 if (*cp2=='|') ix++;
00130 }
00131 printf("%d %s\n",ix,inItem);
00132 if (ix > g_max_ix) g_max_ix=ix;
00133 return(ix);
00134 }
00135
00136 void
00137 setGlobalIndexes(char *inLine) {
00138 g_max_ix=-1;
00139 g_data_name_ix=findIndex(inLine,"DATA_NAME");
00140 g_resc_name_ix=findIndex(inLine,"RSRC_NAME");
00141 g_path_name_ix=findIndex(inLine,"PATH_NAME");
00142 g_data_type_name_ix=findIndex(inLine,"DATA_TYP_NAME");
00143 g_size_ix=findIndex(inLine,"SIZE");
00144 g_data_owner_ix=findIndex(inLine,"DATA_OWNER");
00145 g_data_owner_domain_ix=findIndex(inLine,"DATA_OWNER_DOMAIN");
00146 g_data_create_timestamp_ix=findIndex(inLine,"DATA_CREATE_TIMESTAMP");
00147 g_data_last_access_timestamp_ix=
00148 findIndex(inLine,"DATA_LAST_ACCESS_TIMESTAMP");
00149 g_container_repl_enum_ix=findIndex(inLine,"CONTAINER_REPL_ENUM");
00150 g_collection_cont_name_ix=findIndex(inLine,"COLLECTION_CONT_NAME");
00151 }
00152
00153
00154 int
00155 checkDoCollection(char *inColl) {
00156 char testColl[2000];
00157 if (strstr(inColl, "/container")==inColl) {
00158 return(0);
00159 }
00160 if (strstr(inColl, "/home/")==inColl) {
00161 return(1);
00162 }
00163
00164 testColl[0]='/';
00165 testColl[1]='\0';
00166 strcat(testColl, cv_srb_zone);
00167 strcat(testColl, "/container");
00168 if (strstr(inColl, testColl)==inColl) {
00169 return(0);
00170 }
00171
00172 testColl[0]='/';
00173 testColl[1]='\0';
00174 strcat(testColl, cv_srb_zone);
00175 strcat(testColl, "/trash");
00176 if (strstr(inColl, testColl)==inColl) {
00177 return(1);
00178 }
00179 testColl[0]='/';
00180 testColl[1]='\0';
00181 strcat(testColl, cv_srb_zone);
00182 strcat(testColl, "/");
00183 if (strstr(inColl, testColl)==inColl) {
00184 char *cp2, *cp3;
00185 cp2 = rindex(inColl, '/');
00186 cp3 = inColl + strlen(testColl);
00187 if (cp2 < cp3) {
00188
00189 return(0);
00190 }
00191 }
00192 testColl[0]='/';
00193 testColl[1]='\0';
00194 strcat(testColl, cv_srb_zone);
00195 strcat(testColl, "/");
00196 if (strstr(inColl, testColl)!=inColl) {
00197
00198 return(0);
00199 }
00200 return(1);
00201 }
00202
00203 char *
00204 convertUser(char *inUser, char *inDomain) {
00205 int i;
00206 static char newUserName[100];
00207 for (i=0;i<nUserNames;i++) {
00208 if (strcmp(cv_srb_usernames[i],inUser)==0) {
00209 return(cv_irods_usernames[i]);
00210 }
00211 }
00212 strcpy(newUserName, inUser);
00213 strcat(newUserName, "#");
00214 strcat(newUserName, inDomain);
00215 return(newUserName);
00216 }
00217
/*
 * Convert a timestamp taken from the input file.
 *
 * The input is copied, the separators at offsets 10, 13 and 16 are
 * rewritten ('-' -> '.', '.' -> ':', '.' -> ':') so that
 * "YYYY-MM-DD-HH.MM.SS" becomes "YYYY-MM-DD.HH:MM:SS", and the copy is
 * handed to checkDateFormat(), which converts it in place.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call; callers needing two converted times at once must copy the first.
 * A checkDateFormat failure is reported on stdout and the (possibly
 * unconverted) string is still returned.
 */
char *
convertTime(char *inTime) {
    static char myTime[50];
    int status;

    /*
     * Clear the buffer and copy at most size-1 bytes: the result is
     * always terminated, and the fixed offsets tested below read NUL
     * rather than leftovers when the input is short.
     */
    memset(myTime, 0, sizeof(myTime));
    strncpy(myTime, inTime, sizeof(myTime) - 1);

    if (myTime[10] == '-') {
        myTime[10] = '.';
    }
    if (myTime[13] == '.') {
        myTime[13] = ':';
    }
    if (myTime[16] == '.') {
        myTime[16] = ':';
    }

    status = checkDateFormat(myTime);
    if (status) {
        printf("convertTime checkDateFormat error for '%s'\n", inTime);
    }

    return (myTime);
}
00241
00242 char *
00243 convertCollection(char *inColl) {
00244 static char outColl[2000];
00245 char outColl2[2000];
00246 char testStr[50];
00247 char *cp;
00248 int i,j;
00249 strncpy(outColl, inColl, sizeof(outColl));
00250
00251 if (strstr(inColl, "/home/")==inColl) {
00252 strcpy(outColl, cv_irods_zone);
00253 strcat(outColl, inColl);
00254 }
00255
00256 strcpy(testStr, "/");
00257 strcat(testStr, cv_srb_zone);
00258 strcat(testStr, "/");
00259 cp=strstr(outColl, testStr);
00260 if (cp != NULL) {
00261 int count;
00262 outColl2[0]='\0';
00263 count = cp-(char*)&outColl;
00264 strncpy(outColl2, outColl, count);
00265 outColl2[count]='\0';
00266 strcat(outColl2, "/");
00267 strcat(outColl2, cv_irods_zone);
00268 strcat(outColl2, cp+strlen(cv_srb_zone)+1);
00269 strcpy(outColl, outColl2);
00270 }
00271 for (i=0;i<nUserDomains;i++) {
00272 for (j=0;j<nUserNames;j++) {
00273 strcpy(testStr, "/");
00274 strcat(testStr, cv_srb_usernames[j]);
00275 strcat(testStr, ".");
00276 strcat(testStr, cv_srb_userdomains[i]);
00277 cp=strstr(outColl, testStr);
00278 if (cp != NULL) {
00279 int count;
00280 outColl2[0]='\0';
00281 count = cp-(char*)&outColl;
00282 strncpy(outColl2, outColl, count);
00283 outColl2[count]='\0';
00284 strcat(outColl2, "/");
00285 strcat(outColl2, cv_irods_usernames[j]);
00286 strcat(outColl2, cp+strlen(testStr));
00287 strcpy(outColl, outColl2);
00288 }
00289 }
00290 }
00291
00292 return(outColl);
00293 }
00294
00295 int
00296 main(argc, argv)
00297 int argc;
00298 char **argv;
00299 {
00300 FILE *FI, *FO;
00301 int wval;
00302 char *rchar;
00303 char buf[1024];
00304 int nInLines=0;
00305 int nOutItems=0;
00306 int i;
00307 char *ixFTL[50];
00308 int maxIxFTL=50;
00309 int nIxFTL=0;
00310 int doDataInsert;
00311 char *v_collection;
00312 char *v_create_time;
00313 char *v_access_time;
00314 char *v_owner;
00315 char *v_owner_domain;
00316 char *newResource;
00317
00318 setNowTime();
00319
00320 if (argc < 3) {
00321 printf("a.out file-in file-out\n");
00322 return(-1);
00323 }
00324
00325 FI = fopen(argv[1],"r");
00326 if (FI==0)
00327 {
00328 fprintf(stderr,"can't open input file %s\n",argv[1]);
00329 return(-2);
00330 }
00331
00332 FO = fopen(argv[2],"w");
00333 if (FO==0)
00334 {
00335 fprintf(stderr,"can't open output file %s\n",argv[2]);
00336 fclose( FI );
00337 return(-3);
00338 }
00339
00340 memset(&buf, 0, (size_t)sizeof(buf));
00341 rchar='\0';
00342 do {
00343 rchar = fgets(&buf[0], sizeof(buf), FI);
00344 if (rchar=='\0') {
00345 break;
00346 }
00347 nInLines++;
00348 if (nInLines==1) {
00349 if (strstr(buf, "GET_CHANGED_DATA_CORE_INFO")==0) {
00350 printf("This program only handles Spull.log.data type files.\n");
00351 return(-4);
00352 }
00353 }
00354 if (nInLines==2) {
00355 setGlobalIndexes(buf);
00356 }
00357 if (nInLines > 2) {
00358 for (i=0;i<maxIxFTL;i++) {
00359 ixFTL[i]=0;
00360 }
00361 ixFTL[0]=&buf[0];
00362 nIxFTL=1;
00363 for (i=0;i<sizeof(buf);i++) {
00364 if (buf[i]=='\0') break;
00365 if (buf[i]=='\n') {
00366 buf[i]='\0';
00367 break;
00368 }
00369 if (buf[i]=='|') {
00370 buf[i]='\0';
00371 ixFTL[nIxFTL++]=&buf[i+1];
00372 }
00373 }
00374
00375 if (nIxFTL < g_max_ix) {
00376 printf("Missing item(s) from line %d, exiting\n", nInLines);
00377 exit(-6);
00378 }
00379
00380
00381
00382 doDataInsert=0;
00383 for (i=0;i<nResc;i++) {
00384 if (strcmp(cv_srb_resources[i],
00385 ixFTL[g_resc_name_ix])==0) {
00386 newResource=cv_irods_resources[i];
00387 doDataInsert=1;
00388 }
00389 }
00390 if (doDataInsert==0) {
00391 printf("Not inserting data-object on resource %s\n",
00392 ixFTL[g_resc_name_ix]);
00393 }
00394
00395
00396
00397
00398
00399 if (doDataInsert) {
00400 if (checkDoCollection(ixFTL[g_collection_cont_name_ix])==1) {
00401
00402 v_collection = convertCollection(
00403 ixFTL[g_collection_cont_name_ix]);
00404 v_create_time = convertTime(
00405 ixFTL[g_data_create_timestamp_ix]);
00406 v_access_time = convertTime(
00407 ixFTL[g_data_last_access_timestamp_ix]);
00408 v_owner_domain = ixFTL[g_data_owner_domain_ix];
00409 v_owner = convertUser(ixFTL[g_data_owner_ix],
00410 v_owner_domain);
00411
00412 wval = fprintf(FO, "insert into R_DATA_MAIN (data_id, coll_id, data_name, data_repl_num, data_version, data_type_name, data_size, resc_name, data_path, data_owner_name, data_owner_zone, data_is_dirty, create_ts, modify_ts) values ((%s), (select coll_id from R_COLL_MAIN where coll_name ='%s'), '%s', '%s', ' ', '%s', '%s', '%s', '%s', '%s', '%s', '1', '%s', '%s');\n",
00413 nextValStr,
00414 v_collection,
00415 ixFTL[g_data_name_ix],
00416 ixFTL[g_container_repl_enum_ix],
00417 ixFTL[g_data_type_name_ix],
00418 ixFTL[g_size_ix],
00419 newResource,
00420 ixFTL[g_path_name_ix],
00421 v_owner,
00422 cv_irods_zone,
00423 v_create_time,
00424 v_access_time
00425 );
00426
00427 wval = fprintf(FO,
00428 "insert into R_OBJT_ACCESS ( object_id, user_id, access_type_id , create_ts, modify_ts) values ( (%s), (select user_id from R_USER_MAIN where user_name = '%s'), '1200', '%s', '%s');\n",
00429 currValStr,
00430 v_owner,
00431 nowTime,
00432 nowTime);
00433
00434 if (wval < 0) {
00435 perror("fwriting data:");
00436 }
00437 nOutItems++;
00438 }
00439 }
00440 }
00441 } while (rchar!='\0');
00442 if (nOutItems > 0) {
00443 wval = fprintf(FO, "commit;\n");
00444 }
00445
00446 printf("Processed %d input lines\n",nInLines);
00447 printf("Wrote output lines for %d items\n",nOutItems);
00448 fclose(FI);
00449 fclose(FO);
00450 return(0);
00451 }
00452
00453
/*
 * Status returned by checkDateFormat() for input it cannot parse.
 * Parenthesized so the negative value stays one operand wherever the
 * macro is expanded.
 */
#define DATE_FORMAT_ERR (-10)
/* Size in bytes of the buffers that hold a time string. */
#define TIME_LEN 32
/* Wide integer type used when printing time values. */
typedef long long rodsLong_t;
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
/*
 * Write the current time, as seconds since the epoch, into timeStr as an
 * 11-digit zero-padded decimal string.
 *
 * timeStr must have room for at least 12 bytes (11 digits plus the
 * terminator); no more than 12 bytes are ever written, which matches the
 * size of the nowTime buffer this is called with.  The value is printed
 * through unsigned long with a matching conversion instead of the
 * non-standard `uint` type and a signed format.
 */
void
getNowStr(char *timeStr)
{
    time_t now = time(NULL);

    snprintf(timeStr, 12, "%011lu", (unsigned long) now);
}
00476
/*
 * Return 1 if every character of inStr is a decimal digit, 0 otherwise.
 *
 * An empty string contains no non-digit and therefore yields 1;
 * checkDateFormat() depends on that to return before it indexes the last
 * character of its argument.
 *
 * Each character is converted to unsigned char before the isdigit()
 * call, because passing a negative char value is undefined behavior.
 */
int
isInteger (char *inStr)
{
    const char *p;

    for (p = inStr; *p != '\0'; p++) {
        if (!isdigit((unsigned char) *p)) {
            return (0);
        }
    }
    return (1);
}
00492
00493 int
00494 localToUnixTime (char *localTime, char *unixTime)
00495 {
00496 time_t myTime;
00497 struct tm *mytm;
00498 time_t newTime;
00499 char s[TIME_LEN];
00500
00501 myTime = time(NULL);
00502 mytm = localtime (&myTime);
00503
00504 strncpy(s,localTime, TIME_LEN);
00505
00506 s[19] = '\0';
00507 mytm->tm_sec = atoi(&s[17]);
00508 s[16] = '\0';
00509 mytm->tm_min = atoi(&s[14]);
00510 s[13] = '\0';
00511 mytm->tm_hour = atoi(&s[11]);
00512 s[10] = '\0';
00513 mytm->tm_mday = atoi(&s[8]);
00514 s[7] = '\0';
00515 mytm->tm_mon = atoi(&s[5]) - 1;
00516 s[4] = '\0';
00517 mytm->tm_year = atoi(&s[0]) - 1900;
00518
00519 newTime = mktime(mytm);
00520 if (sizeof (newTime) == 64) {
00521 snprintf (unixTime, TIME_LEN, "%lld", (rodsLong_t) newTime);
00522 } else {
00523 snprintf (unixTime, TIME_LEN, "%d", (uint) newTime);
00524 }
00525 return (0);
00526 }
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547 int
00548 checkDateFormat(char *s)
00549 {
00550
00551 int len;
00552 char t[] = "0000-00-00.00:00:00";
00553 char outUnixTime[TIME_LEN];
00554 int status;
00555 int offset = 0;
00556
00557 if (isInteger (s))
00558 return (0);
00559
00560 len = strlen(s);
00561
00562 if (s[len - 1] == 's') {
00563
00564 s[len - 1] = '\0';
00565 offset = atoi (s);
00566 snprintf (s, 19, "%d", offset);
00567 return 0;
00568 } else if (s[len - 1] == 'm') {
00569
00570 s[len - 1] = '\0';
00571 offset = atoi (s) * 60;
00572 snprintf (s, 19, "%d", offset);
00573 return 0;
00574 } else if (s[len - 1] == 'h') {
00575
00576 s[len - 1] = '\0';
00577 offset = atoi (s) * 3600;
00578 snprintf (s, 19, "%d", offset);
00579 return 0;
00580 } else if (s[len - 1] == 'd') {
00581
00582 s[len - 1] = '\0';
00583 offset = atoi (s) * 3600 * 24;
00584 snprintf (s, 19, "%d", offset);
00585 return 0;
00586 } else if (s[len - 1] == 'y') {
00587
00588 s[len - 1] = '\0';
00589 offset = atoi (s) * 3600 * 24 * 365;
00590 snprintf (s, 19, "%d", offset);
00591 return 0;
00592 } else if (len < 19) {
00593
00594 if (isdigit(s[0]) && isdigit(s[1]) && isdigit(s[2]) && isdigit(s[3])) {
00595
00596 strcat(s,(char *)&t[len]);
00597 } else {
00598
00599 int mypos;
00600
00601
00602 mypos = len - 1;
00603 while (mypos >= 0) {
00604 if (isdigit (s[mypos]))
00605 offset += s[mypos] - 48;
00606 else
00607 return (DATE_FORMAT_ERR);
00608
00609 mypos--;
00610 if (mypos >= 0)
00611 if (isdigit (s[mypos]))
00612 offset += 10 * (s[mypos] - 48);
00613 else
00614 return (DATE_FORMAT_ERR);
00615 else
00616 break;
00617
00618 mypos--;
00619 if (mypos >= 0)
00620 if (s[mypos] != ':') return (DATE_FORMAT_ERR);
00621
00622
00623 mypos--;
00624 if (mypos >= 0)
00625 if (isdigit (s[mypos]))
00626 offset += 60 * (s[mypos] - 48);
00627 else
00628 return (DATE_FORMAT_ERR);
00629 else
00630 break;
00631
00632 mypos--;
00633 if (mypos >= 0)
00634 if (isdigit (s[mypos]))
00635 offset += 10 * 60 * (s[mypos] - 48);
00636 else
00637 return (DATE_FORMAT_ERR);
00638 else
00639 break;
00640
00641 mypos--;
00642 if (mypos >= 0)
00643 if (s[mypos] != ':') return (DATE_FORMAT_ERR);
00644
00645
00646 mypos--;
00647 if (mypos >= 0)
00648 if (isdigit (s[mypos]))
00649 offset += 3600 * (s[mypos] - 48);
00650 else
00651 return (DATE_FORMAT_ERR);
00652 else
00653 break;
00654
00655 mypos--;
00656 if (mypos >= 0)
00657 if (isdigit (s[mypos]))
00658 offset += 10 * 3600 * (s[mypos] - 48);
00659 else
00660 return (DATE_FORMAT_ERR);
00661 else
00662 break;
00663
00664 mypos--;
00665 if (mypos >= 0)
00666 if (s[mypos] != '.') return (DATE_FORMAT_ERR);
00667
00668
00669
00670 mypos--;
00671 if (mypos >= 0)
00672 if (isdigit (s[mypos]))
00673 offset += 24 * 3600 * (s[mypos] - 48);
00674 else
00675 return (DATE_FORMAT_ERR);
00676 else
00677 break;
00678
00679 mypos--;
00680 if (mypos >= 0)
00681 if (isdigit (s[mypos]))
00682 offset += 10 * 24 * 3600 * (s[mypos] - 48);
00683 else
00684 return (DATE_FORMAT_ERR);
00685 else
00686 break;
00687 }
00688 snprintf (s, 19, "%d", offset);
00689 return (0);
00690 }
00691 }
00692
00693 if (isdigit(s[0]) && isdigit(s[1]) && isdigit(s[2]) && isdigit(s[3]) &&
00694 isdigit(s[5]) && isdigit(s[6]) && isdigit(s[8]) && isdigit(s[9]) &&
00695 isdigit(s[11]) && isdigit(s[12]) && isdigit(s[14]) && isdigit(s[15]) &&
00696 isdigit(s[17]) && isdigit(s[18]) &&
00697 s[4] == '-' && s[7] == '-' && s[10] == '.' &&
00698 s[13] == ':' && s[16] == ':' ) {
00699 status = localToUnixTime (s, outUnixTime);
00700 if (status >= 0) {
00701 strncpy (s, outUnixTime, TIME_LEN);
00702 }
00703 return(status);
00704 } else {
00705 return(DATE_FORMAT_ERR);
00706 }
00707 }