@article {2617, title = {Development of a Computerized Adaptive Test for Anxiety Based on the Dutch{\textendash}Flemish Version of the PROMIS Item Bank}, journal = {Assessment}, year = {In Press}, abstract = {We used the Dutch{\textendash}Flemish version of the USA PROMIS adult V1.0 item bank for Anxiety as input for developing a computerized adaptive test (CAT) to measure the entire latent anxiety continuum. First, psychometric analysis of a combined clinical and general population sample (N = 2,010) showed that the 29-item bank has psychometric properties that are required for a CAT administration. Second, a post hoc CAT simulation showed efficient and highly precise measurement, with an average number of 8.64 items for the clinical sample, and 9.48 items for the general population sample. Furthermore, the accuracy of our CAT version was highly similar to that of the full item bank administration, both in final score estimates and in distinguishing clinical subjects from persons without a mental health disorder. We discuss the future directions and limitations of CAT development with the Dutch{\textendash}Flemish version of the PROMIS Anxiety item bank.}, doi = {10.1177/1073191117746742}, url = {https://doi.org/10.1177/1073191117746742}, author = {Gerard Flens and Niels Smits and Caroline B. Terwee and Joost Dekker and Irma Huijbrechts and Philip Spinhoven and Edwin de Beurs} } @article {2732, title = {A Dynamic Stratification Method for Improving Trait Estimation in Computerized Adaptive Testing Under Item Exposure Control}, journal = {Applied Psychological Measurement}, volume = {44}, number = {3}, year = {2020}, pages = {182-196}, abstract = {When computerized adaptive testing (CAT) is under stringent item exposure control, the precision of trait estimation will substantially decrease. A new item selection method, the dynamic Stratification method based on Dominance Curves (SDC), which is aimed at improving trait estimation, is proposed to mitigate this problem. The objective function of the SDC in item selection is to maximize the sum of test information for all examinees rather than maximizing item information for individual examinees at a single-item administration, as in conventional CAT. To achieve this objective, the SDC uses dominance curves to stratify an item pool into strata with the number being equal to the test length to precisely and accurately increase the quality of the administered items as the test progresses, reducing the likelihood that a high-discrimination item will be administered to an examinee whose ability is not close to the item difficulty. Furthermore, the SDC incorporates a dynamic process for on-the-fly item{\textendash}stratum adjustment to optimize the use of quality items. Simulation studies were conducted to investigate the performance of the SDC in CAT under item exposure control at different levels of severity. 
According to the results, the SDC can efficiently improve trait estimation in CAT through greater precision and more accurate trait estimation than those generated by other methods (e.g., the maximum Fisher information method) in most conditions.}, doi = {10.1177/0146621619843820}, url = {https://doi.org/10.1177/0146621619843820}, author = {Jyun-Hong Chen and Hsiu-Yi Chao and Shu-Ying Chen} } @article {2740, title = {Developing Multistage Tests Using D-Scoring Method}, journal = {Educational and Psychological Measurement}, volume = {79}, number = {5}, year = {2019}, pages = {988-1008}, abstract = {The D-scoring method for scoring and equating tests with binary items proposed by Dimitrov offers some of the advantages of item response theory, such as item-level difficulty information and score computation that reflects the item difficulties, while retaining the merits of classical test theory such as the simplicity of number correct score computation and relaxed requirements for model sample sizes. Because of its unique combination of those merits, the D-scoring method has seen quick adoption in the educational and psychological measurement field. Because item-level difficulty information is available with the D-scoring method and item difficulties are reflected in test scores, it conceptually makes sense to use the D-scoring method with adaptive test designs such as multistage testing (MST). In this study, we developed and compared several versions of the MST mechanism using the D-scoring approach and also proposed and implemented a new framework for conducting MST simulation under the D-scoring method. Our findings suggest that the score recovery performance under MST with D-scoring was promising, as it retained score comparability across different MST paths. We found that MST using the D-scoring method can achieve improvements in measurement precision and efficiency over linear-based tests that use D-scoring method.}, doi = {10.1177/0013164419841428}, url = {https://doi.org/10.1177/0013164419841428}, author = {Kyung (Chris) T. Han and Dimiter M. Dimitrov and Faisal Al-Mashary} } @conference {2657, title = {Developing a CAT: An Integrated Perspective}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Most resources on computerized adaptive testing (CAT) tend to focus on psychometric aspects such as mathematical formulae for item selection or ability estimation. However, development of a CAT assessment requires a holistic view of project management, financials, content development, product launch and branding, and more. This presentation will develop such a holistic view, which serves several purposes, including providing a framework for validity, estimating costs and ROI, and making better decisions regarding the psychometric aspects.

Thompson and Weiss (2011) presented a 5-step model for developing computerized adaptive tests (CATs). This model will be presented and discussed as the core of this holistic framework, then applied to real-life examples. While most CAT research focuses on developing new quantitative algorithms, this presentation is instead intended to help researchers evaluate and select algorithms that are most appropriate for their needs. It is therefore ideal for practitioners who are familiar with the basics of item response theory and CAT and who wish to explore how they might apply these methodologies to improve their assessments.

Steps include:

1. Feasibility, applicability, and planning studies

2. Develop item bank content or utilize existing bank

3. Pretest and calibrate item bank

4. Determine specifications for final CAT

5. Publish live CAT.

For example, Step 1 includes simulation studies that estimate item bank requirements; these estimates can then be used to determine content development costs, which in turn feed into an estimated project cost and timeline. Such information is vital in determining whether the CAT should even be developed in the first place.

References

Thompson, N. A., \& Weiss, D. J. (2011). A Framework for the Development of Computerized Adaptive Tests. Practical Assessment, Research \& Evaluation, 16(1). Retrieved from http://pareonline.net/getvn.asp?v=16\&n=1.


}, keywords = {CAT Development, integrated approach}, url = {https://drive.google.com/open?id=1Jv8bpH2zkw5TqSMi03e5JJJ98QtXf-Cv}, author = {Nathan Thompson} } @article {2615, title = {Development of a Computer Adaptive Test for Depression Based on the Dutch-Flemish Version of the PROMIS Item Bank}, journal = {Evaluation \& the Health Professions}, volume = {40}, number = {1}, year = {2017}, pages = {79-105}, abstract = {We developed a Dutch-Flemish version of the patient-reported outcomes measurement information system (PROMIS) adult V1.0 item bank for depression as input for computerized adaptive testing (CAT). As item bank, we used the Dutch-Flemish translation of the original PROMIS item bank (28 items) and additionally translated 28 U.S. depression items that failed to make the final U.S. item bank. Through psychometric analysis of a combined clinical and general population sample (N = 2,010), 8 added items were removed. With the final item bank, we performed several CAT simulations to assess the efficiency of the extended (48 items) and the original item bank (28 items), using various stopping rules. Both item banks resulted in highly efficient and precise measurement of depression and showed high similarity between the CAT simulation scores and the full item bank scores. We discuss the implications of using each item bank and stopping rule for further CAT development.}, doi = {10.1177/0163278716684168}, url = {https://doi.org/10.1177/0163278716684168}, author = {Gerard Flens and Niels Smits and Caroline B. Terwee and Joost Dekker and Irma Huijbrechts and Edwin de Beurs} } @conference {2668, title = {The Development of a Web-Based CAT in China}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive ability assessment is widely used as a recruitment tool for evaluating potential employees. Traditional cognitive ability tests face threats from item exposure and long administration times; in China especially, campus recruitment places a premium on short testing times and anti-cheating measures. Beisen, the largest domestic provider of online assessment software, developed a web-based CAT for cognitive ability that assesses verbal, quantitative, logical, and spatial ability in order to shorten administration time, improve measurement accuracy, and reduce threats from cheating and faking in online ability testing. The web-based test is convenient for examinees, who can access it over the Internet simply by logging in to the test website at any time and place from any Internet-enabled device (e.g., laptops, iPads, and smartphones).

We designed the CAT around strategies for establishing the item bank, setting the starting point, selecting items, scoring, and terminating the test, and we paid close attention to administering the test via the web. For the CAT procedures, we employed online calibration to establish a stable and expanding item bank, and we integrated maximum Fisher information, the α-stratified strategy, and randomization to select items and control item exposure. Fixed-length and variable-length strategies were combined to terminate the test. To deliver fluid web-based testing, we employed cloud computing techniques and carefully designed each computing process. Distributed computation was used for scoring, executing EAP estimation and item selection at high speed. Caching all items on the servers in advance shortens the loading of items onto examinees' devices. Horizontally scalable cloud servers handle high concurrency, and the heavy computation in item selection was converted into looking up items in a precomputed information matrix table.

We examined average accuracy, bank usage, and computing performance under both laboratory and real testing conditions. In an administration to almost 28,000 examinees, bank usage averaged 50\%, and 80\% of tests terminated at a test information of 10, with an average of 9.6. Under high concurrency, testing proceeded without delays, and scoring plus item selection took an average of only 0.23 s per examinee.


}, keywords = {China, Web-Based CAT}, author = {Chongli Liang and Danjun Wang and Dan Zhou and Peida Zhan} } @article {2597, title = {The Development of MST Test Information for the Prediction of Test Performances}, journal = {Educational and Psychological Measurement}, volume = {77}, number = {4}, year = {2017}, pages = {570-586}, abstract = {The current study proposes novel methods to predict multistage testing (MST) performance without conducting simulations. This method, called MST test information, is based on analytic derivation of standard errors of ability estimates across theta levels. We compared standard errors derived analytically to the simulation results to demonstrate the validity of the proposed method in both measurement precision and classification accuracy. The results indicate that the MST test information effectively predicted the performance of MST. In addition, the results of the current study highlighted the relationship among the test construction, MST design factors, and MST performance.}, doi = {10.1177/0013164416662960}, url = {http://dx.doi.org/10.1177/0013164416662960}, author = {Ryoungsun Park and Jiseon Kim and Hyewon Chung and Barbara G. Dodd} } @conference {2644, title = {DIF-CAT: Doubly Adaptive CAT Using Subgroup Information to Improve Measurement Precision}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Differential item functioning (DIF) is usually regarded as a test fairness issue in high-stakes tests. In low-stakes tests, it is more of an accuracy problem. However, in low-stakes tests, the same method, deleting items that demonstrate significant DIF, is still employed to treat DIF items. When political concerns are not important, such as in low-stakes tests and instruments that are not used to make decisions about people, deleting items might not be optimal. Computerized adaptive testing (CAT) is more and more frequently used in low-stakes tests. The DIF-CAT method evaluated in this research is designed to cope with DIF in a CAT environment. Using this method, item parameters are separately estimated for the focal group and the reference group in a DIF study, then CATs are administered based on different sets of item parameters for the focal and reference groups.

To evaluate the performance of the DIF-CAT procedure, it was compared in a simulation study to (1) deleting all the DIF items in a CAT bank and (2) ignoring DIF. A 300-item flat item bank and a 300-item peaked item bank were simulated using the three-parameter logistic IRT model with D = 1.7. 40\% of the items in each bank showed DIF. The DIF size was 0.5 in b and/or a, while the original b ranged from -3 to 3 and a ranged from 0.3 to 2.1. Three types of DIF were considered: (1) uniform DIF caused by differences in b, (2) non-uniform DIF caused by differences in a, and (3) non-uniform DIF caused by differences in both a and b. 500 normally distributed simulees in each of the reference and focal groups were used in item parameter recalibration. In the Delete DIF method, only DIF-free items were calibrated. In the Ignore DIF method, all the items were calibrated using all simulees without differentiating the groups. In the DIF-CAT method, the DIF-free items were used as anchor items to estimate the item parameters for the focal and reference groups, and the item parameters from recalibration were used. All simulees used the same item parameters in the Delete method and the Ignore method. CATs for simulees within the two groups used group-specific item parameters in the DIF-CAT method. In the CAT stage, 100 simulees were generated for each of the reference and focal groups at each of six discrete θ levels ranging from -2.5 to 2.5. CAT test length was fixed at 40 items. Bias, average absolute difference, RMSE, standard error of θ estimates, and person fit were used to compare the performance of the DIF methods. DIF item usage was also recorded for the Ignore method and the DIF-CAT method.

Generally, the DIF-CAT method outperformed both the Delete method and the Ignore method in dealing with DIF items in CAT. The Delete method, which is the most frequently used method for handling DIF, performed the worst of the three methods in a CAT environment, as reflected in multiple indices of measurement precision. Even the Ignore method, which simply left DIF items in the item bank, provided θ estimates of higher precision than the Delete method. This poor performance of the Delete method was probably due to reduction in size of the item bank available for each CAT.


}, keywords = {DIF-CAT, Doubly Adaptive CAT, Measurement Precision, subgroup information}, url = {https://drive.google.com/open?id=1Gu4FR06qM5EZNp_Ns0Kt3HzBqWAv3LPy}, author = {Joy Wang and David J. Weiss and Chun Wang} } @article {2580, title = {Dual-Objective Item Selection Criteria in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {54}, number = {2}, year = {2017}, pages = {165{\textendash}183}, abstract = {The development of cognitive diagnostic-computerized adaptive testing (CD-CAT) has provided a new perspective for gaining information about examinees{\textquoteright} mastery on a set of cognitive attributes. This study proposes a new item selection method within the framework of dual-objective CD-CAT that simultaneously addresses examinees{\textquoteright} attribute mastery status and overall test performance. The new procedure is based on the Jensen-Shannon (JS) divergence, a symmetrized version of the Kullback-Leibler divergence. We show that the JS divergence resolves the noncomparability problem of the dual information index and has close relationships with Shannon entropy, mutual information, and Fisher information. The performance of the JS divergence is evaluated in simulation studies in comparison with the methods available in the literature. Results suggest that the JS divergence achieves parallel or more precise recovery of latent trait variables compared to the existing methods and maintains practical advantages in computation and item pool usage.}, issn = {1745-3984}, doi = {10.1111/jedm.12139}, url = {http://dx.doi.org/10.1111/jedm.12139}, author = {Kang, Hyeon-Ah and Zhang, Susu and Chang, Hua-Hua} } @article {2345, title = {Detecting Item Preknowledge in Computerized Adaptive Testing Using Information Theory and Combinatorial Optimization}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, pages = {37-58}, keywords = {combinatorial optimization, hypothesis testing, item preknowledge, Kullback-Leibler divergence, simulated annealing., test security}, issn = {2165-6592}, doi = {10.7333/1410-0203037}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/36/18}, author = {Belov, D. I.} } @article {2350, title = {Determining the Overall Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {419{\textendash}440}, abstract = {

With an increase in the number of online tests, interruptions during testing due to unexpected technical issues seem unavoidable. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees' scores. There is a lack of research on this topic due to the novelty of the problem. This article is an attempt to fill that void. Several methods, primarily based on propensity score matching, linear regression, and item response theory, were suggested to determine the overall impact of the interruptions on the examinees' scores. A realistic simulation study shows that the suggested methods have satisfactory Type I error rate and power. Then the methods were applied to data from the Indiana Statewide Testing for Educational Progress-Plus (ISTEP+) test that experienced interruptions in 2013. The results indicate that the interruptions did not have a significant overall impact on the student scores for the ISTEP+ test.

}, issn = {1745-3984}, doi = {10.1111/jedm.12052}, url = {http://dx.doi.org/10.1111/jedm.12052}, author = {Sinharay, Sandip and Wan, Ping and Whitaker, Mike and Kim, Dong-In and Zhang, Litong and Choi, Seung W.} } @article {2281, title = {Deriving Stopping Rules for Multidimensional Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {37}, number = {2}, year = {2013}, pages = {99-122}, abstract = {

Multidimensional computerized adaptive testing (MCAT) is able to provide a vector of ability estimates for each examinee, which could be used to provide a more informative profile of an examinee's performance. The current literature on MCAT focuses on the fixed-length tests, which can generate less accurate results for those examinees whose abilities are quite different from the average difficulty level of the item bank when there are only a limited number of items in the item bank. Therefore, instead of stopping the test with a predetermined fixed test length, the authors use a more informative stopping criterion that is directly related to measurement accuracy. Specifically, this research derives four stopping rules that either quantify the measurement precision of the ability vector (i.e., minimum determinant rule [D-rule], minimum eigenvalue rule [E-rule], and maximum trace rule [T-rule]) or quantify the amount of available information carried by each item (i.e., maximum Kullback–Leibler divergence rule [K-rule]). The simulation results showed that all four stopping rules successfully terminated the test when the mean squared error of ability estimation is within a desired range, regardless of examinees' true abilities. It was found that when using the D-, E-, or T-rule, examinees with extreme abilities tended to have tests that were twice as long as the tests received by examinees with moderate abilities. However, the test length difference with K-rule is not very dramatic, indicating that K-rule may not be very sensitive to measurement precision. In all cases, the cutoff value for each stopping rule needs to be adjusted on a case-by-case basis to find an optimal solution.

}, doi = {10.1177/0146621612463422}, url = {http://apm.sagepub.com/content/37/2/99.abstract}, author = {Wang, Chun and Chang, Hua-Hua and Boughton, Keith A.} } @article {2205, title = {Detecting Local Item Dependence in Polytomous Adaptive Data}, journal = {Journal of Educational Measurement}, volume = {49}, number = {2}, year = {2012}, pages = {127{\textendash}147}, abstract = {

A rapidly expanding arena for item response theory (IRT) is in attitudinal and health-outcomes survey applications, often with polytomous items. In particular, there is interest in computer adaptive testing (CAT). Meeting model assumptions is necessary to realize the benefits of IRT in this setting, however. Although initial investigations of local item dependence have been studied both for polytomous items in fixed-form settings and for dichotomous items in CAT settings, there have been no publications applying local item dependence detection methodology to polytomous items in CAT despite its central importance to these applications. The current research uses a simulation study to investigate the extension of widely used pairwise statistics, Yen's Q3 statistic and Pearson's X2 statistic, in this context. The simulation design and results are contextualized throughout with a real item bank of this type from the Patient-Reported Outcomes Measurement Information System (PROMIS).

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2012.00165.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2012.00165.x}, author = {Mislevy, Jessica L. and Rupp, Andr{\'e} A. and Harring, Jeffrey R.} } @article {2263, title = {Development of a computerized adaptive test for depression}, journal = {Archives of General Psychiatry}, volume = {69}, year = {2012}, pages = {1105-1112}, doi = {10.1001/archgenpsychiatry.2012.14}, url = {WWW.ARCHGENPSYCHIATRY.COM}, author = {Robert D. Gibbons and David .J. Weiss and Paul A. Pilkonis and Ellen Frank and Tara Moore and Jong Bae Kim and David J. Kupfer} } @article {2042, title = {Design of a Computer-Adaptive Test to Measure English Literacy and Numeracy in the Singapore Workforce: Considerations, Benefits, and Implications}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

A computer adaptive test (CAT) is a delivery methodology that serves the larger goals of the assessment system in which it is embedded. A thorough analysis of the assessment system for which a CAT is being designed is critical to ensure that the delivery platform is appropriate and addresses all relevant complexities. As such, a CAT engine must be designed to conform to the
validity and reliability of the overall system. This design takes the form of adherence to the assessment goals and objectives of the adaptive assessment system. When the assessment is adapted for use in another country, consideration must be given to any necessary revisions including content differences. This article addresses these considerations while drawing, in part, on the process followed in the development of the CAT delivery system designed to test English language workplace skills for the Singapore Workforce Development Agency. Topics include item creation and selection, calibration of the item pool, analysis and testing of the psychometric properties, and reporting and interpretation of scores. The characteristics and benefits of the CAT delivery system are detailed as well as implications for testing programs considering the use of a
CAT delivery system.

}, url = {http://www.testpublishers.org/journal-of-applied-testing-technology}, author = {Jacobsen, J. and Ackermann, R. and Eg{\"u}ez, J. and Ganguli, D. and Rickard, P. and Taylor, L.} } @conference {2083, title = {Detecting DIF between Conventional and Computerized Adaptive Testing: A Monte Carlo Study}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Two procedures, the Modified Robust Z statistic and the 95\% Credible Interval, were compared in a Monte Carlo study. Both procedures evidenced adequate control of false positive DIF results.

}, keywords = {95\% Credible Interval, CAT, DIF, differential item function, modified robust Z statistic, Monte Carlo methodologies}, author = {Barth B. Riley and Adam C. Carle} } @inbook {2057, title = {Designing and Implementing a Multistage Adaptive Test: The Uniform CPA Exam}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {167-190}, chapter = {9}, doi = {10.1007/978-0-387-85461-8}, author = {Melican, G.J. and Breithaupt, K and Zhang, Y.} } @inbook {2060, title = {Designing Item Pools for Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {231-245}, chapter = {12}, doi = {10.1007/978-0-387-85461-8}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @article {326, title = {Designing item pools to optimize the functioning of a computerized adaptive test}, journal = {Psychological Test and Assessment Modeling}, volume = {52}, number = {2}, year = {2010}, pages = {127-141}, abstract = {Computerized adaptive testing (CAT) is a testing procedure that can result in improved precision for a specified test length or reduced test length with no loss of precision. However, these attractive psychometric features of CATs are only achieved if appropriate test items are available for administration. This set of test items is commonly called an {\textquotedblleft}item pool.{\textquotedblright} This paper discusses the optimal characteristics for an item pool that will lead to the desired properties for a CAT. Then, a procedure is described for designing the statistical characteristics of the item parameters for an optimal item pool within an item response theory framework. Because true optimality is impractical, methods for achieving practical approximations to optimality are described. The results of this approach are shown for an operational testing program including comparisons to the results from the item pool currently used in that testing program.Key}, isbn = {2190-0507}, author = {Reckase, M. D.} } @inbook {2064, title = {Detecting Person Misfit in Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {315-329}, chapter = {16}, doi = {10.1007/978-0-387-85461-8}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {113, title = {Detection of aberrant item score patterns in computerized adaptive testing: An empirical example using the CUSUM}, journal = {Personality and Individual Differences}, volume = {48}, number = {8}, year = {2010}, pages = {921-925}, abstract = {The scalability of individual trait scores on a computerized adaptive test (CAT) was assessed through investigating the consistency of individual item score patterns. A sample of N = 428 persons completed a personality CAT as part of a career development procedure. To detect inconsistent item score patterns, we used a cumulative sum (CUSUM) procedure. Combined information from the CUSUM, other personality measures, and interviews showed that similar estimated trait values may have a different interpretation.Implications for computer-based assessment are discussed.}, keywords = {CAT, computerized adaptive testing, CUSUM approach, person Fit}, isbn = {01918869}, author = {Egberink, I. J. L. and Meijer, R. R. and Veldkamp, B. P. and Schakel, L. and Smid, N. 
G.} } @article {2, title = {Deterioro de par{\'a}metros de los {\'\i}tems en tests adaptativos informatizados: estudio con eCAT [Item parameter drift in computerized adaptive testing: Study with eCAT]}, journal = {Psicothema}, volume = {22}, number = {2}, year = {2010}, note = {Abad, Francisco JOlea, JulioAguado, DavidPonsoda, VicenteBarrada, Juan REnglish AbstractSpainPsicothemaPsicothema. 2010 May;22(2):340-7.}, pages = {340-7}, edition = {2010/04/29}, abstract = {

Item parameter drift in computerized adaptive testing: Study with eCAT. This study describes the parameter drift analysis conducted on eCAT (a computerized adaptive test that assesses the written English level of Spanish speakers). The original calibration of the item bank (N = 3,224) was compared to a new calibration obtained from the data provided by most eCAT operative administrations (N = 7,254). A Differential Item Functioning (DIF) study was conducted between the original and the new calibrations, and the impact of the new parameters on the trait level estimates was obtained by simulation. Results show that parameter drift is found especially for the a and c parameters, that an important number of bank items show DIF, and that the parameter change has a moderate impact on θ estimates for examinees with a high level of English. It is therefore recommended that the original item parameter estimates be replaced by the new set.

}, keywords = {*Software, Educational Measurement/*methods/*statistics \& numerical data, Humans, Language}, isbn = {0214-9915 (Print)0214-9915 (Linking)}, author = {Abad, F. J. and Olea, J. and Aguado, D. and Ponsoda, V. and Barrada, J} } @article {819, title = {Development and evaluation of a confidence-weighting computerized adaptive testing}, journal = {Educational Technology \& Society}, volume = {13(3)}, year = {2010}, pages = {163{\textendash}176}, author = {Yen, Y. C. and Ho, R. G. and Chen, L. J. and Chou, K. Y. and Chen, Y. L.} } @article {46, title = {Development and validation of patient-reported outcome measures for sleep disturbance and sleep-related impairments}, journal = {Sleep}, volume = {33}, number = {6}, year = {2010}, note = {Buysse, Daniel JYu, LanMoul, Douglas EGermain, AnneStover, AngelaDodds, Nathan EJohnston, Kelly LShablesky-Cade, Melissa APilkonis, Paul AAR052155/AR/NIAMS NIH HHS/United StatesU01AR52155/AR/NIAMS NIH HHS/United StatesU01AR52158/AR/NIAMS NIH HHS/United StatesU01AR52170/AR/NIAMS NIH HHS/United StatesU01AR52171/AR/NIAMS NIH HHS/United StatesU01AR52177/AR/NIAMS NIH HHS/United StatesU01AR52181/AR/NIAMS NIH HHS/United StatesU01AR52186/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralValidation StudiesUnited StatesSleepSleep. 2010 Jun 1;33(6):781-92.}, month = {Jun 1}, pages = {781-92}, edition = {2010/06/17}, abstract = {STUDY OBJECTIVES: To develop an archive of self-report questions assessing sleep disturbance and sleep-related impairments (SRI), to develop item banks from this archive, and to validate and calibrate the item banks using classic validation techniques and item response theory analyses in a sample of clinical and community participants. DESIGN: Cross-sectional self-report study. SETTING: Academic medical center and participant homes. PARTICIPANTS: One thousand nine hundred ninety-three adults recruited from an Internet polling sample and 259 adults recruited from medical, psychiatric, and sleep clinics. INTERVENTIONS: None. MEASUREMENTS AND RESULTS: This study was part of PROMIS (Patient-Reported Outcomes Information System), a National Institutes of Health Roadmap initiative. Self-report item banks were developed through an iterative process of literature searches, collecting and sorting items, expert content review, qualitative patient research, and pilot testing. Internal consistency, convergent validity, and exploratory and confirmatory factor analysis were examined in the resulting item banks. Factor analyses identified 2 preliminary item banks, sleep disturbance and SRI. Item response theory analyses and expert content review narrowed the item banks to 27 and 16 items, respectively. Validity of the item banks was supported by moderate to high correlations with existing scales and by significant differences in sleep disturbance and SRI scores between participants with and without sleep disorders. CONCLUSIONS: The PROMIS sleep disturbance and SRI item banks have excellent measurement properties and may prove to be useful for assessing general aspects of sleep and SRI with various groups of patients and interventions.}, keywords = {*Outcome Assessment (Health Care), *Self Disclosure, Adult, Aged, Aged, 80 and over, Cross-Sectional Studies, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Psychometrics, Questionnaires, Reproducibility of Results, Sleep Disorders/*diagnosis, Young Adult}, isbn = {0161-8105 (Print)0161-8105 (Linking)}, author = {Buysse, D. J. and Yu, L. and Moul, D. E. and Germain, A. 
and Stover, A. and Dodds, N. E. and Johnston, K. L. and Shablesky-Cade, M. A. and Pilkonis, P. A.} } @article {312, title = {Development of computerized adaptive testing (CAT) for the EORTC QLQ-C30 physical functioning dimension}, journal = {Quality of Life Research }, volume = {20}, number = {4}, year = {2010}, note = {Qual Life Res. 2010 Oct 23.}, pages = {479-490}, edition = {2010/10/26}, abstract = {PURPOSE: Computerized adaptive test (CAT) methods, based on item response theory (IRT), enable a patient-reported outcome instrument to be adapted to the individual patient while maintaining direct comparability of scores. The EORTC Quality of Life Group is developing a CAT version of the widely used EORTC QLQ-C30. We present the development and psychometric validation of the item pool for the first of the scales, physical functioning (PF). METHODS: Initial developments (including literature search and patient and expert evaluations) resulted in 56 candidate items. Responses to these items were collected from 1,176 patients with cancer from Denmark, France, Germany, Italy, Taiwan, and the United Kingdom. The items were evaluated with regard to psychometric properties. RESULTS: Evaluations showed that 31 of the items could be included in a unidimensional IRT model with acceptable fit and good content coverage, although the pool may lack items at the upper extreme (good PF). There were several findings of significant differential item functioning (DIF). However, the DIF findings appeared to have little impact on the PF estimation. CONCLUSIONS: We have established an item pool for CAT measurement of PF and believe that this CAT instrument will clearly improve the EORTC measurement of PF.}, isbn = {1573-2649 (Electronic)0962-9343 (Linking)}, author = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Chie, W. C. and Conroy, T. and Costantini, A. and Fayers, P. and Helbostad, J. and Holzner, B. and Kaasa, S. and Singer, S. and Velikova, G. and Young, T.} } @inbook {1954, title = {Developing item variants: An empirical study}, year = {2009}, note = {{PDF file, 194 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Large-scale standardized test have been widely used for educational and licensure testing. In computerized adaptive testing (CAT), one of the practical concerns for maintaining large-scale assessments is to ensure adequate numbers of high-quality items that are required for item pool functioning. Developing items at specific difficulty levels and for certain areas of test plans is a wellknown challenge. The purpose of this study was to investigate strategies for varying items that can effectively generate items at targeted difficulty levels and specific test plan areas. Each variant item generation model was developed by decomposing selected source items possessing ideal measurement properties and targeting the desirable content domains. 341 variant items were generated from 72 source items. Data were collected from six pretest periods. Items were calibrated using the Rasch model. Initial results indicate that variant items showed desirable measurement properties. Additionally, compared to an average of approximately 60\% of the items passing pretest criteria, an average of 84\% of the variant items passed the pretest criteria. }, author = {Wendt, A. and Kao, S. and Gorham, J. 
and Woo, A.} } @article {7, title = {Development and preliminary testing of a computerized adaptive assessment of chronic pain}, journal = {Journal of Pain}, volume = {10}, number = {9}, year = {2009}, note = {Anatchkova, Milena DSaris-Baglama, Renee NKosinski, MarkBjorner, Jakob B1R43AR052251-01A1/AR/NIAMS NIH HHS/United StatesEvaluation StudiesResearch Support, N.I.H., ExtramuralUnited StatesThe journal of pain : official journal of the American Pain SocietyJ Pain. 2009 Sep;10(9):932-43.}, month = {Sep}, pages = {932-943}, edition = {2009/07/15}, abstract = {The aim of this article is to report the development and preliminary testing of a prototype computerized adaptive test of chronic pain (CHRONIC PAIN-CAT) conducted in 2 stages: (1) evaluation of various item selection and stopping rules through real data-simulated administrations of CHRONIC PAIN-CAT; (2) a feasibility study of the actual prototype CHRONIC PAIN-CAT assessment system conducted in a pilot sample. Item calibrations developed from a US general population sample (N = 782) were used to program a pain severity and impact item bank (kappa = 45), and real data simulations were conducted to determine a CAT stopping rule. The CHRONIC PAIN-CAT was programmed on a tablet PC using QualityMetric{\textquoteright}s Dynamic Health Assessment (DYHNA) software and administered to a clinical sample of pain sufferers (n = 100). The CAT was completed in significantly less time than the static (full item bank) assessment (P < .001). On average, 5.6 items were dynamically administered by CAT to achieve a precise score. Scores estimated from the 2 assessments were highly correlated (r = .89), and both assessments discriminated across pain severity levels (P < .001, RV = .95). Patients{\textquoteright} evaluations of the CHRONIC PAIN-CAT were favorable. PERSPECTIVE: This report demonstrates that the CHRONIC PAIN-CAT is feasible for administration in a clinic. The application has the potential to improve pain assessment and help clinicians manage chronic pain.}, keywords = {*Computers, *Questionnaires, Activities of Daily Living, Adaptation, Psychological, Chronic Disease, Cohort Studies, Disability Evaluation, Female, Humans, Male, Middle Aged, Models, Psychological, Outcome Assessment (Health Care), Pain Measurement/*methods, Pain, Intractable/*diagnosis/psychology, Psychometrics, Quality of Life, User-Computer Interface}, isbn = {1528-8447 (Electronic)1526-5900 (Linking)}, author = {Anatchkova, M. D. and Saris-Baglama, R. N. and Kosinski, M. and Bjorner, J. B.} } @article {138, title = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using Rasch analysis}, journal = {Rehabilitation Psychology}, volume = {54}, number = {2}, year = {2009}, note = {Forkmann, ThomasBoecker, MarenNorra, ChristineEberle, NicoleKircher, TiloSchauerte, PatrickMischke, KarlWesthofen, MartinGauggel, SiegfriedWirtz, MarkusResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesRehabilitation psychologyRehabil Psychol. 2009 May;54(2):186-97.}, month = {May}, pages = {186-97}, edition = {2009/05/28}, abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. 
METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.}, keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role}, isbn = {0090-5550 (Print)0090-5550 (Linking)}, author = {Forkmann, T. and Boecker, M. and Norra, C. and Eberle, N. and Kircher, T. and Schauerte, P. and Mischke, K. and Westhofen, M. and Gauggel, S. and Wirtz, M.} } @article {2269, title = {Diagnostic classification models and multidimensional adaptive testing: A commentary on Rupp and Templin.}, journal = {Measurement: Interdisciplinary Research and Perspectives}, volume = {7}, year = {2009}, pages = {58-61}, author = {Frey, A. and Carstensen, C. H.} } @article {2183, title = {Direct and Inverse Problems of Item Pool Design for Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {4}, year = {2009}, pages = {533-547}, abstract = {

The recent literature on computerized adaptive testing (CAT) has developed methods for creating CAT item pools from a large master pool. Each CAT pool is designed as a set of nonoverlapping forms reflecting the skill levels of an assumed population of test takers. This article presents a Monte Carlo method to obtain these CAT pools and discusses its advantages over existing methods. Also, a new problem is considered that finds a population ability density function best matching the master pool. An analysis of the solution to this new problem provides testing organizations with effective guidance for maintaining their master pools. Computer experiments with a pool of Law School Admission Test items and its assembly constraints are presented.

}, doi = {10.1177/0013164409332224}, url = {http://epm.sagepub.com/content/69/4/533.abstract}, author = {Belov, Dmitry I. and Armstrong, Ronald D.} } @article {458, title = { Direct and inverse problems of item pool design for computerized adaptive testing}, journal = {Educational and Psychological Measurement}, volume = {69}, year = {2009}, pages = {533-547}, author = {Belov, D. I. and Armstrong, R. D.} } @conference {101, title = {Developing a progressive approach to using the GAIN in order to reduce the duration and cost of assessment with the GAIN short screener, Quick and computer adaptive testing}, booktitle = {Joint Meeting on Adolescent Treatment Effectiveness }, year = {2008}, note = {ProCite field[6]: Paper presented at the}, month = {2008}, address = {Washington D.C., USA}, author = {Dennis, M. L. and Funk, R. and Titus, J. and Riley, B. B. and Hosman, S. and Kinne, S.} } @article {307, title = {The D-optimality item selection criterion in the early stage of CAT: A study with the graded response model}, journal = {Journal of Educational and Behavioral Statistics}, volume = {33}, number = {1}, year = {2008}, pages = {88-110}, abstract = {During the early stage of computerized adaptive testing (CAT), item selection criteria based on Fisher{\textquoteright}s information often produce less stable latent trait estimates than the Kullback-Leibler global information criterion. Robustness against early stage instability has been reported for the D-optimality criterion in a polytomous CAT with the Nominal Response Model and is shown herein to be reproducible for the Graded Response Model. For comparative purposes, the A-optimality and the global information criteria are also applied. Their item selection is investigated as a function of test progression and item bank composition. The results indicate how the selection of specific item parameters underlies the criteria performances evaluated via accuracy and precision of estimation. In addition, the criteria item exposure rates are compared, without the use of any exposure controlling measure. On the account of stability, precision, accuracy, numerical simplicity, and less evidently, item exposure rate, the D-optimality criterion can be recommended for CAT.}, keywords = {computerized adaptive testing, D optimality, item selection}, author = {Passos, V. L. and Berger, M. P. F. and Tan, F. E. S.} } @article {730, title = {The design and evaluation of a computerized adaptive test on mobile devices}, journal = {Computers \& Education}, volume = {49.}, year = {2007}, author = {Triantafillou, E. and Georgiadou, E. and Economides, A. A.} } @inbook {1877, title = {The design of p-optimal item banks for computerized adaptive tests}, year = {2007}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing. {PDF file, 211 KB}.}, author = {Reckase, M. D.} } @inbook {1783, title = {Designing optimal item pools for computerized adaptive tests with Sympson-Hetter exposure control}, year = {2007}, note = {3 MB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing}, author = {Gu, L. and Reckase, M. D.} } @inbook {1867, title = {Designing templates based on a taxonomy of innovative items}, year = {2007}, note = {{PDF file, 149 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Parshall, C. G. and Harmes, J. 
C.} } @article {2199, title = {Detecting Differential Speededness in Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {44}, number = {2}, year = {2007}, pages = {117{\textendash}130}, abstract = {

A potential undesirable effect of multistage testing is differential speededness, which happens if some of the test takers run out of time because they receive subtests with items that are more time intensive than others. This article shows how a probabilistic response-time model can be used for estimating differences in time intensities and speed between subtests and test takers and detecting differential speededness. An empirical data set for a multistage test in the computerized CPA Exam was used to demonstrate the procedures. Although the more difficult subtests appeared to have items that were more time intensive than the easier subtests, an analysis of the residual response times did not reveal any significant differential speededness because the time limit appeared to be appropriate. In a separate analysis, within each of the subtests, we found minor but consistent patterns of residual times that are believed to be due to a warm-up effect, that is, test takers spending more time on the initial items than they actually need.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2007.00030.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2007.00030.x}, author = {van der Linden, Wim J. and Breithaupt, Krista and Chuah, Siang Chee and Zhang, Yanwei} } @article {29, title = {Developing tailored instruments: item banking and computerized adaptive assessment}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl 1}, year = {2007}, note = {Bjorner, Jakob BueChang, Chih-HungThissen, DavidReeve, Bryce B1R43NS047763-01/NS/United States NINDSAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:95-108. Epub 2007 Feb 15.}, pages = {95-108}, edition = {2007/05/29}, abstract = {Item banks and Computerized Adaptive Testing (CAT) have the potential to greatly improve the assessment of health outcomes. This review describes the unique features of item banks and CAT and discusses how to develop item banks. In CAT, a computer selects the items from an item bank that are most relevant for and informative about the particular respondent; thus optimizing test relevance and precision. Item response theory (IRT) provides the foundation for selecting the items that are most informative for the particular respondent and for scoring responses on a common metric. The development of an item bank is a multi-stage process that requires a clear definition of the construct to be measured, good items, a careful psychometric analysis of the items, and a clear specification of the final CAT. The psychometric analysis needs to evaluate the assumptions of the IRT model such as unidimensionality and local independence; that the items function the same way in different subgroups of the population; and that there is an adequate fit between the data and the chosen item response models. Also, interpretation guidelines need to be established to help the clinical application of the assessment. Although medical research can draw upon expertise from educational testing in the development of item banks and CAT, the medical field also encounters unique opportunities and challenges.}, keywords = {*Health Status, *Health Status Indicators, *Mental Health, *Outcome Assessment (Health Care), *Quality of Life, *Questionnaires, *Software, Algorithms, Factor Analysis, Statistical, Humans, Models, Statistical, Psychometrics}, isbn = {0962-9343 (Print)}, author = {Bjorner, J. B. and Chang, C-H. and Thissen, D. and Reeve, B. B.} } @article {782, title = {Development and evaluation of a computer adaptive test for {\textquotedblleft}Anxiety{\textquotedblright} (Anxiety-CAT)}, journal = {Quality of Life Research}, volume = {16}, year = {2007}, pages = {143-155}, author = {Walter, O. B. and Becker, J. and Bjorner, J. B. and Fliege, H. and Klapp, B. F. and Rose, M.} } @inbook {1763, title = {The development of a computerized adaptive test for integrity}, year = {2007}, note = {{PDf file, 290 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Egberink, I. J. L. and Veldkamp, B. P.} } @inbook {1898, title = {Development of a multiple-component CAT for measuring foreign language proficiency (SIMTEST)}, year = {2007}, note = {{PDF file, 258 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Sumbling, M. and Sanz, P. and Viladrich, M. C. and Doval, E. 
and Riera, L.} } @inbook {1752, title = {Designing computerized adaptive tests}, year = {2006}, address = {S.M. Downing and T. M. Haladyna (Eds.), Handbook of test development. New Jersey: Lawrence Erlbaum Associates.}, author = {Davey, T. and Pitoniak, M. J.} } @article {121, title = {Data pooling and analysis to build a preliminary item bank: an example using bowel function in prostate cancer}, journal = {Evaluation and the Health Professions}, volume = {28}, number = {2}, year = {2005}, note = {0163-2787 (Print)Journal Article}, pages = {142-59}, abstract = {Assessing bowel function (BF) in prostate cancer can help determine therapeutic trade-offs. We determined the components of BF commonly assessed in prostate cancer studies as an initial step in creating an item bank for clinical and research application. We analyzed six archived data sets representing 4,246 men with prostate cancer. Thirty-one items from validated instruments were available for analysis. Items were classified into domains (diarrhea, rectal urgency, pain, bleeding, bother/distress, and other) then subjected to conventional psychometric and item response theory (IRT) analyses. Items fit the IRT model if the ratio between observed and expected item variance was between 0.60 and 1.40. Four of 31 items had inadequate fit in at least one analysis. Poorly fitting items included bleeding (2), rectal urgency (1), and bother/distress (1). A fifth item assessing hemorrhoids was poorly correlated with other items. Our analyses supported four related components of BF: diarrhea, rectal urgency, pain, and bother/distress.}, keywords = {*Quality of Life, *Questionnaires, Adult, Aged, Data Collection/methods, Humans, Intestine, Large/*physiopathology, Male, Middle Aged, Prostatic Neoplasms/*physiopathology, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Statistics, Nonparametric}, author = {Eton, D. T. and Lai, J. S. and Cella, D. and Reeve, B. B. and Talcott, J. A. and Clark, J. A. and McPherson, C. P. and Litwin, M. S. and Moinpour, C. M.} } @article {571, title = {Design and evaluation of an XML-based platform-independent computerized adaptive testing system}, journal = {IEEE Transactions on Education}, volume = {48(2)}, year = {2005}, pages = {230-237}, author = {Ho, R.-G., and Yen, Y.-C.} } @article {546, title = {Development of a computer-adaptive test for depression (D-CAT)}, journal = {Quality of Life Research}, volume = {14}, year = {2005}, pages = {2277{\textendash}2291}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Bjorner, J. B. and Klapp, B. F. and Rose, M.} } @inbook {1773, title = {The development of the adaptive item language assessment (AILA) for mixed-ability students}, year = {2005}, address = {Proceedings E-Learn 2005 World Conference on E-Learning in Corporate, Government, Healthcare, and Higher Education, 643-650, Vancouver, Canada, AACE, October 2005.}, author = {Giouroglou, H. and Economides, A. A.} } @article {85, title = {Dynamic assessment of health outcomes: Time to let the CAT out of the bag?}, journal = {Health Services Research}, volume = {40}, number = {5, part2}, year = {2005}, pages = {1694-1711}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {Background: The use of item response theory (IRT) to measure self-reported outcomes has burgeoned in recent years. Perhaps the most important application of IRT is computer-adaptive testing (CAT), a measurement approach in which the selection of items is tailored for each respondent. Objective. 
To provide an introduction to the use of CAT in the measurement of health outcomes, describe several IRT models that can be used as the basis of CAT, and discuss practical issues associated with the use of adaptive scaling in research settings. Principal Points: The development of a CAT requires several steps that are not required in the development of a traditional measure including identification of "starting" and "stopping" rules. CAT{\textquoteright}s most attractive advantage is its efficiency. Greater measurement precision can be achieved with fewer items. Disadvantages of CAT include the high cost and level of technical expertise required to develop a CAT. Conclusions: Researchers, clinicians, and patients benefit from the availability of psychometrically rigorous measures that are not burdensome. CAT outcome measures hold substantial promise in this regard, but their development is not without challenges. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive testing, Item Response Theory, self reported health outcomes}, isbn = {0017-9124 (Print); 1475-6773 (Electronic)}, author = {Cook, K. F. and O{\textquoteright}Malley, K. J. and Roddey, T. S.} } @conference {941, title = {Detecting exposed test items in computer-based testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 1.245 MB}}, address = {San Diego CA}, author = {Han, N. and Hambleton, R. K.} } @conference {851, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement: Exploring the Current State and the Future of Item Response Theory}, year = {2004}, note = {{PDF file, 406 KB}}, address = {Item Banks, and Computer-Adaptive Testing,{\textquotedblright} Bethesda MD}, author = {Bjorner, J. B.} } @conference {868, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement: Exploring the Current State and the Future of Item Response Theory}, year = {2004}, note = {{PDF file, 181 KB}}, address = {Item Banks, and Computer-Adaptive Testing,{\textquotedblright} Bethesda MD}, author = {Chang, C-H.} } @article {254, title = {The development and evaluation of a software prototype for computer-adaptive testing}, journal = {Computers and Education}, volume = {43}, number = {1-2}, year = {2004}, pages = {109-123}, keywords = {computerized adaptive testing}, author = {Lilley, M and Barker, T and Britton, C} } @article {34, title = {Developing an initial physical function item bank from existing sources}, journal = {Journal of Applied Measurement}, volume = {4}, number = {2}, year = {2003}, note = {1529-7713Journal Article}, pages = {124-36}, abstract = {The objective of this article is to illustrate incremental item banking using health-related quality of life data collected from two samples of patients receiving cancer treatment. The kinds of decisions one faces in establishing an item bank for computerized adaptive testing are also illustrated. 
Pre-calibration procedures include: identifying common items across databases; creating a new database with data from each pool; reverse-scoring "negative" items; identifying rating scales used in items; identifying pivot points in each rating scale; pivot anchoring items at comparable rating scale categories; and identifying items in each instrument that measure the construct of interest. A series of calibrations were conducted in which a small proportion of new items were added to the common core and misfitting items were identified and deleted until an initial item bank has been developed.}, keywords = {*Databases, *Sickness Impact Profile, Adaptation, Psychological, Data Collection, Humans, Neoplasms/*physiopathology/psychology/therapy, Psychometrics, Quality of Life/*psychology, Research Support, U.S. Gov{\textquoteright}t, P.H.S., United States}, author = {Bode, R. K. and Cella, D. and Lai, J. S. and Heinemann, A. W.} } @article {517, title = {Development and psychometric evaluation of the Flexilevel Scale of Shoulder Function (FLEX-SF)}, journal = {Medical Care (in press)}, year = {2003}, note = {$\#$CO03-01}, author = {Cook, K. F. and Roddey, T. S. and Gartsman, G M and Olson, S L} } @conference {902, title = {Development of the Learning Potential Computerised Adaptive Test (LPCAT)}, booktitle = {Unpublished manuscript. }, year = {2003}, note = {{PDF file, 563 KB}}, author = {De Beer, M.} } @article {359, title = {Development, reliability, and validity of a computerized adaptive version of the Schedule for Nonadaptive and Adaptive Personality}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {63}, number = {7-B}, year = {2003}, pages = {3485}, abstract = {Computerized adaptive testing (CAT) and Item Response Theory (IRT) techniques were applied to the Schedule for Nonadaptive and Adaptive Personality (SNAP) to create a more efficient measure with little or no cost to test reliability or validity. The SNAP includes 15 factor analytically derived and relatively unidimensional traits relevant to personality disorder. IRT item parameters were calibrated on item responses from a sample of 3,995 participants who completed the traditional paper-and-pencil (P\&P) SNAP in a variety of university, community, and patient settings. Computerized simulations were conducted to test various adaptive testing algorithms, and the results informed the construction of the CAT version of the SNAP (SNAP-CAT). A validation study of the SNAP-CAT was conducted on a sample of 413 undergraduates who completed the SNAP twice, separated by one week. Participants were randomly assigned to one of four groups who completed (1) a modified P\&P version of the SNAP (SNAP-PP) twice (n = 106), (2) the SNAP-PP first and the SNAP-CAT second (n = 105), (3) the SNAP-CAT first and the SNAP-PP second (n = 102), and (4) the SNAP-CAT twice (n = 100). Results indicated that the SNAP-CAT was 58\% and 60\% faster than the traditional P\&P version, at Times 1 and 2, respectively, and mean item savings across scales were 36\% and 37\%, respectively. These savings came with minimal cost to reliability or validity, and the two test forms were largely equivalent. Descriptive statistics, rank-ordering of scores, internal factor structure, and convergent/discriminant validity were highly comparable across testing modes and methods of scoring, and very few differences between forms replicated across testing sessions. 
In addition, participants overwhelmingly preferred the computerized version to the P\&P version. However, several specific problems were identified for the Self-harm and Propriety scales of the SNAP-CAT that appeared to be broadly related to IRT calibration difficulties. Reasons for these anomalous findings are discussed, and follow-up studies are suggested. Despite these specific problems, the SNAP-CAT appears to be a viable alternative to the traditional P\&P SNAP. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, author = {Simms, L. J.} } @article {14, title = {Data sparseness and on-line pretest item calibration-scaling methods in CAT}, journal = {Journal of Educational Measurement}, volume = {39}, number = {3}, year = {2002}, pages = {207-218}, abstract = {Compared and evaluated 3 on-line pretest item calibration-scaling methods (the marginal maximum likelihood estimate with 1 expectation maximization [EM] cycle [OEM] method, the marginal maximum likelihood estimate with multiple EM cycles [MEM] method, and M. L. Stocking{\textquoteright}s Method B) in terms of item parameter recovery when the item responses to the pretest items in the pool are sparse. Simulations of computerized adaptive tests were used to evaluate the results yielded by the three methods. The MEM method produced the smallest average total error in parameter estimation, and the OEM method yielded the largest total error (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, Educational Measurement, Item Response Theory, Maximum Likelihood, Methodology, Scaling (Testing), Statistical Data}, author = {Ban, J-C. and Hanson, B. A. and Yi, Q. and Harris, D. J.} } @article {411, title = {Detection of person misfit in computerized adaptive tests with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {164-180}, abstract = {Item scores that do not fit an assumed item response theory model may cause the latent trait value to be inaccurately estimated. For a computerized adaptive test (CAT) using dichotomous items, several person-fit statistics for detecting misfitting item score patterns have been proposed. Both for paper-and-pencil (P\&P) tests and CATs, detection of person misfit with polytomous items is hardly explored. In this study, the nominal and empirical null distributions of the standardized log-likelihood statistic for polytomous items are compared both for P\&P tests and CATs. Results showed that the empirical distribution of this statistic differed from the assumed standard normal distribution for both P\&P tests and CATs. Second, a new person-fit statistic based on the cumulative sum (CUSUM) procedure from statistical process control was proposed. By means of simulated data, critical values were determined that can be used to classify a pattern as fitting or misfitting. The effectiveness of the CUSUM to detect simulees with item preknowledge was investigated. Detection rates using the CUSUM were high for realistic numbers of disclosed items.}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R.
R.} } @conference {1197, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement}, year = {2002}, note = {{PDF file, 170 KB}}, address = {{\textquotedblright} Bethesda, Maryland, June 23-25}, author = {Thissen, D.} } @conference {1016, title = {The development and evaluation of a computer-adaptive testing application for English language}, booktitle = {Paper presented at the 2002 Computer-Assisted Testing Conference}, year = {2002}, note = {{PDF file, 308 KB}}, address = {United Kingdom}, author = {Lilley, M and Barker, T} } @article {187, title = {Development of an index of physical functional health status in rehabilitation}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {83}, number = {5}, year = {2002}, note = {0003-9993 (Print)Journal Article}, month = {May}, pages = {655-65}, abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS. The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.}, keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies}, author = {Hart, D. L. and Wright, B. D.} } @conference {1053, title = {The Development of STAR Early Literacy}, booktitle = {Presentation to the 32rd Annual National Conference on Large-Scale Assessment.}, year = {2002}, address = {Desert Springs CA}, author = {J. R. 
McBride} } @mastersthesis {1978, title = {DEVELOPMENT, RELIABILITY, AND VALIDITY OF A COMPUTERIZED ADAPTIVE VERSION OF THE SCHEDULE FOR NONADAPTIVE AND ADAPTIVE PERSONALITY}, year = {2002}, address = {Unpublished Ph. D. dissertation, University of Iowa, Iowa City Iowa}, author = {Simms, L. J.} } @conference {835, title = {Data sparseness and online pretest calibration/scaling methods in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {Also ACT Research Report 2002-1)}, address = {Seattle}, author = {Ban, J and Hanson, B. A. and Yi, Q. and Harris, D.} } @conference {930, title = {Deriving a stopping rule for sequential adaptive tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 111 KB}}, address = {Seattle WA}, author = {Grabovsky, I. and Chang, Hua-Hua and Ying, Z.} } @booklet {1630, title = {Detection of misfitting item-score patterns in computerized adaptive testing}, year = {2001}, note = {$\#$ST01-01 V.}, address = {Enschede, The Netherlands: Febodruk B}, author = {Stoop, E. M. L. A.} } @book {1695, title = {Development and evaluation of test assembly procedures for computerized adaptive testing}, year = {2001}, address = {Unpublished doctoral dissertation, University of Massachusetts, Amherst}, author = {Robin, F.} } @article {720, title = {Development of an adaptive multimedia program to collect patient health data}, journal = {American Journal of Preventative Medicine}, volume = {21}, year = {2001}, pages = {320-324}, author = {Sutherland, L. A. and Campbell, M. and Ornstein, K. and Wildemuth, B. and Lobach, D.} } @booklet {1598, title = {The Development of STAR Early Literacy: A report of the School Renaissance Institute.}, year = {2001}, address = {Madison, WI: Author.}, author = {School-Renaissance-Institute} } @article {358, title = {Developments in measurement of persons and items by means of item response models}, journal = {Behaviormetrika}, volume = {28}, number = {1}, year = {2001}, pages = {65-94}, abstract = {This paper starts with a general introduction into measurement of hypothetical constructs typical of the social and behavioral sciences. After the stages ranging from theory through operationalization and item domain to preliminary test or questionnaire have been treated, the general assumptions of item response theory are discussed. The family of parametric item response models for dichotomous items is introduced and it is explained how parameters for respondents and items are estimated from the scores collected from a sample of respondents who took the test or questionnaire. Next, the family of nonparametric item response models is explained, followed by the 3 classes of item response models for polytomous item scores (e.g., rating scale scores). Then, to what degree the mean item score and the unweighted sum of item scores for persons are useful for measuring items and persons in the context of item response theory is discussed. Methods for fitting parametric and nonparametric models to data are briefly discussed. Finally, the main applications of item response models are discussed, which include equating and item banking, computerized and adaptive testing, research into differential item functioning, person fit research, and cognitive modeling. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Cognitive, Computer Assisted Testing, Item Response Theory, Models, Nonparametric Statistical Tests, Processes}, author = {Sijtsma, K.} } @article {315, title = {Differences between self-adapted and computerized adaptive tests: A meta-analysis}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {235-247}, abstract = {Self-adapted testing has been described as a variation of computerized adaptive testing that reduces test anxiety and thereby enhances test performance. The purpose of this study was to gain a better understanding of these proposed effects of self-adapted tests (SATs); meta-analysis procedures were used to estimate differences between SATs and computerized adaptive tests (CATs) in proficiency estimates and post-test anxiety levels across studies in which these two types of tests have been compared. After controlling for measurement error the results showed that SATs yielded proficiency estimates that were 0.12 standard deviation units higher and post-test anxiety levels that were 0.19 standard deviation units lower than those yielded by CATs. The authors speculate about possible reasons for these differences and discuss advantages and disadvantages of using SATs in operational settings. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Scores computerized adaptive testing, Test, Test Anxiety}, author = {Pitkin, A. K. and Vispoel, W. P.} } @inbook {416, title = {Designing item pools for computerized adaptive testing}, booktitle = {Computerized adaptive testing: Theory and practice}, year = {2000}, pages = {149{\textendash}162}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dendrecht, The Netherlands}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @inbook {1814, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, year = {2000}, address = {W. J. van der Linden, and C. A. W. Glas (Editors). Computerized Adaptive Testing: Theory and Practice. Norwell MA: Kluwer.}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @inbook {410, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, booktitle = {Computer adaptive testing: Theory and practice}, year = {2000}, pages = {201-219}, publisher = {Kluwer Academic.}, organization = {Kluwer Academic.}, address = {Dordrecht, The Netherlands}, keywords = {person Fit}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {1091, title = {Detecting test-takers who have memorized items in computerized-adaptive testing and muti-stage testing: A comparison}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Patsula, L N. and McLeod, L. D.} } @article {755, title = {Detection of known items in adaptive testing with a statistical quality control method}, journal = {Journal of Educational and Behavioral Statistics}, volume = {25}, year = {2000}, pages = {373-389}, author = {Veerkamp, W. J. J. and Glas, C. E. 
W.} } @booklet {1543, title = {Detection of person misfit in computerized adaptive testing with polytomous items (Research Report 00-01)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1511, title = {Development and evaluation of test assembly procedures for computerized adaptive testing (Laboratory of Psychometric and Evaluative Methods Research Report No. 391)}, year = {2000}, address = {Amherst MA: University of Massachusetts, School of Education.}, author = {Robin, F.} } @article {378, title = {The development of a computerized version of Vandenberg{\textquoteright}s mental rotation test and the effect of visuo-spatial working memory loading}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {60}, number = {11-A}, year = {2000}, pages = {3938}, abstract = {This dissertation focused on the generation and evaluation of web-based versions of Vandenberg{\textquoteright}s Mental Rotation Test. Memory and spatial visualization theory were explored in relation to the addition of a visuo-spatial working memory component. Analysis of the data determined that there was a significant difference between scores on the MRT Computer and MRT Memory test. The addition of a visuo-spatial working memory component did significantly affect results at the .05 alpha level. Reliability and discrimination estimates were higher on the MRT Memory version. The computerization of the paper and pencil version of the MRT did not significantly affect scores but did affect the time required to complete the test. The population utilized in the quasi-experiment consisted of 107 university students from eight institutions in engineering graphics related courses. The subjects completed two researcher developed, Web-based versions of Vandenberg{\textquoteright}s Mental Rotation Test and the original paper and pencil version of the Mental Rotation Test. One version of the test included a visuo-spatial working memory loading. Significant contributions of this study included developing and evaluating computerized versions of Vandenberg{\textquoteright}s Mental Rotation Test. Previous versions of Vandenberg{\textquoteright}s Mental Rotation Test did not take advantage of the ability of the computer to incorporate an interaction factor, such as a visuo-spatial working memory loading, into the test. The addition of an interaction factor results in a more discriminating test which will lend itself well to computerized adaptive testing practices. Educators in engineering graphics related disciplines should strongly consider the use of spatial visualization tests to aid in establishing the effects of modern computer systems on fundamental design/drafting skills. Regular testing of spatial visualization skills will assist in the creation of a more relevant curriculum. Computerized tests which are valid and reliable will assist in making this task feasible. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, Mental Rotation, Short Term Memory, computerized adaptive testing, Test Construction, Test Validity, Visuospatial Memory}, author = {Strong, S.
D.} } @article {351, title = {Diagnostische Programme in der Demenzfr{\"u}herkennung: Der Adaptive Figurenfolgen-Lerntest (ADAFI) [Diagnostic programs in the early detection of dementia: The Adaptive Figure Series Learning Test (ADAFI)]}, journal = {Zeitschrift f{\"u}r Gerontopsychologie \& -Psychiatrie}, volume = {13}, number = {1}, year = {2000}, pages = {16-29}, abstract = {The aim of this study was to examine the ability of the computerized Adaptive Figure Series Learning Test (ADAFI) to differentiate between old subjects at risk for dementia and old healthy controls. Several studies on the subject of measuring the intellectual potential (cognitive plasticity) of old subjects have shown the usefulness of the fluid intelligence type of task used in the ADAFI (completion of figure series) for this differentiation. Because the ADAFI has been developed as a Diagnostic Program, it is able to counter some critical issues in those studies. It was shown a) that distinct differences between both groups are revealed by the ADAFI, b) that the prediction of the cognitive health status of individual subjects is quite good (sensitivity: 80 \%, specificity: 90 \%), and c) that the prediction of the cognitive health status with tests of processing speed and working memory is worse than with the ADAFI. The results indicate that the ADAFI might be a promising plasticity-oriented tool for the measurement of cognitive decline in the elderly, and thus might be useful for the early detection of dementia.}, keywords = {Adaptive Testing, At Risk Populations, Computer Assisted Diagnosis, Dementia}, author = {Schreiber, M. D. and Schneider, R. J. and Schweizer, A. and Beckmann, J. F. and Baltissen, R.} } @article {646, title = {Does adaptive testing violate local independence?}, journal = {Psychometrika}, volume = {65}, year = {2000}, pages = {149-156}, author = {Mislevy, R. J. and Chang, Hua-Hua} } @booklet {1548, title = {Designing item pools for computerized adaptive testing (Research Report 99-03)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Veldkamp, B.
P. and van der Linden, W. J.} } @article {640, title = {Detecting item memorization in the CAT environment}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {147-160}, author = {McLeod, L. D. and Lewis, C.} } @conference {1063, title = {Detecting items that have been memorized in the CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {McLeod, L. D. and Schnipke, D. L.} } @inbook {1809, title = {Developing computerized adaptive tests for school children}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 93-115). Mahwah NJ: Erlbaum.}, author = {Kingsbury, G. G. and Houser, R. L.} } @conference {1260, title = {The development and cognitive evaluation of an audio-assisted computer-adaptive test for eighth-grade mathematics}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Williams, V. S. L.} } @inbook {1857, title = {Development and introduction of a computer adaptive Graduate Record Examination General Test}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.). Innovations in computerized assessment (pp. 117-135). Mahwah NJ: Erlbaum.}, author = {Mills, C. N.} } @inbook {1963, title = {The development of a computerized adaptive selection system for computer programmers in a financial services company}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 7-33). Mahwah NJ: Erlbaum.}, author = {Zickar, M. J. and Overton, R. C. and Taylor, L. R. and Harms, H. J.} } @article {606, title = {The development of an adaptive test for placement in French}, journal = {Studies in Language Testing}, volume = {10}, year = {1999}, pages = {122-135}, author = {Laurier, M.} } @inbook {1889, title = {Development of the computerized adaptive testing version of the Armed Services Vocational Aptitude Battery}, year = {1999}, address = {F. Drasgow and J. Olson-Buchanan (Eds.). Innovations in computerized assessment. Mahwah NJ: Erlbaum.}, author = {Segall, D. O. and Moreno, K. E.} } @article {790, title = {Dynamic health assessments: The search for more practical and more precise outcomes measures}, journal = {Quality of Life Newsletter}, year = {1999}, note = {{PDF file, 75 KB}}, pages = {11-13}, author = {Ware, J. E., Jr. and Bjorner, J. B. and Kosinski, M.} } @conference {1250, title = {Developing, maintaining, and renewing the item inventory to support computer-based testing}, booktitle = {Paper presented at the colloquium}, year = {1998}, address = {Computer-Based Testing: Building the Foundation for Future Assessments, Philadelphia PA}, author = {Way, W. D. and Steffen, M. and Anderson, G. S.} } @booklet {1438, title = {Development and evaluation of online calibration procedures (TCN 96-216)}, year = {1998}, address = {Champaign IL: Algorithm Design and Measurement Services, Inc.}, author = {Levine, M. L. and Williams} } @booklet {1478, title = {Does adaptive testing violate local independence? (Research Report 98-33)}, year = {1998}, address = {Princeton NJ: Educational Testing Service}, author = {Mislevy, R. J.
and Chang, Hua-Hua} } @conference {1188, title = {Detecting misbehaving items in a CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago, IL}, author = {Swygert, K.} } @conference {1210, title = {Detection of aberrant response patterns in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {van der Linden, W. J.} } @article {522, title = {Developing and scoring an innovative computerized writing assessment}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {21-41}, author = {Davey, T. and Godwin, J., and Mittelholz, D.} } @article {721, title = {Diagnostic adaptive testing: Effects of remedial instruction as empirical validation}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {3-20}, author = {Tatsuoka, K. K. and Tatsuoka, M. M.} } @article {292, title = {The distribution of indexes of person fit within the computerized adaptive testing environment}, journal = {Applied Psychological Measurement}, volume = {21}, number = {2}, year = {1997}, note = {Journal; Peer Reviewed Journal}, pages = {115-127}, abstract = {The extent to which a trait estimate represents the underlying latent trait of interest can be estimated by using indexes of person fit. Several statistical methods for indexing person fit have been proposed to identify nonmodel-fitting response vectors. These person-fit indexes have generally been found to follow a standard normal distribution for conventionally administered tests. The present investigation found that within the context of computerized adaptive testing (CAT) these indexes tended not to follow a standard normal distribution. As the item pool became less discriminating, as the CAT termination criterion became less stringent, and as the number of items in the pool decreased, the distributions of the indexes approached a standard normal distribution. It was determined that under these conditions the indexes{\textquoteright} distributions approached standard normal distributions because more items were being administered. However, even when over 50 items were administered in a CAT the indexes were distributed in a fashion that was different from what was expected. (PsycINFO Database Record (c) 2006 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Fit, Person Environment}, author = {Nering, M. L.} } @article {216, title = {Dispelling myths about the new NCLEX exam}, journal = {Recruitment, Retention, and Restructuring Report}, volume = {9}, number = {1}, year = {1996}, note = {Journal Article}, month = {Jan-Feb}, pages = {6-7}, abstract = {The new computerized NCLEX system is working well. Most new candidates, employers, and board of nursing representatives like the computerized adaptive testing system and the fast report of results. But, among the candidates themselves some myths have grown which cause them needless anxiety.}, keywords = {*Educational Measurement, *Licensure, Humans, Nursing Staff, Personnel Selection, United States}, author = {Johnson, S. 
H.} } @article {23, title = {Dynamic scaling: An ipsative procedure using techniques from computer adaptive testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {56}, number = {10-B}, year = {1996}, pages = {5824}, abstract = {The purpose of this study was to create a prototype method for scaling items using computer adaptive testing techniques and to demonstrate the method with a working model program. The method can be used to scale items, rank individuals with respect to the scaled items, and to re-scale the items with respect to the individuals{\textquoteright} responses. When using this prototype method, the items to be scaled are part of a database that contains not only the items, but measures of how individuals respond to each item. After completion of all presented items, the individual is assigned an overall scale value which is then compared with each item responded to, and an individual "error" term is stored with each item. After several individuals have responded to the items, the item error terms are used to revise the placement of the scaled items. This revision feature allows the natural adaptation of one general list to reflect subgroup differences, for example, differences among geographic areas or ethnic groups. It also provides easy revision and limited authoring of the scale items by the computer program administrator. This study addressed the methodology, the instrumentation needed to handle the scale-item administration, data recording, item error analysis, and scale-item database editing required by the method, and the behavior of a prototype vocabulary test in use. Analyses were made of item ordering, response profiles, item stability, reliability and validity. Although slow, the movement of unordered words used as items in the prototype program was accurate as determined by comparison with an expert word ranking. Person scores obtained by multiple administrations of the prototype test were reliable and correlated at.94 with a commercial paper-and-pencil vocabulary test, while holding a three-to-one speed advantage in administration. Although based upon self-report data, dynamic scaling instruments like the model vocabulary test could be very useful for self-assessment, for pre (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Berg, S. R.} } @conference {927, title = {Does cheating on CAT pay: Not}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, note = {ERIC ED 392 844}, address = {San Francisco}, author = {Gershon, R. C. and Bergstrom, B.} } @booklet {1591, title = {DIF analysis for pretest items in computer-adaptive testing (Educational Testing Service Research Rep No RR 94-33)}, year = {1994}, note = {$\#$ZW94-33}, address = {Princeton NJ: Educational Testing Service.}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @booklet {1371, title = {Deriving comparable scores for computer adaptive and conventional tests: An example using the SAT}, number = {(ETS Research Report RR-93-5)}, year = {1993}, note = {$\#$EI93-55 (Also presented at the 1993 National Council on Measurement in Education meeting in Atlanta GA.)}, address = {Princeton NJ: Educational Testing Service}, author = {Eignor, D. 
R.} } @article {758, title = {The development and evaluation of a computerized adaptive test of tonal memory}, journal = {Journal of Research in Music Education}, volume = {41}, year = {1993}, pages = {111-136}, author = {Vispoel, W. P.} } @article {100, title = {The development and evaluation of a system for computerized adaptive testing}, journal = {Dissertation Abstracts International}, volume = {52}, number = {12-A}, year = {1992}, pages = {4304}, keywords = {computerized adaptive testing}, author = {de la Torre Sanchez, R.} } @inbook {1849, title = {The development of alternative operational concepts}, year = {1992}, address = {Proceedings of the 34th Annual Conference of the Military Testing Association. San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride and Curran, L. T.} } @conference {1295, title = {Differential item functioning analysis for computer-adaptive tests and other IRT-scored measures}, booktitle = {Paper presented at the annual meeting of the Military Testing Association}, year = {1992}, address = {San Diego CA}, author = {Zwick, R.} } @conference {903, title = {The development and evaluation of a computerized adaptive testing system}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, note = {ERIC No. ED 338 711}, address = {Chicago IL}, author = {De la Torre, R. and Vispoel, W. P.} } @conference {990, title = {Development and evaluation of hierarchical testlets in two-stage tests using integer linear programming}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago IL}, author = {Lam, T. L. and Goong, Y. Y.} } @conference {1272, title = {Dichotomous search strategies for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association.}, year = {1990}, author = {Xiao, B.} } @inbook {1816, title = {Die Optimierung der Me{\ss}genauigkeit beim branched adaptiven Testen [Optimization of measurement precision for branched-adaptive testing]}, year = {1989}, address = {K. D. Kubinger (Ed.), Moderne Testtheorie: Ein Abri{\ss} samt neuesten Beitr{\"a}gen [Modern test theory: Overview and new issues] (pp. 187-218). Weinheim, Germany: Beltz.}, author = {Kubinger, K. D.} } @conference {953, title = {The development and evaluation of a microcomputerized adaptive placement testing system for college mathematics}, booktitle = {Paper(s) presented at the annual meeting(s) of the American Educational Research Association}, year = {1988}, address = {1986 (San Francisco CA) and 1987 (Washington DC)}, author = {Hsu, T.-C. and Shermis, M. D.} } @booklet {1362, title = {Determining the sensitivity of CAT-ASVAB scores to changes in item response curves with the medium of administration (Report No. 86-189)}, year = {1986}, note = {$\#$DI86-189}, address = {Alexandria VA: Center for Naval Analyses}, author = {Divgi, D. R.} } @booklet {1525, title = {Development of a microcomputer-based adaptive testing system: Phase II Implementation (Research Report ONR 85-5)}, year = {1985}, address = {St. Paul MN: Assessment Systems Corporation}, author = {Vale, C. D.} } @conference {1041, title = {The design of a computerized adaptive testing system for administering the ASVAB}, booktitle = {Presentation at the Annual Meeting of the American Educational Research Association}, year = {1984}, address = {New Orleans, LA}, author = {J. R.
McBride} } @inbook {1919, title = {Design of a Microcomputer-Based Adaptive Testing System}, year = {1982}, note = {{PDF file, 697 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Item Response Theory and Computerized Adaptive Testing Conference (pp. 360-371). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Vale, C. D.} } @conference {1040, title = {Development of a computerized adaptive testing system for enlisted personnel selection}, booktitle = {Presented at the Annual Convention of the American Psychological Association}, year = {1982}, address = {Washington, DC}, author = {J. R. McBride} } @inbook {1876, title = {Discussion: Adaptive and sequential testing}, year = {1982}, note = {{PDF file, 288 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1982 Computerized Adaptive Testing Conference (pp. 290-294). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Reckase, M. D.} } @article {734, title = {Design and implementation of a microcomputer-based adaptive testing system}, journal = {Behavior Research Methods and Instrumentation}, volume = {13}, year = {1981}, pages = {399-406}, author = {Vale, C. D.} } @book {1705, title = {Development and evaluation of an adaptive testing strategy for use in multidimensional interest assessment}, year = {1980}, address = {Unpublished doctoral dissertation, University of Minnesota. Dissertation Abstracts International, 42(11-B), 4248-4249}, author = {Vale, C. D.} } @inbook {1944, title = {Discussion: Session 1}, year = {1980}, note = {$\#$WA80-01 {PDF file, 283 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 51-55). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {B. K. Waters} } @inbook {1861, title = {Discussion: Session 3}, year = {1980}, note = {{PDF file, 286 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Item Response Theory and Computerized Adaptive Testing Conference (pp. 140-143). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Novick, M. R.} } @booklet {1512, title = {The danger of relying solely on diagnostic adaptive testing when prior and subsequent instructional methods are different (CERL Report E-5)}, year = {1979}, note = {$\#$TA79-01}, address = {Urbana IL: University of Illinois, Computer-Based Education Research Laboratory.}, author = {Tatsuoka, K. and Birenbaum, M.} } @article {658, title = {Description of components in tailored testing}, journal = {Behavior Research Methods and Instrumentation}, volume = {9}, year = {1977}, pages = {153-157}, author = {Patience, W. M.} } @inbook {1828, title = {Discussion}, year = {1976}, note = {{PDF file, 318 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 113-117). Washington DC: U.S. Government Printing Office.}, author = {Lord, F. M.} } @inbook {1779, title = {Discussion}, year = {1976}, note = {{PDF file, 347 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 118-119). Washington DC: U.S. Government Printing Office.}, author = {Green, B.
F.} } @inbook {1824, title = {Discussion}, year = {1975}, note = {{PDF file, 414 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 44-46. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Linn, R. L.} } @inbook {1739, title = {Discussion}, year = {1975}, note = {{PDF file, 414 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 46-49. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Bock, R. D.} } @booklet {1318, title = {Development of a programmed testing system (Technical Paper 259)}, year = {1974}, address = {Arlington VA: US Army Research Institute for the Behavioral and Social Sciences. NTIS No. AD A001534)}, author = {Bayroff, A. G. and Ross, R. M and Fischl, M. A} } @article {617, title = {The development and evaluation of several programmed testing methods}, journal = {Educational and Psychological Measurement}, volume = {29}, year = {1969}, pages = {129-146}, author = {Linn, R. L. and Cleary, T. A.} } @book {1668, title = {The development, implementation, and evaluation of a computer-assisted branched test for a program of individually prescribed instruction}, year = {1969}, address = {Doctoral dissertation, University of Pittsburgh. Dissertation Abstracts International, 30-09A, 3856. (University Microfilms No. 70-4530).}, author = {Ferguson, R. L.} } @booklet {1443, title = {The development and evaluation of several programmed testing methods (Research Bulletin 68-5)}, year = {1968}, note = {$\#$LI68-05}, address = {Princeton NJ: Educational Testing Service}, author = {Linn, R. L. and Rock, D. A. and Cleary, T. A.} }